Compare commits
75 Commits
mesa-18.0.
...
mesa-18.0.
Author | SHA1 | Date | |
---|---|---|---|
|
d38da7bd2d | ||
|
ff629ffcd3 | ||
|
53ff157c33 | ||
|
3b9b66560a | ||
|
60c5cf011d | ||
|
95d88ba0da | ||
|
6bd2fba19d | ||
|
ead5bf4f6a | ||
|
d45bb9f505 | ||
|
d75054d0d0 | ||
|
7673c72f3d | ||
|
264cda58ab | ||
|
40ed4b0285 | ||
|
251a36d629 | ||
|
b62b3eb259 | ||
|
f581dc608b | ||
|
e1b87631a9 | ||
|
279c628560 | ||
|
e7709adf7a | ||
|
cd52573fac | ||
|
5edd3192e7 | ||
|
a1c421c638 | ||
|
8bd719e3fa | ||
|
4a0d3a68a7 | ||
|
19db663cf0 | ||
|
825e950aea | ||
|
a989e999b4 | ||
|
1d44ea348e | ||
|
362c4f4c72 | ||
|
d2991fc2c6 | ||
|
66964df17a | ||
|
5eef557ddd | ||
|
7711ae2987 | ||
|
5f4009079c | ||
|
c9b6960f34 | ||
|
e49d7abf87 | ||
|
1ec9166598 | ||
|
f1604f69c2 | ||
|
44c7d1aa2e | ||
|
f2a13363cf | ||
|
df6c2bef90 | ||
|
1550c67a3a | ||
|
92cb895316 | ||
|
9710a7042c | ||
|
7fe3731e9f | ||
|
4cfb3553eb | ||
|
31f323165c | ||
|
7279b0c5ce | ||
|
08b7ec9b20 | ||
|
e26892d902 | ||
|
c9e2de3398 | ||
|
7a02062da5 | ||
|
48cbac76a6 | ||
|
42cf180fb5 | ||
|
50e3fb590c | ||
|
9776580d97 | ||
|
2165cc0a17 | ||
|
8521e00b5c | ||
|
0007574c41 | ||
|
755d07c269 | ||
|
b44df1d118 | ||
|
b582a4e910 | ||
|
ab520c95bd | ||
|
490c34281c | ||
|
510f4f3362 | ||
|
51265b844b | ||
|
b3343407a8 | ||
|
af4f8d426a | ||
|
a602ef9352 | ||
|
fd6ded8f9c | ||
|
bcdbcfca6e | ||
|
9ea807c8ee | ||
|
25bf9b2bb7 | ||
|
85a862949d | ||
|
fb64913d19 |
@@ -39,12 +39,12 @@ matrix:
|
|||||||
addons:
|
addons:
|
||||||
apt:
|
apt:
|
||||||
sources:
|
sources:
|
||||||
- llvm-toolchain-trusty-3.9
|
- llvm-toolchain-trusty-4.0
|
||||||
packages:
|
packages:
|
||||||
# LLVM packaging is broken and misses these dependencies
|
# LLVM packaging is broken and misses these dependencies
|
||||||
- libedit-dev
|
- libedit-dev
|
||||||
# From sources above
|
# From sources above
|
||||||
- llvm-3.9-dev
|
- llvm-4.0-dev
|
||||||
# Common
|
# Common
|
||||||
- xz-utils
|
- xz-utils
|
||||||
- libexpat1-dev
|
- libexpat1-dev
|
||||||
|
@@ -70,6 +70,7 @@ LOCAL_CFLAGS += \
|
|||||||
-DHAVE_DLADDR \
|
-DHAVE_DLADDR \
|
||||||
-DHAVE_DL_ITERATE_PHDR \
|
-DHAVE_DL_ITERATE_PHDR \
|
||||||
-DHAVE_LINUX_FUTEX_H \
|
-DHAVE_LINUX_FUTEX_H \
|
||||||
|
-DHAVE_ENDIAN_H \
|
||||||
-DHAVE_ZLIB \
|
-DHAVE_ZLIB \
|
||||||
-DMAJOR_IN_SYSMACROS \
|
-DMAJOR_IN_SYSMACROS \
|
||||||
-fvisibility=hidden \
|
-fvisibility=hidden \
|
||||||
|
@@ -64,7 +64,8 @@ EXTRA_DIST = \
|
|||||||
meson_options.txt \
|
meson_options.txt \
|
||||||
bin/meson.build \
|
bin/meson.build \
|
||||||
include/meson.build \
|
include/meson.build \
|
||||||
bin/install_megadrivers.py
|
bin/install_megadrivers.py \
|
||||||
|
bin/meson_get_version.py
|
||||||
|
|
||||||
noinst_HEADERS = \
|
noinst_HEADERS = \
|
||||||
include/c99_alloca.h \
|
include/c99_alloca.h \
|
||||||
|
@@ -4,3 +4,31 @@ ac4437b20b87c7285b89466f05b51518ae616873 automake: small cleanup after the meson
|
|||||||
|
|
||||||
# stable: The KHX extension is disabled altogether in the stable branches.
|
# stable: The KHX extension is disabled altogether in the stable branches.
|
||||||
2ffe395cba0f7b3c1f1c41062f4376eae3a188b5 radv: Don't expose VK_KHX_multiview on android.
|
2ffe395cba0f7b3c1f1c41062f4376eae3a188b5 radv: Don't expose VK_KHX_multiview on android.
|
||||||
|
|
||||||
|
# stable: There is a specific version for this patch for stable branches, but
|
||||||
|
# it is causing regressions.
|
||||||
|
85d0bec9616bc1ffa8e4ab5e7c5d12ff4e414872 anv: Be more careful about fast-clear colors
|
||||||
|
|
||||||
|
# fixes: The commit fixes earlier commit 1c57a6da5e3 which did not land in
|
||||||
|
# branch.
|
||||||
|
3401b028df1074a06a7fbc3fb1cda949646ef75d ac/shader: fix vertex input with components.
|
||||||
|
|
||||||
|
# fixes: The commit requires earlier commits 639c4f2b54a6 and 2cfba40eea4c
|
||||||
|
# which did not land in branch.
|
||||||
|
8f052a3e257a61240cb311032497d016278117a8 radv: handle exporting view index to fragment shader. (v1.1)
|
||||||
|
|
||||||
|
# fixes: The commit fixes earlier commits 83d4a5d5aea5a8a05be2,
|
||||||
|
# b2f2236dc565dd1460f0 and c62cf1f165919bc74296 which did not land in
|
||||||
|
# branch.
|
||||||
|
880c1718b6d14b33fe5ba918af70fea5be890c6b omx: always define ENABLE_ST_OMX_{BELLAGIO,TIZONIA}
|
||||||
|
|
||||||
|
# stable: There is a specific port for this patch for stable branch.
|
||||||
|
d15fb766aa3c98ffbe16d050b2af4804e4b12c57 radeonsi/gfx9: fix a hang with an empty first IB
|
||||||
|
|
||||||
|
# stable: Explicit 18.1 only nominations
|
||||||
|
0e945fdf23bac5a62c15edfcbfd9d6ac4eee592f nir: Do not use progress for unreachable code in return lowering.
|
||||||
|
84fef802fb16cef68ec358cbfed1cac9c3bfa410 ac/nir: add missing round_slice for 1D arrays
|
||||||
|
d136a5fad9c7e67c1362453388914ecc60420883 ac: fix the number of coordinates for ac_image_get_lod and arrays
|
||||||
|
|
||||||
|
# stable: There is a specific port for this patch for stable branch.
|
||||||
|
fedd0a4215bcd387525000d76b77993ca38916ae radv/winsys: allow to submit up to 4 IBs for chips without chaining
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# encoding=utf-8
|
# encoding=utf-8
|
||||||
# Copyright © 2017 Intel Corporation
|
# Copyright © 2017-2018 Intel Corporation
|
||||||
|
|
||||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
# of this software and associated documentation files (the "Software"), to deal
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
@@ -35,7 +35,11 @@ def main():
|
|||||||
parser.add_argument('drivers', nargs='+')
|
parser.add_argument('drivers', nargs='+')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
to = os.path.join(os.environ.get('MESON_INSTALL_DESTDIR_PREFIX'), args.libdir)
|
if os.path.isabs(args.libdir):
|
||||||
|
to = os.path.join(os.environ.get('DESTDIR', '/'), args.libdir[1:])
|
||||||
|
else:
|
||||||
|
to = os.path.join(os.environ['MESON_INSTALL_DESTDIR_PREFIX'], args.libdir)
|
||||||
|
|
||||||
master = os.path.join(to, os.path.basename(args.megadriver))
|
master = os.path.join(to, os.path.basename(args.megadriver))
|
||||||
|
|
||||||
if not os.path.exists(to):
|
if not os.path.exists(to):
|
||||||
@@ -58,7 +62,7 @@ def main():
|
|||||||
while ext != '.so':
|
while ext != '.so':
|
||||||
if os.path.exists(name):
|
if os.path.exists(name):
|
||||||
os.unlink(name)
|
os.unlink(name)
|
||||||
os.symlink(driver, name)
|
os.symlink(each, name)
|
||||||
name, ext = os.path.splitext(name)
|
name, ext = os.path.splitext(name)
|
||||||
finally:
|
finally:
|
||||||
os.chdir(ret)
|
os.chdir(ret)
|
||||||
|
@@ -862,6 +862,7 @@ fi
|
|||||||
AC_HEADER_MAJOR
|
AC_HEADER_MAJOR
|
||||||
AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"])
|
AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"])
|
||||||
AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
|
AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
|
||||||
|
AC_CHECK_HEADERS([endian.h])
|
||||||
AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])
|
AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])
|
||||||
AC_CHECK_FUNC([mkostemp], [DEFINES="$DEFINES -DHAVE_MKOSTEMP"])
|
AC_CHECK_FUNC([mkostemp], [DEFINES="$DEFINES -DHAVE_MKOSTEMP"])
|
||||||
AC_CHECK_FUNC([timespec_get], [DEFINES="$DEFINES -DHAVE_TIMESPEC_GET"])
|
AC_CHECK_FUNC([timespec_get], [DEFINES="$DEFINES -DHAVE_TIMESPEC_GET"])
|
||||||
|
@@ -88,22 +88,40 @@ This is a work-around for that.
|
|||||||
<li>MESA_GL_VERSION_OVERRIDE - changes the value returned by
|
<li>MESA_GL_VERSION_OVERRIDE - changes the value returned by
|
||||||
glGetString(GL_VERSION) and possibly the GL API type.
|
glGetString(GL_VERSION) and possibly the GL API type.
|
||||||
<ul>
|
<ul>
|
||||||
<li> The format should be MAJOR.MINOR[FC]
|
<li>The format should be MAJOR.MINOR[FC|COMPAT]
|
||||||
<li> FC is an optional suffix that indicates a forward compatible context.
|
<li>FC is an optional suffix that indicates a forward compatible
|
||||||
This is only valid for versions >= 3.0.
|
context. This is only valid for versions >= 3.0.
|
||||||
<li> GL versions < 3.0 are set to a compatibility (non-Core) profile
|
<li>COMPAT is an optional suffix that indicates a compatibility
|
||||||
<li> GL versions = 3.0, see below
|
context or GL_ARB_compatibility support. This is only valid for
|
||||||
<li> GL versions > 3.0 are set to a Core profile
|
versions >= 3.1.
|
||||||
<li> Examples: 2.1, 3.0, 3.0FC, 3.1, 3.1FC
|
<li>GL versions <= 3.0 are set to a compatibility (non-Core)
|
||||||
<ul>
|
profile
|
||||||
<li> 2.1 - select a compatibility (non-Core) profile with GL version 2.1
|
<li>GL version 3.1, depending on the driver, may or may not
|
||||||
<li> 3.0 - select a compatibility (non-Core) profile with GL version 3.0
|
have the ARB_compatibility extension enabled.
|
||||||
<li> 3.0FC - select a Core+Forward Compatible profile with GL version 3.0
|
<li>GL versions >= 3.2 are set to a Core profile
|
||||||
<li> 3.1 - select a Core profile with GL version 3.1
|
<li>Examples: 2.1, 3.0, 3.0FC, 3.1, 3.1FC, 3.1COMPAT, X.Y, X.YFC,
|
||||||
<li> 3.1FC - select a Core+Forward Compatible profile with GL version 3.1
|
X.YCOMPAT.
|
||||||
</ul>
|
<ul>
|
||||||
<li> Mesa may not really implement all the features of the given version.
|
<li>2.1 - select a compatibility (non-Core) profile with GL
|
||||||
(for developers only)
|
version 2.1.
|
||||||
|
<li>3.0 - select a compatibility (non-Core) profile with GL
|
||||||
|
version 3.0.
|
||||||
|
<li>3.0FC - select a Core+Forward Compatible profile with GL
|
||||||
|
version 3.0.
|
||||||
|
<li>3.1 - select GL version 3.1 with GL_ARB_compatibility enabled
|
||||||
|
per the driver default.
|
||||||
|
<li>3.1FC - select GL version 3.1 with forward compatibility and
|
||||||
|
GL_ARB_compatibility disabled.
|
||||||
|
<li>3.1COMPAT - select GL version 3.1 with GL_ARB_compatibility
|
||||||
|
enabled.
|
||||||
|
<li>X.Y - override GL version to X.Y without changing the profile.
|
||||||
|
<li>X.YFC - select a Core+Forward Compatible profile with GL
|
||||||
|
version X.Y.
|
||||||
|
<li>X.YCOMPAT - select a Compatibility profile with GL version
|
||||||
|
X.Y.
|
||||||
|
</ul>
|
||||||
|
<li>Mesa may not really implement all the features of the given
|
||||||
|
version. (for developers only)
|
||||||
</ul>
|
</ul>
|
||||||
<li>MESA_GLES_VERSION_OVERRIDE - changes the value returned by
|
<li>MESA_GLES_VERSION_OVERRIDE - changes the value returned by
|
||||||
glGetString(GL_VERSION) for OpenGL ES.
|
glGetString(GL_VERSION) for OpenGL ES.
|
||||||
|
@@ -14,15 +14,15 @@
|
|||||||
<iframe src="../contents.html"></iframe>
|
<iframe src="../contents.html"></iframe>
|
||||||
<div class="content">
|
<div class="content">
|
||||||
|
|
||||||
<h1>Mesa 17.4.0 Release Notes / March 27 2018</h1>
|
<h1>Mesa 18.0.0 Release Notes / March 27 2018</h1>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
Mesa 17.4.0 is a new development release.
|
Mesa 18.0.0 is a new development release.
|
||||||
People who are concerned with stability and reliability should stick
|
People who are concerned with stability and reliability should stick
|
||||||
with a previous release or wait for Mesa 17.4.1.
|
with a previous release or wait for Mesa 18.0.1.
|
||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
Mesa 17.4.0 implements the OpenGL 4.5 API, but the version reported by
|
Mesa 18.0.0 implements the OpenGL 4.5 API, but the version reported by
|
||||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||||
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
|
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
|
||||||
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.
|
|||||||
|
|
||||||
<h2>SHA256 checksums</h2>
|
<h2>SHA256 checksums</h2>
|
||||||
<pre>
|
<pre>
|
||||||
TBD.
|
93c2d3504b2871ac2146603fb1270f341d36a39695e2950a469c5eac74f98457 mesa-18.0.0.tar.gz
|
||||||
|
694e5c3d37717d23258c1f88bc134223c5d1aac70518d2f9134d6df3ee791eea mesa-18.0.0.tar.xz
|
||||||
</pre>
|
</pre>
|
||||||
|
|
||||||
|
|
||||||
|
225
docs/relnotes/18.0.1.html
Normal file
225
docs/relnotes/18.0.1.html
Normal file
@@ -0,0 +1,225 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||||
|
<title>Mesa Release Notes</title>
|
||||||
|
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<div class="header">
|
||||||
|
<h1>The Mesa 3D Graphics Library</h1>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<iframe src="../contents.html"></iframe>
|
||||||
|
<div class="content">
|
||||||
|
|
||||||
|
<h1>Mesa 18.0.1 Release Notes / April 18, 2018</h1>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Mesa 18.0.1 is a bug fix release which fixes bugs found since the 18.0.0 release.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Mesa 18.0.1 implements the OpenGL 4.5 API, but the version reported by
|
||||||
|
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||||
|
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||||
|
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
|
||||||
|
4.5 is <strong>only</strong> available if requested at context creation
|
||||||
|
because compatibility contexts are not supported.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
|
||||||
|
<h2>SHA256 checksums</h2>
|
||||||
|
<pre>
|
||||||
|
0c93ba892c0610f5dd87f2e2673b9445187995c395b3ddb33fd4260bfb291e89 mesa-18.0.1.tar.gz
|
||||||
|
b2d2f5b5dbaab13e15cb0dcb5ec81887467f55ebc9625945b303a3647cd87954 mesa-18.0.1.tar.xz
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
|
||||||
|
<h2>New features</h2>
|
||||||
|
<p>None</p>
|
||||||
|
|
||||||
|
|
||||||
|
<h2>Bug fixes</h2>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101408">Bug 101408</a> - [Gen8+] Xonotic fails to render one of the weapons</li>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102342">Bug 102342</a> - mesa-17.1.7/src/gallium/auxiliary/pipebuffer/pb_cache.c:169]: (style) Suspicious condition</li>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102542">Bug 102542</a> - mesa-17.2.0/src/gallium/state_trackers/nine/nine_ff.c:1938: bad assignment ?</li>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105317">Bug 105317</a> - The GPU Vega 56 was hang while try to pass #GraphicsFuzz shader15 test</li>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105440">Bug 105440</a> - GEN7: rendering issue on citra</li>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105442">Bug 105442</a> - Hang when running nine ff lighting shader with radeonsi</li>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105567">Bug 105567</a> - meson/ninja: 1. mesa/vdpau incorrect symlinks in DESTDIR and 2. Ddri-drivers-path Dvdpau-libs-path overrides DESTDIR</li>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105670">Bug 105670</a> - [regression][hang] Trine1EE hangs GPU after loading screen on Mesa3D-17.3 and later</li>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105704">Bug 105704</a> - compiler assertion hit</li>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105717">Bug 105717</a> - [bisected] Mesa build tests fails: BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined</li>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105942">Bug 105942</a> - Graphical artefacts after update to mesa 18.0.0-2</li>
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
|
||||||
|
<h2>Changes</h2>
|
||||||
|
|
||||||
|
<p>Andres Gomez (2):</p>
|
||||||
|
<ul>
|
||||||
|
<li>dri_util: when overriding, always reset the core version</li>
|
||||||
|
<li>mesa: adds some comments regarding MESA_GLES_VERSION_OVERRIDE usage</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Axel Davy (5):</p>
|
||||||
|
<ul>
|
||||||
|
<li>st/nine: Fix bad tracking of vs textures for NINESBT_ALL</li>
|
||||||
|
<li>st/nine: Fixes warning about implicit conversion</li>
|
||||||
|
<li>st/nine: Fix non inversible matrix check</li>
|
||||||
|
<li>st/nine: Declare lighting consts for ff shaders</li>
|
||||||
|
<li>st/nine: Do not use scratch for face register</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Bas Nieuwenhuizen (3):</p>
|
||||||
|
<ul>
|
||||||
|
<li>ac/nir: Add workaround for GFX9 buffer views.</li>
|
||||||
|
<li>radv: Don't set instance count using predication.</li>
|
||||||
|
<li>radv: Always reset draw user SGPRs after secondary command buffer.</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Caio Marcelo de Oliveira Filho (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>anv/pipeline: fail if TCS/TES compile fail</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Daniel Stone (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>st/dri: Initialise modifier to INVALID for DRI2</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Derek Foreman (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>egl/wayland: Make swrast display_sync the correct queue</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Dylan Baker (4):</p>
|
||||||
|
<ul>
|
||||||
|
<li>meson: don't use compiler.has_header</li>
|
||||||
|
<li>autotools: include meson_get_version</li>
|
||||||
|
<li>meson: Set .so version for xa like autotools does</li>
|
||||||
|
<li>meson: fix megadriver symlinking</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Emil Velikov (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>docs: add sha256 checksums for 18.0.0</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Eric Engestrom (3):</p>
|
||||||
|
<ul>
|
||||||
|
<li>meson/configure: detect endian.h instead of trying to guess when it's available</li>
|
||||||
|
<li>docs: fix 18.0 release note version</li>
|
||||||
|
<li>gbm: remove never-implemented function</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Henri Verbeet (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>mesa: Inherit texture view multi-sample information from the original texture images.</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Iago Toral Quiroga (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>compiler/spirv: set is_shadow for depth comparitor sampling opcodes</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Ian Romanick (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>i965/vec4: Fix null destination register in 3-source instructions</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Jason Ekstrand (4):</p>
|
||||||
|
<ul>
|
||||||
|
<li>nir/vars_to_ssa: Remove copies from the correct set</li>
|
||||||
|
<li>nir/lower_indirect_derefs: Support interp_var_at intrinsics</li>
|
||||||
|
<li>intel/vec4: Set channel_sizes for MOV_INDIRECT sources</li>
|
||||||
|
<li>nir/lower_vec_to_movs: Only coalesce if the vec had a SSA destination</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Juan A. Suarez Romero (5):</p>
|
||||||
|
<ul>
|
||||||
|
<li>cherry-ignore anv: Be more careful about fast-clear colors</li>
|
||||||
|
<li>cherry-ignore: ac/shader: fix vertex input with components.</li>
|
||||||
|
<li>cherry-ignore: radv: handle exporting view index to fragment shader. (v1.1)</li>
|
||||||
|
<li>cherry-ignore: omx: always define ENABLE_ST_OMX_{BELLAGIO,TIZONIA}</li>
|
||||||
|
<li>Update version to 18.0.1</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Leo Liu (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>radeon/vce: move feedback command inside of destroy function</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Lionel Landwerlin (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>i965/perf: fix config registration when uploading to kernel</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Marc Dietrich (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>meson: fix HAVE_LLVM version define in meson build</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Marek Olšák (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>mesa: simplify MESA_GL_VERSION_OVERRIDE behavior of API override</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Mark Thompson (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>st/va: Enable vaExportSurfaceHandle()</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Rob Clark (3):</p>
|
||||||
|
<ul>
|
||||||
|
<li>nir: fix per_vertex_output intrinsic</li>
|
||||||
|
<li>freedreno/a5xx: fix page faults on last level</li>
|
||||||
|
<li>freedreno/a5xx: don't align height for PIPE_BUFFER</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Samuel Pitoiset (2):</p>
|
||||||
|
<ul>
|
||||||
|
<li>radv: fix picking the method for resolve subpass</li>
|
||||||
|
<li>radv: fix radv_layout_dcc_compressed() when image doesn't have DCC</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Sergii Romantsov (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>i965: Extend the negative 32-bit deltas to 64-bits</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Timothy Arceri (7):</p>
|
||||||
|
<ul>
|
||||||
|
<li>ac: add if/loop build helpers</li>
|
||||||
|
<li>radeonsi: make use of if/loop build helpers in ac</li>
|
||||||
|
<li>ac: make use of if/loop build helpers</li>
|
||||||
|
<li>glsl: fix infinite loop caused by bug in loop unrolling pass</li>
|
||||||
|
<li>nir: fix crash in loop unroll corner case</li>
|
||||||
|
<li>gallium/pipebuffer: fix parenthesis location</li>
|
||||||
|
<li>glsl: always call do_lower_jumps() after loop unrolling</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Xiong, James (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>i965: return the fourcc saved in __DRIimage when possible</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
143
docs/relnotes/18.0.2.html
Normal file
143
docs/relnotes/18.0.2.html
Normal file
@@ -0,0 +1,143 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||||
|
<title>Mesa Release Notes</title>
|
||||||
|
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<div class="header">
|
||||||
|
<h1>The Mesa 3D Graphics Library</h1>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<iframe src="../contents.html"></iframe>
|
||||||
|
<div class="content">
|
||||||
|
|
||||||
|
<h1>Mesa 18.0.2 Release Notes / April 28, 2018</h1>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Mesa 18.0.2 is a bug fix release which fixes bugs found since the 18.0.1 release.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Mesa 18.0.2 implements the OpenGL 4.5 API, but the version reported by
|
||||||
|
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||||
|
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||||
|
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
|
||||||
|
4.5 is <strong>only</strong> available if requested at context creation
|
||||||
|
because compatibility contexts are not supported.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
|
||||||
|
<h2>SHA256 checksums</h2>
|
||||||
|
<pre>
|
||||||
|
TBD
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
|
||||||
|
<h2>New features</h2>
|
||||||
|
<p>None</p>
|
||||||
|
|
||||||
|
|
||||||
|
<h2>Bug fixes</h2>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95009">Bug 95009</a> - [SNB] amd_shader_trinary_minmax.execution.built-in-functions.gs-mid3-ivec2-ivec2-ivec2 intermittent</li>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95012">Bug 95012</a> - [SNB] glsl-1_50.execution.built-in-functions.gs-op tests intermittent</li>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98281">Bug 98281</a> - 'message's in ctx->Debug.LogMessages[] seem to leak.</li>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105320">Bug 105320</a> - Storage texel buffer access produces wrong results (RX Vega)</li>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105775">Bug 105775</a> - SI reaches the maximum IB size in dwords and fail to submit</li>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105994">Bug 105994</a> - surface state leak when creating and destroying image views with aspectMask depth and stencil</li>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106074">Bug 106074</a> - radv: si_scissor_from_viewport returns incorrect result when using half-pixel viewport offset</li>
|
||||||
|
|
||||||
|
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106126">Bug 106126</a> - eglMakeCurrent does not always ensure dri_drawable->update_drawable_info has been called for a new EGLSurface if another has been created and destroyed first</li>
|
||||||
|
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
|
||||||
|
<h2>Changes</h2>
|
||||||
|
|
||||||
|
<p>Bas Nieuwenhuizen (2):</p>
|
||||||
|
<ul>
|
||||||
|
<li>ac/nir: Make the GFX9 buffer size fix apply to image loads/atomics too.</li>
|
||||||
|
<li>radv: Mark GTT memory as device local for APUs.</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Dylan Baker (2):</p>
|
||||||
|
<ul>
|
||||||
|
<li>bin/install_megadrivers: fix DESTDIR and -D*-path</li>
|
||||||
|
<li>meson: don't build classic mesa tests without dri_drivers</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Ian Romanick (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>intel/compiler: Add scheduler deps for instructions that implicitly read g0</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Jason Ekstrand (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>i965/fs: Return mlen * 8 for size_read() for INTERPOLATE_AT_*</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Johan Klokkhammer Helsing (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>st/dri: Fix dangling pointer to a destroyed dri_drawable</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Juan A. Suarez Romero (4):</p>
|
||||||
|
<ul>
|
||||||
|
<li>docs: add sha256 checksums for 18.0.1</li>
|
||||||
|
<li>travis: radv needs LLVM 4.0</li>
|
||||||
|
<li>cherry-ignore: add explicit 18.1 only nominations</li>
|
||||||
|
<li>Update version to 18.0.2</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Kenneth Graunke (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>i965: Fix shadow batches to be the same size as the real BO.</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Lionel Landwerlin (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>anv: fix number of planes for depth & stencil</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Lucas Stach (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>etnaviv: fix texture_format_needs_swiz</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Marek Olšák (3):</p>
|
||||||
|
<ul>
|
||||||
|
<li>radeonsi/gfx9: fix a hang with an empty first IB</li>
|
||||||
|
<li>glsl_to_tgsi: try harder to lower unsupported ir_binop_vector_extract</li>
|
||||||
|
<li>Revert "st/dri: Fix dangling pointer to a destroyed dri_drawable"</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Samuel Pitoiset (2):</p>
|
||||||
|
<ul>
|
||||||
|
<li>radv: fix scissor computation when using half-pixel viewport offset</li>
|
||||||
|
<li>radv/winsys: allow to submit up to 4 IBs for chips without chaining</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Thomas Hellstrom (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>svga: Fix incorrect advertizing of EGL_KHR_gl_colorspace</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>Timothy Arceri (1):</p>
|
||||||
|
<ul>
|
||||||
|
<li>mesa: free debug messages when destroying the debug state</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
@@ -838,8 +838,8 @@ elif cc.has_header_symbol('sys/mkdev.h', 'major')
|
|||||||
pre_args += '-DMAJOR_IN_MKDEV'
|
pre_args += '-DMAJOR_IN_MKDEV'
|
||||||
endif
|
endif
|
||||||
|
|
||||||
foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h']
|
foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h', 'endian.h']
|
||||||
if cc.has_header(h)
|
if cc.compiles('#include <@0@>'.format(h), name : '@0@ works'.format(h))
|
||||||
pre_args += '-DHAVE_@0@'.format(h.to_upper().underscorify())
|
pre_args += '-DHAVE_@0@'.format(h.to_upper().underscorify())
|
||||||
endif
|
endif
|
||||||
endforeach
|
endforeach
|
||||||
@@ -1027,7 +1027,7 @@ if with_llvm
|
|||||||
_llvm_patch = _llvm_patch.split('g')[0]
|
_llvm_patch = _llvm_patch.split('g')[0]
|
||||||
endif
|
endif
|
||||||
pre_args += [
|
pre_args += [
|
||||||
'-DHAVE_LLVM=0x0@0@@1@@2@'.format(_llvm_version[0], _llvm_version[1], _llvm_patch),
|
'-DHAVE_LLVM=0x0@0@0@1@'.format(_llvm_version[0], _llvm_version[1]),
|
||||||
'-DMESA_LLVM_VERSION_PATCH=@0@'.format(_llvm_patch),
|
'-DMESA_LLVM_VERSION_PATCH=@0@'.format(_llvm_patch),
|
||||||
]
|
]
|
||||||
elif with_amd_vk or with_gallium_radeonsi or with_gallium_swr
|
elif with_amd_vk or with_gallium_radeonsi or with_gallium_swr
|
||||||
|
@@ -352,6 +352,9 @@ def generate(env):
|
|||||||
if check_header(env, 'xlocale.h'):
|
if check_header(env, 'xlocale.h'):
|
||||||
cppdefines += ['HAVE_XLOCALE_H']
|
cppdefines += ['HAVE_XLOCALE_H']
|
||||||
|
|
||||||
|
if check_header(env, 'endian.h'):
|
||||||
|
cppdefines += ['HAVE_ENDIAN_H']
|
||||||
|
|
||||||
if check_functions(env, ['strtod_l', 'strtof_l']):
|
if check_functions(env, ['strtod_l', 'strtof_l']):
|
||||||
cppdefines += ['HAVE_STRTOD_L']
|
cppdefines += ['HAVE_STRTOD_L']
|
||||||
|
|
||||||
|
@@ -41,6 +41,16 @@
|
|||||||
|
|
||||||
#include "shader_enums.h"
|
#include "shader_enums.h"
|
||||||
|
|
||||||
|
#define AC_LLVM_INITIAL_CF_DEPTH 4
|
||||||
|
|
||||||
|
/* Data for if/else/endif and bgnloop/endloop control flow structures.
|
||||||
|
*/
|
||||||
|
struct ac_llvm_flow {
|
||||||
|
/* Loop exit or next part of if/else/endif. */
|
||||||
|
LLVMBasicBlockRef next_block;
|
||||||
|
LLVMBasicBlockRef loop_entry_block;
|
||||||
|
};
|
||||||
|
|
||||||
/* Initialize module-independent parts of the context.
|
/* Initialize module-independent parts of the context.
|
||||||
*
|
*
|
||||||
* The caller is responsible for initializing ctx::module and ctx::builder.
|
* The caller is responsible for initializing ctx::module and ctx::builder.
|
||||||
@@ -103,6 +113,14 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
|
|||||||
ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
|
ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ac_llvm_context_dispose(struct ac_llvm_context *ctx)
|
||||||
|
{
|
||||||
|
free(ctx->flow);
|
||||||
|
ctx->flow = NULL;
|
||||||
|
ctx->flow_depth_max = 0;
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
ac_get_llvm_num_components(LLVMValueRef value)
|
ac_get_llvm_num_components(LLVMValueRef value)
|
||||||
{
|
{
|
||||||
@@ -1010,6 +1028,26 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
|
|||||||
ac_get_load_intr_attribs(can_speculate));
|
ac_get_load_intr_attribs(can_speculate));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
|
||||||
|
LLVMValueRef rsrc,
|
||||||
|
LLVMValueRef vindex,
|
||||||
|
LLVMValueRef voffset,
|
||||||
|
bool can_speculate)
|
||||||
|
{
|
||||||
|
LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 2, 0), "");
|
||||||
|
LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 1, 0), "");
|
||||||
|
stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 0), "");
|
||||||
|
|
||||||
|
LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->builder,
|
||||||
|
LLVMBuildICmp(ctx->builder, LLVMIntUGT, elem_count, stride, ""),
|
||||||
|
elem_count, stride, "");
|
||||||
|
|
||||||
|
LLVMValueRef new_rsrc = LLVMBuildInsertElement(ctx->builder, rsrc, new_elem_count,
|
||||||
|
LLVMConstInt(ctx->i32, 2, 0), "");
|
||||||
|
|
||||||
|
return ac_build_buffer_load_format(ctx, new_rsrc, vindex, voffset, can_speculate);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set range metadata on an instruction. This can only be used on load and
|
* Set range metadata on an instruction. This can only be used on load and
|
||||||
* call instructions. If you know an instruction can only produce the values
|
* call instructions. If you know an instruction can only produce the values
|
||||||
@@ -1865,3 +1903,174 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
|
|||||||
ctx->i32_0, ""),
|
ctx->i32_0, ""),
|
||||||
LLVMConstInt(ctx->i32, -1, 0), lsb, "");
|
LLVMConstInt(ctx->i32, -1, 0), lsb, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct ac_llvm_flow *
|
||||||
|
get_current_flow(struct ac_llvm_context *ctx)
|
||||||
|
{
|
||||||
|
if (ctx->flow_depth > 0)
|
||||||
|
return &ctx->flow[ctx->flow_depth - 1];
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct ac_llvm_flow *
|
||||||
|
get_innermost_loop(struct ac_llvm_context *ctx)
|
||||||
|
{
|
||||||
|
for (unsigned i = ctx->flow_depth; i > 0; --i) {
|
||||||
|
if (ctx->flow[i - 1].loop_entry_block)
|
||||||
|
return &ctx->flow[i - 1];
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct ac_llvm_flow *
|
||||||
|
push_flow(struct ac_llvm_context *ctx)
|
||||||
|
{
|
||||||
|
struct ac_llvm_flow *flow;
|
||||||
|
|
||||||
|
if (ctx->flow_depth >= ctx->flow_depth_max) {
|
||||||
|
unsigned new_max = MAX2(ctx->flow_depth << 1,
|
||||||
|
AC_LLVM_INITIAL_CF_DEPTH);
|
||||||
|
|
||||||
|
ctx->flow = realloc(ctx->flow, new_max * sizeof(*ctx->flow));
|
||||||
|
ctx->flow_depth_max = new_max;
|
||||||
|
}
|
||||||
|
|
||||||
|
flow = &ctx->flow[ctx->flow_depth];
|
||||||
|
ctx->flow_depth++;
|
||||||
|
|
||||||
|
flow->next_block = NULL;
|
||||||
|
flow->loop_entry_block = NULL;
|
||||||
|
return flow;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base,
|
||||||
|
int label_id)
|
||||||
|
{
|
||||||
|
char buf[32];
|
||||||
|
snprintf(buf, sizeof(buf), "%s%d", base, label_id);
|
||||||
|
LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Append a basic block at the level of the parent flow.
|
||||||
|
*/
|
||||||
|
static LLVMBasicBlockRef append_basic_block(struct ac_llvm_context *ctx,
|
||||||
|
const char *name)
|
||||||
|
{
|
||||||
|
assert(ctx->flow_depth >= 1);
|
||||||
|
|
||||||
|
if (ctx->flow_depth >= 2) {
|
||||||
|
struct ac_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
|
||||||
|
|
||||||
|
return LLVMInsertBasicBlockInContext(ctx->context,
|
||||||
|
flow->next_block, name);
|
||||||
|
}
|
||||||
|
|
||||||
|
LLVMValueRef main_fn =
|
||||||
|
LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder));
|
||||||
|
return LLVMAppendBasicBlockInContext(ctx->context, main_fn, name);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Emit a branch to the given default target for the current block if
|
||||||
|
* applicable -- that is, if the current block does not already contain a
|
||||||
|
* branch from a break or continue.
|
||||||
|
*/
|
||||||
|
static void emit_default_branch(LLVMBuilderRef builder,
|
||||||
|
LLVMBasicBlockRef target)
|
||||||
|
{
|
||||||
|
if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
|
||||||
|
LLVMBuildBr(builder, target);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ac_build_bgnloop(struct ac_llvm_context *ctx, int label_id)
|
||||||
|
{
|
||||||
|
struct ac_llvm_flow *flow = push_flow(ctx);
|
||||||
|
flow->loop_entry_block = append_basic_block(ctx, "LOOP");
|
||||||
|
flow->next_block = append_basic_block(ctx, "ENDLOOP");
|
||||||
|
set_basicblock_name(flow->loop_entry_block, "loop", label_id);
|
||||||
|
LLVMBuildBr(ctx->builder, flow->loop_entry_block);
|
||||||
|
LLVMPositionBuilderAtEnd(ctx->builder, flow->loop_entry_block);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ac_build_break(struct ac_llvm_context *ctx)
|
||||||
|
{
|
||||||
|
struct ac_llvm_flow *flow = get_innermost_loop(ctx);
|
||||||
|
LLVMBuildBr(ctx->builder, flow->next_block);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ac_build_continue(struct ac_llvm_context *ctx)
|
||||||
|
{
|
||||||
|
struct ac_llvm_flow *flow = get_innermost_loop(ctx);
|
||||||
|
LLVMBuildBr(ctx->builder, flow->loop_entry_block);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ac_build_else(struct ac_llvm_context *ctx, int label_id)
|
||||||
|
{
|
||||||
|
struct ac_llvm_flow *current_branch = get_current_flow(ctx);
|
||||||
|
LLVMBasicBlockRef endif_block;
|
||||||
|
|
||||||
|
assert(!current_branch->loop_entry_block);
|
||||||
|
|
||||||
|
endif_block = append_basic_block(ctx, "ENDIF");
|
||||||
|
emit_default_branch(ctx->builder, endif_block);
|
||||||
|
|
||||||
|
LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block);
|
||||||
|
set_basicblock_name(current_branch->next_block, "else", label_id);
|
||||||
|
|
||||||
|
current_branch->next_block = endif_block;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ac_build_endif(struct ac_llvm_context *ctx, int label_id)
|
||||||
|
{
|
||||||
|
struct ac_llvm_flow *current_branch = get_current_flow(ctx);
|
||||||
|
|
||||||
|
assert(!current_branch->loop_entry_block);
|
||||||
|
|
||||||
|
emit_default_branch(ctx->builder, current_branch->next_block);
|
||||||
|
LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block);
|
||||||
|
set_basicblock_name(current_branch->next_block, "endif", label_id);
|
||||||
|
|
||||||
|
ctx->flow_depth--;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ac_build_endloop(struct ac_llvm_context *ctx, int label_id)
|
||||||
|
{
|
||||||
|
struct ac_llvm_flow *current_loop = get_current_flow(ctx);
|
||||||
|
|
||||||
|
assert(current_loop->loop_entry_block);
|
||||||
|
|
||||||
|
emit_default_branch(ctx->builder, current_loop->loop_entry_block);
|
||||||
|
|
||||||
|
LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block);
|
||||||
|
set_basicblock_name(current_loop->next_block, "endloop", label_id);
|
||||||
|
ctx->flow_depth--;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void if_cond_emit(struct ac_llvm_context *ctx, LLVMValueRef cond,
|
||||||
|
int label_id)
|
||||||
|
{
|
||||||
|
struct ac_llvm_flow *flow = push_flow(ctx);
|
||||||
|
LLVMBasicBlockRef if_block;
|
||||||
|
|
||||||
|
if_block = append_basic_block(ctx, "IF");
|
||||||
|
flow->next_block = append_basic_block(ctx, "ELSE");
|
||||||
|
set_basicblock_name(if_block, "if", label_id);
|
||||||
|
LLVMBuildCondBr(ctx->builder, cond, if_block, flow->next_block);
|
||||||
|
LLVMPositionBuilderAtEnd(ctx->builder, if_block);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
|
||||||
|
int label_id)
|
||||||
|
{
|
||||||
|
LLVMValueRef cond = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
|
||||||
|
value, ctx->f32_0, "");
|
||||||
|
if_cond_emit(ctx, cond, label_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
|
||||||
|
int label_id)
|
||||||
|
{
|
||||||
|
LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
|
||||||
|
ac_to_integer(ctx, value),
|
||||||
|
ctx->i32_0, "");
|
||||||
|
if_cond_emit(ctx, cond, label_id);
|
||||||
|
}
|
||||||
|
@@ -38,6 +38,8 @@ enum {
|
|||||||
AC_LOCAL_ADDR_SPACE = 3,
|
AC_LOCAL_ADDR_SPACE = 3,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ac_llvm_flow;
|
||||||
|
|
||||||
struct ac_llvm_context {
|
struct ac_llvm_context {
|
||||||
LLVMContextRef context;
|
LLVMContextRef context;
|
||||||
LLVMModuleRef module;
|
LLVMModuleRef module;
|
||||||
@@ -70,6 +72,10 @@ struct ac_llvm_context {
|
|||||||
LLVMValueRef i1true;
|
LLVMValueRef i1true;
|
||||||
LLVMValueRef i1false;
|
LLVMValueRef i1false;
|
||||||
|
|
||||||
|
struct ac_llvm_flow *flow;
|
||||||
|
unsigned flow_depth;
|
||||||
|
unsigned flow_depth_max;
|
||||||
|
|
||||||
unsigned range_md_kind;
|
unsigned range_md_kind;
|
||||||
unsigned invariant_load_md_kind;
|
unsigned invariant_load_md_kind;
|
||||||
unsigned uniform_md_kind;
|
unsigned uniform_md_kind;
|
||||||
@@ -87,6 +93,9 @@ void
|
|||||||
ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
|
ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
|
||||||
enum chip_class chip_class, enum radeon_family family);
|
enum chip_class chip_class, enum radeon_family family);
|
||||||
|
|
||||||
|
void
|
||||||
|
ac_llvm_context_dispose(struct ac_llvm_context *ctx);
|
||||||
|
|
||||||
int
|
int
|
||||||
ac_get_llvm_num_components(LLVMValueRef value);
|
ac_get_llvm_num_components(LLVMValueRef value);
|
||||||
|
|
||||||
@@ -216,6 +225,14 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
|
|||||||
LLVMValueRef voffset,
|
LLVMValueRef voffset,
|
||||||
bool can_speculate);
|
bool can_speculate);
|
||||||
|
|
||||||
|
/* load_format that handles the stride & element count better if idxen is
|
||||||
|
* disabled by LLVM. */
|
||||||
|
LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
|
||||||
|
LLVMValueRef rsrc,
|
||||||
|
LLVMValueRef vindex,
|
||||||
|
LLVMValueRef voffset,
|
||||||
|
bool can_speculate);
|
||||||
|
|
||||||
LLVMValueRef
|
LLVMValueRef
|
||||||
ac_get_thread_id(struct ac_llvm_context *ctx);
|
ac_get_thread_id(struct ac_llvm_context *ctx);
|
||||||
|
|
||||||
@@ -326,6 +343,18 @@ void ac_lds_store(struct ac_llvm_context *ctx,
|
|||||||
LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
|
LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
|
||||||
LLVMTypeRef dst_type,
|
LLVMTypeRef dst_type,
|
||||||
LLVMValueRef src0);
|
LLVMValueRef src0);
|
||||||
|
|
||||||
|
void ac_build_bgnloop(struct ac_llvm_context *ctx, int lable_id);
|
||||||
|
void ac_build_break(struct ac_llvm_context *ctx);
|
||||||
|
void ac_build_continue(struct ac_llvm_context *ctx);
|
||||||
|
void ac_build_else(struct ac_llvm_context *ctx, int lable_id);
|
||||||
|
void ac_build_endif(struct ac_llvm_context *ctx, int lable_id);
|
||||||
|
void ac_build_endloop(struct ac_llvm_context *ctx, int lable_id);
|
||||||
|
void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
|
||||||
|
int lable_id);
|
||||||
|
void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
|
||||||
|
int lable_id);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@@ -2308,11 +2308,19 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
|
|||||||
struct ac_image_args *args)
|
struct ac_image_args *args)
|
||||||
{
|
{
|
||||||
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
|
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
|
||||||
return ac_build_buffer_load_format(&ctx->ac,
|
if (ctx->abi->gfx9_stride_size_workaround) {
|
||||||
args->resource,
|
return ac_build_buffer_load_format_gfx9_safe(&ctx->ac,
|
||||||
args->addr,
|
args->resource,
|
||||||
ctx->ac.i32_0,
|
args->addr,
|
||||||
true);
|
ctx->ac.i32_0,
|
||||||
|
true);
|
||||||
|
} else {
|
||||||
|
return ac_build_buffer_load_format(&ctx->ac,
|
||||||
|
args->resource,
|
||||||
|
args->addr,
|
||||||
|
ctx->ac.i32_0,
|
||||||
|
true);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
args->opcode = ac_image_sample;
|
args->opcode = ac_image_sample;
|
||||||
@@ -3609,6 +3617,25 @@ static LLVMValueRef get_image_coords(struct ac_nir_context *ctx,
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
|
||||||
|
const nir_intrinsic_instr *instr, bool write)
|
||||||
|
{
|
||||||
|
LLVMValueRef rsrc = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, write);
|
||||||
|
if (ctx->abi->gfx9_stride_size_workaround) {
|
||||||
|
LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
|
||||||
|
LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
|
||||||
|
stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
|
||||||
|
|
||||||
|
LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder,
|
||||||
|
LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""),
|
||||||
|
elem_count, stride, "");
|
||||||
|
|
||||||
|
rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count,
|
||||||
|
LLVMConstInt(ctx->ac.i32, 2, 0), "");
|
||||||
|
}
|
||||||
|
return rsrc;
|
||||||
|
}
|
||||||
|
|
||||||
static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
|
static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
|
||||||
const nir_intrinsic_instr *instr)
|
const nir_intrinsic_instr *instr)
|
||||||
{
|
{
|
||||||
@@ -3623,7 +3650,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
|
|||||||
|
|
||||||
type = glsl_without_array(type);
|
type = glsl_without_array(type);
|
||||||
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
|
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
|
||||||
params[0] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, false);
|
params[0] = get_image_buffer_descriptor(ctx, instr, false);
|
||||||
params[1] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
|
params[1] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
|
||||||
ctx->ac.i32_0, ""); /* vindex */
|
ctx->ac.i32_0, ""); /* vindex */
|
||||||
params[2] = ctx->ac.i32_0; /* voffset */
|
params[2] = ctx->ac.i32_0; /* voffset */
|
||||||
@@ -3685,8 +3712,10 @@ static void visit_image_store(struct ac_nir_context *ctx,
|
|||||||
glc = ctx->ac.i1true;
|
glc = ctx->ac.i1true;
|
||||||
|
|
||||||
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
|
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
|
||||||
|
LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true);
|
||||||
|
|
||||||
params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */
|
params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */
|
||||||
params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true);
|
params[1] = rsrc;
|
||||||
params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
|
params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
|
||||||
ctx->ac.i32_0, ""); /* vindex */
|
ctx->ac.i32_0, ""); /* vindex */
|
||||||
params[3] = ctx->ac.i32_0; /* voffset */
|
params[3] = ctx->ac.i32_0; /* voffset */
|
||||||
@@ -3778,8 +3807,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
|
|||||||
params[param_count++] = get_src(ctx, instr->src[2]);
|
params[param_count++] = get_src(ctx, instr->src[2]);
|
||||||
|
|
||||||
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
|
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
|
||||||
params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER,
|
params[param_count++] = get_image_buffer_descriptor(ctx, instr, true);
|
||||||
NULL, true, true);
|
|
||||||
params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
|
params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
|
||||||
ctx->ac.i32_0, ""); /* vindex */
|
ctx->ac.i32_0, ""); /* vindex */
|
||||||
params[param_count++] = ctx->ac.i32_0; /* voffset */
|
params[param_count++] = ctx->ac.i32_0; /* voffset */
|
||||||
@@ -5190,17 +5218,15 @@ static void visit_ssa_undef(struct ac_nir_context *ctx,
|
|||||||
_mesa_hash_table_insert(ctx->defs, &instr->def, undef);
|
_mesa_hash_table_insert(ctx->defs, &instr->def, undef);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void visit_jump(struct ac_nir_context *ctx,
|
static void visit_jump(struct ac_llvm_context *ctx,
|
||||||
const nir_jump_instr *instr)
|
const nir_jump_instr *instr)
|
||||||
{
|
{
|
||||||
switch (instr->type) {
|
switch (instr->type) {
|
||||||
case nir_jump_break:
|
case nir_jump_break:
|
||||||
LLVMBuildBr(ctx->ac.builder, ctx->break_block);
|
ac_build_break(ctx);
|
||||||
LLVMClearInsertionPosition(ctx->ac.builder);
|
|
||||||
break;
|
break;
|
||||||
case nir_jump_continue:
|
case nir_jump_continue:
|
||||||
LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
|
ac_build_continue(ctx);
|
||||||
LLVMClearInsertionPosition(ctx->ac.builder);
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
fprintf(stderr, "Unknown NIR jump instr: ");
|
fprintf(stderr, "Unknown NIR jump instr: ");
|
||||||
@@ -5238,7 +5264,7 @@ static void visit_block(struct ac_nir_context *ctx, nir_block *block)
|
|||||||
visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
|
visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
|
||||||
break;
|
break;
|
||||||
case nir_instr_type_jump:
|
case nir_instr_type_jump:
|
||||||
visit_jump(ctx, nir_instr_as_jump(instr));
|
visit_jump(&ctx->ac, nir_instr_as_jump(instr));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
fprintf(stderr, "Unknown NIR instr type: ");
|
fprintf(stderr, "Unknown NIR instr type: ");
|
||||||
@@ -5255,56 +5281,34 @@ static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt)
|
|||||||
{
|
{
|
||||||
LLVMValueRef value = get_src(ctx, if_stmt->condition);
|
LLVMValueRef value = get_src(ctx, if_stmt->condition);
|
||||||
|
|
||||||
LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
|
nir_block *then_block =
|
||||||
LLVMBasicBlockRef merge_block =
|
(nir_block *) exec_list_get_head(&if_stmt->then_list);
|
||||||
LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
|
|
||||||
LLVMBasicBlockRef if_block =
|
|
||||||
LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
|
|
||||||
LLVMBasicBlockRef else_block = merge_block;
|
|
||||||
if (!exec_list_is_empty(&if_stmt->else_list))
|
|
||||||
else_block = LLVMAppendBasicBlockInContext(
|
|
||||||
ctx->ac.context, fn, "");
|
|
||||||
|
|
||||||
LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, value,
|
ac_build_uif(&ctx->ac, value, then_block->index);
|
||||||
ctx->ac.i32_0, "");
|
|
||||||
LLVMBuildCondBr(ctx->ac.builder, cond, if_block, else_block);
|
|
||||||
|
|
||||||
LLVMPositionBuilderAtEnd(ctx->ac.builder, if_block);
|
|
||||||
visit_cf_list(ctx, &if_stmt->then_list);
|
visit_cf_list(ctx, &if_stmt->then_list);
|
||||||
if (LLVMGetInsertBlock(ctx->ac.builder))
|
|
||||||
LLVMBuildBr(ctx->ac.builder, merge_block);
|
|
||||||
|
|
||||||
if (!exec_list_is_empty(&if_stmt->else_list)) {
|
if (!exec_list_is_empty(&if_stmt->else_list)) {
|
||||||
LLVMPositionBuilderAtEnd(ctx->ac.builder, else_block);
|
nir_block *else_block =
|
||||||
|
(nir_block *) exec_list_get_head(&if_stmt->else_list);
|
||||||
|
|
||||||
|
ac_build_else(&ctx->ac, else_block->index);
|
||||||
visit_cf_list(ctx, &if_stmt->else_list);
|
visit_cf_list(ctx, &if_stmt->else_list);
|
||||||
if (LLVMGetInsertBlock(ctx->ac.builder))
|
|
||||||
LLVMBuildBr(ctx->ac.builder, merge_block);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
LLVMPositionBuilderAtEnd(ctx->ac.builder, merge_block);
|
ac_build_endif(&ctx->ac, then_block->index);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
|
static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
|
||||||
{
|
{
|
||||||
LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
|
nir_block *first_loop_block =
|
||||||
LLVMBasicBlockRef continue_parent = ctx->continue_block;
|
(nir_block *) exec_list_get_head(&loop->body);
|
||||||
LLVMBasicBlockRef break_parent = ctx->break_block;
|
|
||||||
|
|
||||||
ctx->continue_block =
|
ac_build_bgnloop(&ctx->ac, first_loop_block->index);
|
||||||
LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
|
|
||||||
ctx->break_block =
|
|
||||||
LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
|
|
||||||
|
|
||||||
LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
|
|
||||||
LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->continue_block);
|
|
||||||
visit_cf_list(ctx, &loop->body);
|
visit_cf_list(ctx, &loop->body);
|
||||||
|
|
||||||
if (LLVMGetInsertBlock(ctx->ac.builder))
|
ac_build_endloop(&ctx->ac, first_loop_block->index);
|
||||||
LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
|
|
||||||
LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->break_block);
|
|
||||||
|
|
||||||
ctx->continue_block = continue_parent;
|
|
||||||
ctx->break_block = break_parent;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void visit_cf_list(struct ac_nir_context *ctx,
|
static void visit_cf_list(struct ac_nir_context *ctx,
|
||||||
@@ -6629,6 +6633,8 @@ static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
|
|||||||
|
|
||||||
LLVMDisposeBuilder(ctx->builder);
|
LLVMDisposeBuilder(ctx->builder);
|
||||||
LLVMDisposePassManager(passmgr);
|
LLVMDisposePassManager(passmgr);
|
||||||
|
|
||||||
|
ac_llvm_context_dispose(&ctx->ac);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -6851,6 +6857,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
|
|||||||
ctx.abi.load_ssbo = radv_load_ssbo;
|
ctx.abi.load_ssbo = radv_load_ssbo;
|
||||||
ctx.abi.load_sampler_desc = radv_get_sampler_desc;
|
ctx.abi.load_sampler_desc = radv_get_sampler_desc;
|
||||||
ctx.abi.clamp_shadow_reference = false;
|
ctx.abi.clamp_shadow_reference = false;
|
||||||
|
ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9;
|
||||||
|
|
||||||
if (shader_count >= 2)
|
if (shader_count >= 2)
|
||||||
ac_init_exec_full_mask(&ctx.ac);
|
ac_init_exec_full_mask(&ctx.ac);
|
||||||
|
@@ -145,6 +145,10 @@ struct ac_shader_abi {
|
|||||||
/* Whether to clamp the shadow reference value to [0,1]on VI. Radeonsi currently
|
/* Whether to clamp the shadow reference value to [0,1]on VI. Radeonsi currently
|
||||||
* uses it due to promoting D16 to D32, but radv needs it off. */
|
* uses it due to promoting D16 to D32, but radv needs it off. */
|
||||||
bool clamp_shadow_reference;
|
bool clamp_shadow_reference;
|
||||||
|
|
||||||
|
/* Whether to workaround GFX9 ignoring the stride for the buffer size if IDXEN=0
|
||||||
|
* and LLVM optimizes an indexed load with constant index to IDXEN=0. */
|
||||||
|
bool gfx9_stride_size_workaround;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* AC_SHADER_ABI_H */
|
#endif /* AC_SHADER_ABI_H */
|
||||||
|
@@ -3006,20 +3006,9 @@ void radv_CmdExecuteCommands(
|
|||||||
secondary->state.last_ia_multi_vgt_param;
|
secondary->state.last_ia_multi_vgt_param;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (secondary->state.last_first_instance != -1) {
|
primary->state.last_first_instance = secondary->state.last_first_instance;
|
||||||
primary->state.last_first_instance =
|
primary->state.last_num_instances = secondary->state.last_num_instances;
|
||||||
secondary->state.last_first_instance;
|
primary->state.last_vertex_offset = secondary->state.last_vertex_offset;
|
||||||
}
|
|
||||||
|
|
||||||
if (secondary->state.last_num_instances != -1) {
|
|
||||||
primary->state.last_num_instances =
|
|
||||||
secondary->state.last_num_instances;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (secondary->state.last_vertex_offset != -1) {
|
|
||||||
primary->state.last_vertex_offset =
|
|
||||||
secondary->state.last_vertex_offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (secondary->state.last_index_type != -1) {
|
if (secondary->state.last_index_type != -1) {
|
||||||
primary->state.last_index_type =
|
primary->state.last_index_type =
|
||||||
@@ -3364,7 +3353,7 @@ radv_emit_draw_packets(struct radv_cmd_buffer *cmd_buffer,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (state->last_num_instances != info->instance_count) {
|
if (state->last_num_instances != info->instance_count) {
|
||||||
radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, state->predicating));
|
radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, false));
|
||||||
radeon_emit(cs, info->instance_count);
|
radeon_emit(cs, info->instance_count);
|
||||||
state->last_num_instances = info->instance_count;
|
state->last_num_instances = info->instance_count;
|
||||||
}
|
}
|
||||||
|
@@ -141,7 +141,7 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
|
|||||||
gart_index = device->memory_properties.memoryHeapCount++;
|
gart_index = device->memory_properties.memoryHeapCount++;
|
||||||
device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
|
device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
|
||||||
.size = device->rad_info.gart_size,
|
.size = device->rad_info.gart_size,
|
||||||
.flags = 0,
|
.flags = device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -158,7 +158,8 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
|
|||||||
device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
|
device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
|
||||||
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
|
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
|
||||||
.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
|
||||||
|
(device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
|
||||||
.heapIndex = gart_index,
|
.heapIndex = gart_index,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@@ -176,7 +177,8 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
|
|||||||
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
|
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
|
||||||
.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
|
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
|
||||||
VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
|
VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
|
||||||
|
(device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
|
||||||
.heapIndex = gart_index,
|
.heapIndex = gart_index,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@@ -1172,7 +1172,7 @@ bool radv_layout_dcc_compressed(const struct radv_image *image,
|
|||||||
(queue_mask & (1u << RADV_QUEUE_COMPUTE)))
|
(queue_mask & (1u << RADV_QUEUE_COMPUTE)))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return image->surface.num_dcc_levels > 0 && layout != VK_IMAGE_LAYOUT_GENERAL;
|
return image->surface.dcc_size && layout != VK_IMAGE_LAYOUT_GENERAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@@ -621,7 +621,7 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
|
|||||||
struct radv_image *dst_img = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
|
struct radv_image *dst_img = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
|
||||||
struct radv_image *src_img = cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment->image;
|
struct radv_image *src_img = cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment->image;
|
||||||
|
|
||||||
radv_pick_resolve_method_images(dst_img, src_img, dest_att.layout, cmd_buffer, &resolve_method);
|
radv_pick_resolve_method_images(src_img, dst_img, dest_att.layout, cmd_buffer, &resolve_method);
|
||||||
if (resolve_method == RESOLVE_FRAGMENT) {
|
if (resolve_method == RESOLVE_FRAGMENT) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@@ -647,10 +647,10 @@ static VkRect2D si_scissor_from_viewport(const VkViewport *viewport)
|
|||||||
|
|
||||||
get_viewport_xform(viewport, scale, translate);
|
get_viewport_xform(viewport, scale, translate);
|
||||||
|
|
||||||
rect.offset.x = translate[0] - abs(scale[0]);
|
rect.offset.x = translate[0] - fabs(scale[0]);
|
||||||
rect.offset.y = translate[1] - abs(scale[1]);
|
rect.offset.y = translate[1] - fabs(scale[1]);
|
||||||
rect.extent.width = ceilf(translate[0] + abs(scale[0])) - rect.offset.x;
|
rect.extent.width = ceilf(translate[0] + fabs(scale[0])) - rect.offset.x;
|
||||||
rect.extent.height = ceilf(translate[1] + abs(scale[1])) - rect.offset.y;
|
rect.extent.height = ceilf(translate[1] + fabs(scale[1])) - rect.offset.y;
|
||||||
|
|
||||||
return rect;
|
return rect;
|
||||||
}
|
}
|
||||||
|
@@ -66,6 +66,10 @@ struct radv_amdgpu_cs {
|
|||||||
struct radeon_winsys_bo **virtual_buffers;
|
struct radeon_winsys_bo **virtual_buffers;
|
||||||
uint8_t *virtual_buffer_priorities;
|
uint8_t *virtual_buffer_priorities;
|
||||||
int *virtual_buffer_hash_table;
|
int *virtual_buffer_hash_table;
|
||||||
|
|
||||||
|
/* For chips that don't support chaining. */
|
||||||
|
struct radeon_winsys_cs *old_cs_buffers;
|
||||||
|
unsigned num_old_cs_buffers;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline struct radv_amdgpu_cs *
|
static inline struct radv_amdgpu_cs *
|
||||||
@@ -166,6 +170,12 @@ static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
|
|||||||
for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
|
for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
|
||||||
cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);
|
cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
|
||||||
|
struct radeon_winsys_cs *rcs = &cs->old_cs_buffers[i];
|
||||||
|
free(rcs->buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(cs->old_cs_buffers);
|
||||||
free(cs->old_ib_buffers);
|
free(cs->old_ib_buffers);
|
||||||
free(cs->virtual_buffers);
|
free(cs->virtual_buffers);
|
||||||
free(cs->virtual_buffer_priorities);
|
free(cs->virtual_buffer_priorities);
|
||||||
@@ -251,9 +261,46 @@ static void radv_amdgpu_cs_grow(struct radeon_winsys_cs *_cs, size_t min_size)
|
|||||||
/* The total ib size cannot exceed limit_dws dwords. */
|
/* The total ib size cannot exceed limit_dws dwords. */
|
||||||
if (ib_dws > limit_dws)
|
if (ib_dws > limit_dws)
|
||||||
{
|
{
|
||||||
cs->failed = true;
|
/* The maximum size in dwords has been reached,
|
||||||
|
* try to allocate a new one.
|
||||||
|
*/
|
||||||
|
if (cs->num_old_cs_buffers + 1 >= AMDGPU_CS_MAX_IBS_PER_SUBMIT) {
|
||||||
|
/* TODO: Allow to submit more than 4 IBs. */
|
||||||
|
fprintf(stderr, "amdgpu: Maximum number of IBs "
|
||||||
|
"per submit reached.\n");
|
||||||
|
cs->failed = true;
|
||||||
|
cs->base.cdw = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
cs->old_cs_buffers =
|
||||||
|
realloc(cs->old_cs_buffers,
|
||||||
|
(cs->num_old_cs_buffers + 1) * sizeof(*cs->old_cs_buffers));
|
||||||
|
if (!cs->old_cs_buffers) {
|
||||||
|
cs->failed = true;
|
||||||
|
cs->base.cdw = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Store the current one for submitting it later. */
|
||||||
|
cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = cs->base.cdw;
|
||||||
|
cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = cs->base.max_dw;
|
||||||
|
cs->old_cs_buffers[cs->num_old_cs_buffers].buf = cs->base.buf;
|
||||||
|
cs->num_old_cs_buffers++;
|
||||||
|
|
||||||
|
/* Reset the cs, it will be re-allocated below. */
|
||||||
cs->base.cdw = 0;
|
cs->base.cdw = 0;
|
||||||
return;
|
cs->base.buf = NULL;
|
||||||
|
|
||||||
|
/* Re-compute the number of dwords to allocate. */
|
||||||
|
ib_dws = MAX2(cs->base.cdw + min_size,
|
||||||
|
MIN2(cs->base.max_dw * 2, limit_dws));
|
||||||
|
if (ib_dws > limit_dws) {
|
||||||
|
fprintf(stderr, "amdgpu: Too high number of "
|
||||||
|
"dwords to allocate\n");
|
||||||
|
cs->failed = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
|
uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
|
||||||
@@ -365,6 +412,15 @@ static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
|
|||||||
cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
|
cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
|
||||||
cs->ib_size_ptr = &cs->ib.size;
|
cs->ib_size_ptr = &cs->ib.size;
|
||||||
cs->ib.size = 0;
|
cs->ib.size = 0;
|
||||||
|
} else {
|
||||||
|
for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
|
||||||
|
struct radeon_winsys_cs *rcs = &cs->old_cs_buffers[i];
|
||||||
|
free(rcs->buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(cs->old_cs_buffers);
|
||||||
|
cs->old_cs_buffers = NULL;
|
||||||
|
cs->num_old_cs_buffers = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -515,7 +571,8 @@ static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
|
|||||||
static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
|
static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
|
||||||
struct radeon_winsys_cs **cs_array,
|
struct radeon_winsys_cs **cs_array,
|
||||||
unsigned count,
|
unsigned count,
|
||||||
struct radv_amdgpu_winsys_bo *extra_bo,
|
struct radv_amdgpu_winsys_bo **extra_bo_array,
|
||||||
|
unsigned num_extra_bo,
|
||||||
struct radeon_winsys_cs *extra_cs,
|
struct radeon_winsys_cs *extra_cs,
|
||||||
amdgpu_bo_list_handle *bo_list)
|
amdgpu_bo_list_handle *bo_list)
|
||||||
{
|
{
|
||||||
@@ -544,7 +601,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
|
|||||||
bo_list);
|
bo_list);
|
||||||
free(handles);
|
free(handles);
|
||||||
pthread_mutex_unlock(&ws->global_bo_list_lock);
|
pthread_mutex_unlock(&ws->global_bo_list_lock);
|
||||||
} else if (count == 1 && !extra_bo && !extra_cs &&
|
} else if (count == 1 && !num_extra_bo && !extra_cs &&
|
||||||
!radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) {
|
!radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) {
|
||||||
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
|
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
|
||||||
if (cs->num_buffers == 0) {
|
if (cs->num_buffers == 0) {
|
||||||
@@ -554,8 +611,8 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
|
|||||||
r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
|
r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
|
||||||
cs->priorities, bo_list);
|
cs->priorities, bo_list);
|
||||||
} else {
|
} else {
|
||||||
unsigned total_buffer_count = !!extra_bo;
|
unsigned total_buffer_count = num_extra_bo;
|
||||||
unsigned unique_bo_count = !!extra_bo;
|
unsigned unique_bo_count = num_extra_bo;
|
||||||
for (unsigned i = 0; i < count; ++i) {
|
for (unsigned i = 0; i < count; ++i) {
|
||||||
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
|
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
|
||||||
total_buffer_count += cs->num_buffers;
|
total_buffer_count += cs->num_buffers;
|
||||||
@@ -578,9 +635,9 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
|
|||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (extra_bo) {
|
for (unsigned i = 0; i < num_extra_bo; i++) {
|
||||||
handles[0] = extra_bo->bo;
|
handles[i] = extra_bo_array[i]->bo;
|
||||||
priorities[0] = 8;
|
priorities[i] = 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned i = 0; i < count + !!extra_cs; ++i) {
|
for (unsigned i = 0; i < count + !!extra_cs; ++i) {
|
||||||
@@ -710,7 +767,8 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, initial_preamble_cs, &bo_list);
|
r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, 0, initial_preamble_cs,
|
||||||
|
&bo_list);
|
||||||
if (r) {
|
if (r) {
|
||||||
fprintf(stderr, "amdgpu: buffer list creation failed for the "
|
fprintf(stderr, "amdgpu: buffer list creation failed for the "
|
||||||
"chained submission(%d)\n", r);
|
"chained submission(%d)\n", r);
|
||||||
@@ -777,7 +835,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
|
|||||||
|
|
||||||
memset(&request, 0, sizeof(request));
|
memset(&request, 0, sizeof(request));
|
||||||
|
|
||||||
r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL,
|
r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, 0,
|
||||||
preamble_cs, &bo_list);
|
preamble_cs, &bo_list);
|
||||||
if (r) {
|
if (r) {
|
||||||
fprintf(stderr, "amdgpu: buffer list creation failed "
|
fprintf(stderr, "amdgpu: buffer list creation failed "
|
||||||
@@ -857,68 +915,127 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
|
|||||||
assert(cs_count);
|
assert(cs_count);
|
||||||
|
|
||||||
for (unsigned i = 0; i < cs_count;) {
|
for (unsigned i = 0; i < cs_count;) {
|
||||||
struct amdgpu_cs_ib_info ib = {0};
|
struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = {0};
|
||||||
struct radeon_winsys_bo *bo = NULL;
|
unsigned number_of_ibs = 1;
|
||||||
|
struct radeon_winsys_bo *bos[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = {0};
|
||||||
struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
|
struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
|
||||||
|
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
|
||||||
uint32_t *ptr;
|
uint32_t *ptr;
|
||||||
unsigned cnt = 0;
|
unsigned cnt = 0;
|
||||||
unsigned size = 0;
|
unsigned size = 0;
|
||||||
unsigned pad_words = 0;
|
unsigned pad_words = 0;
|
||||||
if (preamble_cs)
|
|
||||||
size += preamble_cs->cdw;
|
|
||||||
|
|
||||||
while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
|
if (cs->num_old_cs_buffers > 0) {
|
||||||
size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
|
/* Special path when the maximum size in dwords has
|
||||||
++cnt;
|
* been reached because we need to handle more than one
|
||||||
|
* IB per submit.
|
||||||
|
*/
|
||||||
|
unsigned new_cs_count = cs->num_old_cs_buffers + 1;
|
||||||
|
struct radeon_winsys_cs *new_cs_array[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
|
||||||
|
unsigned idx = 0;
|
||||||
|
|
||||||
|
for (unsigned j = 0; j < cs->num_old_cs_buffers; j++)
|
||||||
|
new_cs_array[idx++] = &cs->old_cs_buffers[j];
|
||||||
|
new_cs_array[idx++] = cs_array[i];
|
||||||
|
|
||||||
|
for (unsigned j = 0; j < new_cs_count; j++) {
|
||||||
|
struct radeon_winsys_cs *rcs = new_cs_array[j];
|
||||||
|
bool needs_preamble = preamble_cs && j == 0;
|
||||||
|
unsigned size = 0;
|
||||||
|
|
||||||
|
if (needs_preamble)
|
||||||
|
size += preamble_cs->cdw;
|
||||||
|
size += rcs->cdw;
|
||||||
|
|
||||||
|
assert(size < 0xffff8);
|
||||||
|
|
||||||
|
while (!size || (size & 7)) {
|
||||||
|
size++;
|
||||||
|
pad_words++;
|
||||||
|
}
|
||||||
|
|
||||||
|
bos[j] = ws->buffer_create(ws, 4 * size, 4096,
|
||||||
|
RADEON_DOMAIN_GTT,
|
||||||
|
RADEON_FLAG_CPU_ACCESS |
|
||||||
|
RADEON_FLAG_NO_INTERPROCESS_SHARING |
|
||||||
|
RADEON_FLAG_READ_ONLY);
|
||||||
|
ptr = ws->buffer_map(bos[j]);
|
||||||
|
|
||||||
|
if (needs_preamble) {
|
||||||
|
memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
|
||||||
|
ptr += preamble_cs->cdw;
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(ptr, rcs->buf, 4 * rcs->cdw);
|
||||||
|
ptr += rcs->cdw;
|
||||||
|
|
||||||
|
for (unsigned k = 0; k < pad_words; ++k)
|
||||||
|
*ptr++ = pad_word;
|
||||||
|
|
||||||
|
ibs[j].size = size;
|
||||||
|
ibs[j].ib_mc_address = radv_buffer_get_va(bos[j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
number_of_ibs = new_cs_count;
|
||||||
|
cnt++;
|
||||||
|
} else {
|
||||||
|
if (preamble_cs)
|
||||||
|
size += preamble_cs->cdw;
|
||||||
|
|
||||||
|
while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
|
||||||
|
size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
|
||||||
|
++cnt;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (!size || (size & 7)) {
|
||||||
|
size++;
|
||||||
|
pad_words++;
|
||||||
|
}
|
||||||
|
assert(cnt);
|
||||||
|
|
||||||
|
bos[0] = ws->buffer_create(ws, 4 * size, 4096,
|
||||||
|
RADEON_DOMAIN_GTT,
|
||||||
|
RADEON_FLAG_CPU_ACCESS |
|
||||||
|
RADEON_FLAG_NO_INTERPROCESS_SHARING |
|
||||||
|
RADEON_FLAG_READ_ONLY);
|
||||||
|
ptr = ws->buffer_map(bos[0]);
|
||||||
|
|
||||||
|
if (preamble_cs) {
|
||||||
|
memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
|
||||||
|
ptr += preamble_cs->cdw;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned j = 0; j < cnt; ++j) {
|
||||||
|
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
|
||||||
|
memcpy(ptr, cs->base.buf, 4 * cs->base.cdw);
|
||||||
|
ptr += cs->base.cdw;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned j = 0; j < pad_words; ++j)
|
||||||
|
*ptr++ = pad_word;
|
||||||
|
|
||||||
|
ibs[0].size = size;
|
||||||
|
ibs[0].ib_mc_address = radv_buffer_get_va(bos[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
while(!size || (size & 7)) {
|
|
||||||
size++;
|
|
||||||
pad_words++;
|
|
||||||
}
|
|
||||||
assert(cnt);
|
|
||||||
|
|
||||||
bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT,
|
|
||||||
RADEON_FLAG_CPU_ACCESS |
|
|
||||||
RADEON_FLAG_NO_INTERPROCESS_SHARING |
|
|
||||||
RADEON_FLAG_READ_ONLY);
|
|
||||||
ptr = ws->buffer_map(bo);
|
|
||||||
|
|
||||||
if (preamble_cs) {
|
|
||||||
memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
|
|
||||||
ptr += preamble_cs->cdw;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned j = 0; j < cnt; ++j) {
|
|
||||||
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
|
|
||||||
memcpy(ptr, cs->base.buf, 4 * cs->base.cdw);
|
|
||||||
ptr += cs->base.cdw;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned j = 0; j < pad_words; ++j)
|
|
||||||
*ptr++ = pad_word;
|
|
||||||
|
|
||||||
memset(&request, 0, sizeof(request));
|
|
||||||
|
|
||||||
|
|
||||||
r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt,
|
r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt,
|
||||||
(struct radv_amdgpu_winsys_bo*)bo,
|
(struct radv_amdgpu_winsys_bo **)bos,
|
||||||
preamble_cs, &bo_list);
|
number_of_ibs, preamble_cs,
|
||||||
|
&bo_list);
|
||||||
if (r) {
|
if (r) {
|
||||||
fprintf(stderr, "amdgpu: buffer list creation failed "
|
fprintf(stderr, "amdgpu: buffer list creation failed "
|
||||||
"for the sysmem submission (%d)\n", r);
|
"for the sysmem submission (%d)\n", r);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
ib.size = size;
|
memset(&request, 0, sizeof(request));
|
||||||
ib.ib_mc_address = radv_buffer_get_va(bo);
|
|
||||||
|
|
||||||
request.ip_type = cs0->hw_ip;
|
request.ip_type = cs0->hw_ip;
|
||||||
request.ring = queue_idx;
|
request.ring = queue_idx;
|
||||||
request.resources = bo_list;
|
request.resources = bo_list;
|
||||||
request.number_of_ibs = 1;
|
request.number_of_ibs = number_of_ibs;
|
||||||
request.ibs = &ib;
|
request.ibs = ibs;
|
||||||
request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
|
request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
|
||||||
|
|
||||||
sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false;
|
sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false;
|
||||||
@@ -934,9 +1051,11 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
|
|||||||
if (bo_list)
|
if (bo_list)
|
||||||
amdgpu_bo_list_destroy(bo_list);
|
amdgpu_bo_list_destroy(bo_list);
|
||||||
|
|
||||||
ws->buffer_destroy(bo);
|
for (unsigned j = 0; j < number_of_ibs; j++) {
|
||||||
if (r)
|
ws->buffer_destroy(bos[j]);
|
||||||
return r;
|
if (r)
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
i += cnt;
|
i += cnt;
|
||||||
}
|
}
|
||||||
|
@@ -2239,6 +2239,24 @@ do_common_optimization(exec_list *ir, bool linked,
|
|||||||
loop_progress = false;
|
loop_progress = false;
|
||||||
loop_progress |= do_constant_propagation(ir);
|
loop_progress |= do_constant_propagation(ir);
|
||||||
loop_progress |= do_if_simplification(ir);
|
loop_progress |= do_if_simplification(ir);
|
||||||
|
|
||||||
|
/* Some drivers only call do_common_optimization() once rather
|
||||||
|
* than in a loop. So we must call do_lower_jumps() after
|
||||||
|
* unrolling a loop because for drivers that use LLVM validation
|
||||||
|
* will fail if a jump is not the last instruction in the block.
|
||||||
|
* For example the following will fail LLVM validation:
|
||||||
|
*
|
||||||
|
* (loop (
|
||||||
|
* ...
|
||||||
|
* break
|
||||||
|
* (assign (x) (var_ref v124) (expression int + (var_ref v124)
|
||||||
|
* (constant int (1)) ) )
|
||||||
|
* ))
|
||||||
|
*/
|
||||||
|
loop_progress |= do_lower_jumps(ir, true, true,
|
||||||
|
options->EmitNoMainReturn,
|
||||||
|
options->EmitNoCont,
|
||||||
|
options->EmitNoLoops);
|
||||||
}
|
}
|
||||||
progress |= loop_progress;
|
progress |= loop_progress;
|
||||||
}
|
}
|
||||||
|
@@ -519,7 +519,7 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
|
|||||||
* isn't any additional unknown terminators, or any other jumps nested
|
* isn't any additional unknown terminators, or any other jumps nested
|
||||||
* inside futher ifs.
|
* inside futher ifs.
|
||||||
*/
|
*/
|
||||||
if (ls->num_loop_jumps != 2)
|
if (ls->num_loop_jumps != 2 || ls->terminators.length() != 2)
|
||||||
return visit_continue;
|
return visit_continue;
|
||||||
|
|
||||||
ir_instruction *first_ir =
|
ir_instruction *first_ir =
|
||||||
@@ -528,8 +528,6 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
|
|||||||
unsigned term_count = 0;
|
unsigned term_count = 0;
|
||||||
bool first_term_then_continue = false;
|
bool first_term_then_continue = false;
|
||||||
foreach_in_list(loop_terminator, t, &ls->terminators) {
|
foreach_in_list(loop_terminator, t, &ls->terminators) {
|
||||||
assert(term_count < 2);
|
|
||||||
|
|
||||||
ir_if *ir_if = t->ir->as_if();
|
ir_if *ir_if = t->ir->as_if();
|
||||||
assert(ir_if != NULL);
|
assert(ir_if != NULL);
|
||||||
|
|
||||||
|
@@ -445,7 +445,7 @@ LOAD(ssbo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
|||||||
/* src[] = { offset }. const_index[] = { base, component } */
|
/* src[] = { offset }. const_index[] = { base, component } */
|
||||||
LOAD(output, 1, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
LOAD(output, 1, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||||
/* src[] = { vertex, offset }. const_index[] = { base, component } */
|
/* src[] = { vertex, offset }. const_index[] = { base, component } */
|
||||||
LOAD(per_vertex_output, 2, 1, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
LOAD(per_vertex_output, 2, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||||
/* src[] = { offset }. const_index[] = { base } */
|
/* src[] = { offset }. const_index[] = { base } */
|
||||||
LOAD(shared, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
LOAD(shared, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||||
/* src[] = { offset }. const_index[] = { base, range } */
|
/* src[] = { offset }. const_index[] = { base, range } */
|
||||||
|
@@ -95,9 +95,15 @@ emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr,
|
|||||||
if (src == NULL) {
|
if (src == NULL) {
|
||||||
/* This is a load instruction */
|
/* This is a load instruction */
|
||||||
nir_intrinsic_instr *load =
|
nir_intrinsic_instr *load =
|
||||||
nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
|
nir_intrinsic_instr_create(b->shader, orig_instr->intrinsic);
|
||||||
load->num_components = orig_instr->num_components;
|
load->num_components = orig_instr->num_components;
|
||||||
load->variables[0] = nir_deref_var_clone(deref, load);
|
load->variables[0] = nir_deref_var_clone(deref, load);
|
||||||
|
|
||||||
|
/* Copy over any sources. This is needed for interp_var_at */
|
||||||
|
for (unsigned i = 0;
|
||||||
|
i < nir_intrinsic_infos[orig_instr->intrinsic].num_srcs; i++)
|
||||||
|
nir_src_copy(&load->src[i], &orig_instr->src[i], load);
|
||||||
|
|
||||||
unsigned bit_size = orig_instr->dest.ssa.bit_size;
|
unsigned bit_size = orig_instr->dest.ssa.bit_size;
|
||||||
nir_ssa_dest_init(&load->instr, &load->dest,
|
nir_ssa_dest_init(&load->instr, &load->dest,
|
||||||
load->num_components, bit_size, NULL);
|
load->num_components, bit_size, NULL);
|
||||||
@@ -142,6 +148,9 @@ lower_indirect_block(nir_block *block, nir_builder *b,
|
|||||||
|
|
||||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||||
if (intrin->intrinsic != nir_intrinsic_load_var &&
|
if (intrin->intrinsic != nir_intrinsic_load_var &&
|
||||||
|
intrin->intrinsic != nir_intrinsic_interp_var_at_centroid &&
|
||||||
|
intrin->intrinsic != nir_intrinsic_interp_var_at_sample &&
|
||||||
|
intrin->intrinsic != nir_intrinsic_interp_var_at_offset &&
|
||||||
intrin->intrinsic != nir_intrinsic_store_var)
|
intrin->intrinsic != nir_intrinsic_store_var)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
@@ -158,7 +167,7 @@ lower_indirect_block(nir_block *block, nir_builder *b,
|
|||||||
|
|
||||||
b->cursor = nir_before_instr(&intrin->instr);
|
b->cursor = nir_before_instr(&intrin->instr);
|
||||||
|
|
||||||
if (intrin->intrinsic == nir_intrinsic_load_var) {
|
if (intrin->intrinsic != nir_intrinsic_store_var) {
|
||||||
nir_ssa_def *result;
|
nir_ssa_def *result;
|
||||||
emit_load_store(b, intrin, intrin->variables[0],
|
emit_load_store(b, intrin, intrin->variables[0],
|
||||||
&intrin->variables[0]->deref, &result, NULL);
|
&intrin->variables[0]->deref, &result, NULL);
|
||||||
|
@@ -464,7 +464,7 @@ lower_copies_to_load_store(struct deref_node *node,
|
|||||||
|
|
||||||
struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, copy);
|
struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, copy);
|
||||||
assert(arg_entry);
|
assert(arg_entry);
|
||||||
_mesa_set_remove(node->copies, arg_entry);
|
_mesa_set_remove(arg_node->copies, arg_entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
nir_instr_remove(©->instr);
|
nir_instr_remove(©->instr);
|
||||||
|
@@ -230,6 +230,7 @@ lower_vec_to_movs_block(nir_block *block, nir_function_impl *impl)
|
|||||||
continue; /* The loop */
|
continue; /* The loop */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool vec_had_ssa_dest = vec->dest.dest.is_ssa;
|
||||||
if (vec->dest.dest.is_ssa) {
|
if (vec->dest.dest.is_ssa) {
|
||||||
/* Since we insert multiple MOVs, we have a register destination. */
|
/* Since we insert multiple MOVs, we have a register destination. */
|
||||||
nir_register *reg = nir_local_reg_create(impl);
|
nir_register *reg = nir_local_reg_create(impl);
|
||||||
@@ -263,7 +264,11 @@ lower_vec_to_movs_block(nir_block *block, nir_function_impl *impl)
|
|||||||
if (!(vec->dest.write_mask & (1 << i)))
|
if (!(vec->dest.write_mask & (1 << i)))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (!(finished_write_mask & (1 << i)))
|
/* Coalescing moves the register writes from the vec up to the ALU
|
||||||
|
* instruction in the source. We can only do this if the original
|
||||||
|
* vecN had an SSA destination.
|
||||||
|
*/
|
||||||
|
if (vec_had_ssa_dest && !(finished_write_mask & (1 << i)))
|
||||||
finished_write_mask |= try_coalesce(vec, i);
|
finished_write_mask |= try_coalesce(vec, i);
|
||||||
|
|
||||||
if (!(finished_write_mask & (1 << i)))
|
if (!(finished_write_mask & (1 << i)))
|
||||||
|
@@ -39,10 +39,10 @@
|
|||||||
#define LOOP_UNROLL_LIMIT 96
|
#define LOOP_UNROLL_LIMIT 96
|
||||||
|
|
||||||
/* Prepare this loop for unrolling by first converting to lcssa and then
|
/* Prepare this loop for unrolling by first converting to lcssa and then
|
||||||
* converting the phis from the loops first block and the block that follows
|
* converting the phis from the top level of the loop body to regs.
|
||||||
* the loop into regs. Partially converting out of SSA allows us to unroll
|
* Partially converting out of SSA allows us to unroll the loop without having
|
||||||
* the loop without having to keep track of and update phis along the way
|
* to keep track of and update phis along the way which gets tricky and
|
||||||
* which gets tricky and doesn't add much value over conveting to regs.
|
* doesn't add much value over converting to regs.
|
||||||
*
|
*
|
||||||
* The loop may have a continue instruction at the end of the loop which does
|
* The loop may have a continue instruction at the end of the loop which does
|
||||||
* nothing. Once we're out of SSA, we can safely delete it so we don't have
|
* nothing. Once we're out of SSA, we can safely delete it so we don't have
|
||||||
@@ -53,13 +53,20 @@ loop_prepare_for_unroll(nir_loop *loop)
|
|||||||
{
|
{
|
||||||
nir_convert_loop_to_lcssa(loop);
|
nir_convert_loop_to_lcssa(loop);
|
||||||
|
|
||||||
nir_lower_phis_to_regs_block(nir_loop_first_block(loop));
|
/* Lower phis at the top level of the loop body */
|
||||||
|
foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) {
|
||||||
|
if (nir_cf_node_block == node->type) {
|
||||||
|
nir_lower_phis_to_regs_block(nir_cf_node_as_block(node));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Lower phis after the loop */
|
||||||
nir_block *block_after_loop =
|
nir_block *block_after_loop =
|
||||||
nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
|
nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
|
||||||
|
|
||||||
nir_lower_phis_to_regs_block(block_after_loop);
|
nir_lower_phis_to_regs_block(block_after_loop);
|
||||||
|
|
||||||
|
/* Remove continue if its the last instruction in the loop */
|
||||||
nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop));
|
nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop));
|
||||||
if (last_instr && last_instr->type == nir_instr_type_jump) {
|
if (last_instr && last_instr->type == nir_instr_type_jump) {
|
||||||
assert(nir_instr_as_jump(last_instr)->type == nir_jump_continue);
|
assert(nir_instr_as_jump(last_instr)->type == nir_jump_continue);
|
||||||
|
@@ -1879,7 +1879,6 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
|
|||||||
const struct glsl_type *image_type = sampled.type->type;
|
const struct glsl_type *image_type = sampled.type->type;
|
||||||
const enum glsl_sampler_dim sampler_dim = glsl_get_sampler_dim(image_type);
|
const enum glsl_sampler_dim sampler_dim = glsl_get_sampler_dim(image_type);
|
||||||
const bool is_array = glsl_sampler_type_is_array(image_type);
|
const bool is_array = glsl_sampler_type_is_array(image_type);
|
||||||
const bool is_shadow = glsl_sampler_type_is_shadow(image_type);
|
|
||||||
|
|
||||||
/* Figure out the base texture operation */
|
/* Figure out the base texture operation */
|
||||||
nir_texop texop;
|
nir_texop texop;
|
||||||
@@ -2003,6 +2002,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool is_shadow = false;
|
||||||
unsigned gather_component = 0;
|
unsigned gather_component = 0;
|
||||||
switch (opcode) {
|
switch (opcode) {
|
||||||
case SpvOpImageSampleDrefImplicitLod:
|
case SpvOpImageSampleDrefImplicitLod:
|
||||||
@@ -2011,6 +2011,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
|
|||||||
case SpvOpImageSampleProjDrefExplicitLod:
|
case SpvOpImageSampleProjDrefExplicitLod:
|
||||||
case SpvOpImageDrefGather:
|
case SpvOpImageDrefGather:
|
||||||
/* These all have an explicit depth value as their next source */
|
/* These all have an explicit depth value as their next source */
|
||||||
|
is_shadow = true;
|
||||||
(*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparator);
|
(*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparator);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@@ -1769,7 +1769,7 @@ dri2_wl_swrast_commit_backbuffer(struct dri2_egl_surface *dri2_surf)
|
|||||||
* handle the commit and send a release event before checking for a free
|
* handle the commit and send a release event before checking for a free
|
||||||
* buffer */
|
* buffer */
|
||||||
if (dri2_surf->throttle_callback == NULL) {
|
if (dri2_surf->throttle_callback == NULL) {
|
||||||
dri2_surf->throttle_callback = wl_display_sync(dri2_dpy->wl_dpy_wrapper);
|
dri2_surf->throttle_callback = wl_display_sync(dri2_surf->wl_dpy_wrapper);
|
||||||
wl_callback_add_listener(dri2_surf->throttle_callback,
|
wl_callback_add_listener(dri2_surf->throttle_callback,
|
||||||
&throttle_listener, dri2_surf);
|
&throttle_listener, dri2_surf);
|
||||||
}
|
}
|
||||||
|
@@ -167,7 +167,7 @@ pb_cache_reclaim_buffer(struct pb_cache *mgr, pb_size size,
|
|||||||
cur_entry = LIST_ENTRY(struct pb_cache_entry, cur, head);
|
cur_entry = LIST_ENTRY(struct pb_cache_entry, cur, head);
|
||||||
|
|
||||||
if (!entry && (ret = pb_cache_is_buffer_compat(cur_entry, size,
|
if (!entry && (ret = pb_cache_is_buffer_compat(cur_entry, size,
|
||||||
alignment, usage) > 0))
|
alignment, usage)) > 0)
|
||||||
entry = cur_entry;
|
entry = cur_entry;
|
||||||
else if (os_time_timeout(cur_entry->start, cur_entry->end, now))
|
else if (os_time_timeout(cur_entry->start, cur_entry->end, now))
|
||||||
destroy_buffer_locked(cur_entry);
|
destroy_buffer_locked(cur_entry);
|
||||||
|
@@ -302,7 +302,7 @@ texture_format_needs_swiz(enum pipe_format fmt)
|
|||||||
bool swiz = false;
|
bool swiz = false;
|
||||||
|
|
||||||
if (formats[fmt].present)
|
if (formats[fmt].present)
|
||||||
swiz = !memcmp(def, formats[fmt].tex_swiz, sizeof(formats[fmt].tex_swiz));
|
swiz = !!memcmp(def, formats[fmt].tex_swiz, sizeof(formats[fmt].tex_swiz));
|
||||||
|
|
||||||
return swiz;
|
return swiz;
|
||||||
}
|
}
|
||||||
|
@@ -73,6 +73,16 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format forma
|
|||||||
aligned_height = align(aligned_height, heightalign);
|
aligned_height = align(aligned_height, heightalign);
|
||||||
} else {
|
} else {
|
||||||
pitchalign = 64;
|
pitchalign = 64;
|
||||||
|
|
||||||
|
/* The blits used for mem<->gmem work at a granularity of
|
||||||
|
* 32x32, which can cause faults due to over-fetch on the
|
||||||
|
* last level. The simple solution is to over-allocate a
|
||||||
|
* bit the last level to ensure any over-fetch is harmless.
|
||||||
|
* The pitch is already sufficiently aligned, but height
|
||||||
|
* may not be:
|
||||||
|
*/
|
||||||
|
if ((level == prsc->last_level) && (prsc->target != PIPE_BUFFER))
|
||||||
|
aligned_height = align(aligned_height, 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (layout == UTIL_FORMAT_LAYOUT_ASTC)
|
if (layout == UTIL_FORMAT_LAYOUT_ASTC)
|
||||||
|
@@ -247,7 +247,6 @@ static void rvce_destroy(struct pipe_video_codec *encoder)
|
|||||||
enc->fb = &fb;
|
enc->fb = &fb;
|
||||||
enc->session(enc);
|
enc->session(enc);
|
||||||
enc->destroy(enc);
|
enc->destroy(enc);
|
||||||
enc->feedback(enc);
|
|
||||||
flush(enc);
|
flush(enc);
|
||||||
si_vid_destroy_buffer(&fb);
|
si_vid_destroy_buffer(&fb);
|
||||||
}
|
}
|
||||||
|
@@ -421,6 +421,8 @@ static void destroy(struct rvce_encoder *enc)
|
|||||||
{
|
{
|
||||||
enc->task_info(enc, 0x00000001, 0, 0, 0);
|
enc->task_info(enc, 0x00000001, 0, 0, 0);
|
||||||
|
|
||||||
|
feedback(enc);
|
||||||
|
|
||||||
RVCE_BEGIN(0x02000001); // destroy
|
RVCE_BEGIN(0x02000001); // destroy
|
||||||
RVCE_END();
|
RVCE_END();
|
||||||
}
|
}
|
||||||
|
@@ -458,14 +458,6 @@ static void config_extension(struct rvce_encoder *enc)
|
|||||||
RVCE_END();
|
RVCE_END();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void destroy(struct rvce_encoder *enc)
|
|
||||||
{
|
|
||||||
enc->task_info(enc, 0x00000001, 0, 0, 0);
|
|
||||||
|
|
||||||
RVCE_BEGIN(0x02000001); // destroy
|
|
||||||
RVCE_END();
|
|
||||||
}
|
|
||||||
|
|
||||||
static void feedback(struct rvce_encoder *enc)
|
static void feedback(struct rvce_encoder *enc)
|
||||||
{
|
{
|
||||||
RVCE_BEGIN(0x05000005); // feedback buffer
|
RVCE_BEGIN(0x05000005); // feedback buffer
|
||||||
@@ -474,6 +466,16 @@ static void feedback(struct rvce_encoder *enc)
|
|||||||
RVCE_END();
|
RVCE_END();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void destroy(struct rvce_encoder *enc)
|
||||||
|
{
|
||||||
|
enc->task_info(enc, 0x00000001, 0, 0, 0);
|
||||||
|
|
||||||
|
feedback(enc);
|
||||||
|
|
||||||
|
RVCE_BEGIN(0x02000001); // destroy
|
||||||
|
RVCE_END();
|
||||||
|
}
|
||||||
|
|
||||||
static void motion_estimation(struct rvce_encoder *enc)
|
static void motion_estimation(struct rvce_encoder *enc)
|
||||||
{
|
{
|
||||||
RVCE_BEGIN(0x04000007); // motion estimation
|
RVCE_BEGIN(0x04000007); // motion estimation
|
||||||
|
@@ -26,6 +26,7 @@
|
|||||||
#include "si_shader_internal.h"
|
#include "si_shader_internal.h"
|
||||||
#include "sid.h"
|
#include "sid.h"
|
||||||
|
|
||||||
|
#include "radeon/r600_cs.h"
|
||||||
#include "radeon/radeon_uvd.h"
|
#include "radeon/radeon_uvd.h"
|
||||||
#include "util/hash_table.h"
|
#include "util/hash_table.h"
|
||||||
#include "util/u_log.h"
|
#include "util/u_log.h"
|
||||||
@@ -333,9 +334,6 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
|
|||||||
|
|
||||||
sctx->sample_mask.sample_mask = 0xffff;
|
sctx->sample_mask.sample_mask = 0xffff;
|
||||||
|
|
||||||
/* these must be last */
|
|
||||||
si_begin_new_cs(sctx);
|
|
||||||
|
|
||||||
if (sctx->b.chip_class >= GFX9) {
|
if (sctx->b.chip_class >= GFX9) {
|
||||||
sctx->wait_mem_scratch = (struct r600_resource*)
|
sctx->wait_mem_scratch = (struct r600_resource*)
|
||||||
pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4);
|
pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4);
|
||||||
@@ -351,6 +349,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
|
|||||||
radeon_emit(cs, sctx->wait_mem_scratch->gpu_address);
|
radeon_emit(cs, sctx->wait_mem_scratch->gpu_address);
|
||||||
radeon_emit(cs, sctx->wait_mem_scratch->gpu_address >> 32);
|
radeon_emit(cs, sctx->wait_mem_scratch->gpu_address >> 32);
|
||||||
radeon_emit(cs, sctx->wait_mem_number);
|
radeon_emit(cs, sctx->wait_mem_number);
|
||||||
|
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
|
||||||
|
sctx->wait_mem_scratch,
|
||||||
|
RADEON_USAGE_WRITE, RADEON_PRIO_FENCE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
|
/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
|
||||||
@@ -423,6 +424,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
|
|||||||
util_dynarray_init(&sctx->resident_img_needs_color_decompress, NULL);
|
util_dynarray_init(&sctx->resident_img_needs_color_decompress, NULL);
|
||||||
util_dynarray_init(&sctx->resident_tex_needs_depth_decompress, NULL);
|
util_dynarray_init(&sctx->resident_tex_needs_depth_decompress, NULL);
|
||||||
|
|
||||||
|
/* this must be last */
|
||||||
|
si_begin_new_cs(sctx);
|
||||||
return &sctx->b.b;
|
return &sctx->b.b;
|
||||||
fail:
|
fail:
|
||||||
fprintf(stderr, "radeonsi: Failed to create a context.\n");
|
fprintf(stderr, "radeonsi: Failed to create a context.\n");
|
||||||
|
@@ -43,13 +43,9 @@ struct ac_shader_binary;
|
|||||||
#define RADEON_LLVM_MAX_INPUTS 32 * 4
|
#define RADEON_LLVM_MAX_INPUTS 32 * 4
|
||||||
#define RADEON_LLVM_MAX_OUTPUTS 32 * 4
|
#define RADEON_LLVM_MAX_OUTPUTS 32 * 4
|
||||||
|
|
||||||
#define RADEON_LLVM_INITIAL_CF_DEPTH 4
|
|
||||||
|
|
||||||
#define RADEON_LLVM_MAX_SYSTEM_VALUES 11
|
#define RADEON_LLVM_MAX_SYSTEM_VALUES 11
|
||||||
#define RADEON_LLVM_MAX_ADDRS 16
|
#define RADEON_LLVM_MAX_ADDRS 16
|
||||||
|
|
||||||
struct si_llvm_flow;
|
|
||||||
|
|
||||||
struct si_shader_context {
|
struct si_shader_context {
|
||||||
struct lp_build_tgsi_context bld_base;
|
struct lp_build_tgsi_context bld_base;
|
||||||
struct gallivm_state gallivm;
|
struct gallivm_state gallivm;
|
||||||
@@ -98,10 +94,6 @@ struct si_shader_context {
|
|||||||
LLVMValueRef *imms;
|
LLVMValueRef *imms;
|
||||||
unsigned imms_num;
|
unsigned imms_num;
|
||||||
|
|
||||||
struct si_llvm_flow *flow;
|
|
||||||
unsigned flow_depth;
|
|
||||||
unsigned flow_depth_max;
|
|
||||||
|
|
||||||
struct lp_build_if_state merged_wrap_if_state;
|
struct lp_build_if_state merged_wrap_if_state;
|
||||||
|
|
||||||
struct tgsi_array_info *temp_arrays;
|
struct tgsi_array_info *temp_arrays;
|
||||||
|
@@ -41,14 +41,6 @@
|
|||||||
#include <llvm-c/Transforms/IPO.h>
|
#include <llvm-c/Transforms/IPO.h>
|
||||||
#include <llvm-c/Transforms/Scalar.h>
|
#include <llvm-c/Transforms/Scalar.h>
|
||||||
|
|
||||||
/* Data for if/else/endif and bgnloop/endloop control flow structures.
|
|
||||||
*/
|
|
||||||
struct si_llvm_flow {
|
|
||||||
/* Loop exit or next part of if/else/endif. */
|
|
||||||
LLVMBasicBlockRef next_block;
|
|
||||||
LLVMBasicBlockRef loop_entry_block;
|
|
||||||
};
|
|
||||||
|
|
||||||
enum si_llvm_calling_convention {
|
enum si_llvm_calling_convention {
|
||||||
RADEON_LLVM_AMDGPU_VS = 87,
|
RADEON_LLVM_AMDGPU_VS = 87,
|
||||||
RADEON_LLVM_AMDGPU_GS = 88,
|
RADEON_LLVM_AMDGPU_GS = 88,
|
||||||
@@ -224,45 +216,6 @@ LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
|
|||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct si_llvm_flow *
|
|
||||||
get_current_flow(struct si_shader_context *ctx)
|
|
||||||
{
|
|
||||||
if (ctx->flow_depth > 0)
|
|
||||||
return &ctx->flow[ctx->flow_depth - 1];
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct si_llvm_flow *
|
|
||||||
get_innermost_loop(struct si_shader_context *ctx)
|
|
||||||
{
|
|
||||||
for (unsigned i = ctx->flow_depth; i > 0; --i) {
|
|
||||||
if (ctx->flow[i - 1].loop_entry_block)
|
|
||||||
return &ctx->flow[i - 1];
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct si_llvm_flow *
|
|
||||||
push_flow(struct si_shader_context *ctx)
|
|
||||||
{
|
|
||||||
struct si_llvm_flow *flow;
|
|
||||||
|
|
||||||
if (ctx->flow_depth >= ctx->flow_depth_max) {
|
|
||||||
unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
|
|
||||||
ctx->flow = REALLOC(ctx->flow,
|
|
||||||
ctx->flow_depth_max * sizeof(*ctx->flow),
|
|
||||||
new_max * sizeof(*ctx->flow));
|
|
||||||
ctx->flow_depth_max = new_max;
|
|
||||||
}
|
|
||||||
|
|
||||||
flow = &ctx->flow[ctx->flow_depth];
|
|
||||||
ctx->flow_depth++;
|
|
||||||
|
|
||||||
flow->next_block = NULL;
|
|
||||||
flow->loop_entry_block = NULL;
|
|
||||||
return flow;
|
|
||||||
}
|
|
||||||
|
|
||||||
static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
|
static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
|
||||||
LLVMValueRef value,
|
LLVMValueRef value,
|
||||||
unsigned swizzle_x,
|
unsigned swizzle_x,
|
||||||
@@ -954,42 +907,13 @@ void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
|
static int get_line(int pc)
|
||||||
{
|
{
|
||||||
char buf[32];
|
|
||||||
/* Subtract 1 so that the number shown is that of the corresponding
|
/* Subtract 1 so that the number shown is that of the corresponding
|
||||||
* opcode in the TGSI dump, e.g. an if block has the same suffix as
|
* opcode in the TGSI dump, e.g. an if block has the same suffix as
|
||||||
* the instruction number of the corresponding TGSI IF.
|
* the instruction number of the corresponding TGSI IF.
|
||||||
*/
|
*/
|
||||||
snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
|
return pc - 1;
|
||||||
LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Append a basic block at the level of the parent flow.
|
|
||||||
*/
|
|
||||||
static LLVMBasicBlockRef append_basic_block(struct si_shader_context *ctx,
|
|
||||||
const char *name)
|
|
||||||
{
|
|
||||||
assert(ctx->flow_depth >= 1);
|
|
||||||
|
|
||||||
if (ctx->flow_depth >= 2) {
|
|
||||||
struct si_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
|
|
||||||
|
|
||||||
return LLVMInsertBasicBlockInContext(ctx->ac.context,
|
|
||||||
flow->next_block, name);
|
|
||||||
}
|
|
||||||
|
|
||||||
return LLVMAppendBasicBlockInContext(ctx->ac.context, ctx->main_fn, name);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Emit a branch to the given default target for the current block if
|
|
||||||
* applicable -- that is, if the current block does not already contain a
|
|
||||||
* branch from a break or continue.
|
|
||||||
*/
|
|
||||||
static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
|
|
||||||
{
|
|
||||||
if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
|
|
||||||
LLVMBuildBr(builder, target);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void bgnloop_emit(const struct lp_build_tgsi_action *action,
|
static void bgnloop_emit(const struct lp_build_tgsi_action *action,
|
||||||
@@ -997,12 +921,7 @@ static void bgnloop_emit(const struct lp_build_tgsi_action *action,
|
|||||||
struct lp_build_emit_data *emit_data)
|
struct lp_build_emit_data *emit_data)
|
||||||
{
|
{
|
||||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||||
struct si_llvm_flow *flow = push_flow(ctx);
|
ac_build_bgnloop(&ctx->ac, get_line(bld_base->pc));
|
||||||
flow->loop_entry_block = append_basic_block(ctx, "LOOP");
|
|
||||||
flow->next_block = append_basic_block(ctx, "ENDLOOP");
|
|
||||||
set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
|
|
||||||
LLVMBuildBr(ctx->ac.builder, flow->loop_entry_block);
|
|
||||||
LLVMPositionBuilderAtEnd(ctx->ac.builder, flow->loop_entry_block);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void brk_emit(const struct lp_build_tgsi_action *action,
|
static void brk_emit(const struct lp_build_tgsi_action *action,
|
||||||
@@ -1010,9 +929,7 @@ static void brk_emit(const struct lp_build_tgsi_action *action,
|
|||||||
struct lp_build_emit_data *emit_data)
|
struct lp_build_emit_data *emit_data)
|
||||||
{
|
{
|
||||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||||
struct si_llvm_flow *flow = get_innermost_loop(ctx);
|
ac_build_break(&ctx->ac);
|
||||||
|
|
||||||
LLVMBuildBr(ctx->ac.builder, flow->next_block);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void cont_emit(const struct lp_build_tgsi_action *action,
|
static void cont_emit(const struct lp_build_tgsi_action *action,
|
||||||
@@ -1020,9 +937,7 @@ static void cont_emit(const struct lp_build_tgsi_action *action,
|
|||||||
struct lp_build_emit_data *emit_data)
|
struct lp_build_emit_data *emit_data)
|
||||||
{
|
{
|
||||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||||
struct si_llvm_flow *flow = get_innermost_loop(ctx);
|
ac_build_continue(&ctx->ac);
|
||||||
|
|
||||||
LLVMBuildBr(ctx->ac.builder, flow->loop_entry_block);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void else_emit(const struct lp_build_tgsi_action *action,
|
static void else_emit(const struct lp_build_tgsi_action *action,
|
||||||
@@ -1030,18 +945,7 @@ static void else_emit(const struct lp_build_tgsi_action *action,
|
|||||||
struct lp_build_emit_data *emit_data)
|
struct lp_build_emit_data *emit_data)
|
||||||
{
|
{
|
||||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||||
struct si_llvm_flow *current_branch = get_current_flow(ctx);
|
ac_build_else(&ctx->ac, get_line(bld_base->pc));
|
||||||
LLVMBasicBlockRef endif_block;
|
|
||||||
|
|
||||||
assert(!current_branch->loop_entry_block);
|
|
||||||
|
|
||||||
endif_block = append_basic_block(ctx, "ENDIF");
|
|
||||||
emit_default_branch(ctx->ac.builder, endif_block);
|
|
||||||
|
|
||||||
LLVMPositionBuilderAtEnd(ctx->ac.builder, current_branch->next_block);
|
|
||||||
set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
|
|
||||||
|
|
||||||
current_branch->next_block = endif_block;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void endif_emit(const struct lp_build_tgsi_action *action,
|
static void endif_emit(const struct lp_build_tgsi_action *action,
|
||||||
@@ -1049,15 +953,7 @@ static void endif_emit(const struct lp_build_tgsi_action *action,
|
|||||||
struct lp_build_emit_data *emit_data)
|
struct lp_build_emit_data *emit_data)
|
||||||
{
|
{
|
||||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||||
struct si_llvm_flow *current_branch = get_current_flow(ctx);
|
ac_build_endif(&ctx->ac, get_line(bld_base->pc));
|
||||||
|
|
||||||
assert(!current_branch->loop_entry_block);
|
|
||||||
|
|
||||||
emit_default_branch(ctx->ac.builder, current_branch->next_block);
|
|
||||||
LLVMPositionBuilderAtEnd(ctx->ac.builder, current_branch->next_block);
|
|
||||||
set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
|
|
||||||
|
|
||||||
ctx->flow_depth--;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void endloop_emit(const struct lp_build_tgsi_action *action,
|
static void endloop_emit(const struct lp_build_tgsi_action *action,
|
||||||
@@ -1065,31 +961,7 @@ static void endloop_emit(const struct lp_build_tgsi_action *action,
|
|||||||
struct lp_build_emit_data *emit_data)
|
struct lp_build_emit_data *emit_data)
|
||||||
{
|
{
|
||||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||||
struct si_llvm_flow *current_loop = get_current_flow(ctx);
|
ac_build_endloop(&ctx->ac, get_line(bld_base->pc));
|
||||||
|
|
||||||
assert(current_loop->loop_entry_block);
|
|
||||||
|
|
||||||
emit_default_branch(ctx->ac.builder, current_loop->loop_entry_block);
|
|
||||||
|
|
||||||
LLVMPositionBuilderAtEnd(ctx->ac.builder, current_loop->next_block);
|
|
||||||
set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
|
|
||||||
ctx->flow_depth--;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void if_cond_emit(const struct lp_build_tgsi_action *action,
|
|
||||||
struct lp_build_tgsi_context *bld_base,
|
|
||||||
struct lp_build_emit_data *emit_data,
|
|
||||||
LLVMValueRef cond)
|
|
||||||
{
|
|
||||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
|
||||||
struct si_llvm_flow *flow = push_flow(ctx);
|
|
||||||
LLVMBasicBlockRef if_block;
|
|
||||||
|
|
||||||
if_block = append_basic_block(ctx, "IF");
|
|
||||||
flow->next_block = append_basic_block(ctx, "ELSE");
|
|
||||||
set_basicblock_name(if_block, "if", bld_base->pc);
|
|
||||||
LLVMBuildCondBr(ctx->ac.builder, cond, if_block, flow->next_block);
|
|
||||||
LLVMPositionBuilderAtEnd(ctx->ac.builder, if_block);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void if_emit(const struct lp_build_tgsi_action *action,
|
static void if_emit(const struct lp_build_tgsi_action *action,
|
||||||
@@ -1097,13 +969,7 @@ static void if_emit(const struct lp_build_tgsi_action *action,
|
|||||||
struct lp_build_emit_data *emit_data)
|
struct lp_build_emit_data *emit_data)
|
||||||
{
|
{
|
||||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||||
LLVMValueRef cond;
|
ac_build_if(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
|
||||||
|
|
||||||
cond = LLVMBuildFCmp(ctx->ac.builder, LLVMRealUNE,
|
|
||||||
emit_data->args[0],
|
|
||||||
ctx->ac.f32_0, "");
|
|
||||||
|
|
||||||
if_cond_emit(action, bld_base, emit_data, cond);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void uif_emit(const struct lp_build_tgsi_action *action,
|
static void uif_emit(const struct lp_build_tgsi_action *action,
|
||||||
@@ -1111,12 +977,7 @@ static void uif_emit(const struct lp_build_tgsi_action *action,
|
|||||||
struct lp_build_emit_data *emit_data)
|
struct lp_build_emit_data *emit_data)
|
||||||
{
|
{
|
||||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||||
LLVMValueRef cond;
|
ac_build_uif(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
|
||||||
|
|
||||||
cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
|
|
||||||
ac_to_integer(&ctx->ac, emit_data->args[0]), ctx->i32_0, "");
|
|
||||||
|
|
||||||
if_cond_emit(action, bld_base, emit_data, cond);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void emit_immediate(struct lp_build_tgsi_context *bld_base,
|
static void emit_immediate(struct lp_build_tgsi_context *bld_base,
|
||||||
@@ -1413,7 +1274,5 @@ void si_llvm_dispose(struct si_shader_context *ctx)
|
|||||||
FREE(ctx->imms);
|
FREE(ctx->imms);
|
||||||
ctx->imms = NULL;
|
ctx->imms = NULL;
|
||||||
ctx->imms_num = 0;
|
ctx->imms_num = 0;
|
||||||
FREE(ctx->flow);
|
ac_llvm_context_dispose(&ctx->ac);
|
||||||
ctx->flow = NULL;
|
|
||||||
ctx->flow_depth_max = 0;
|
|
||||||
}
|
}
|
||||||
|
@@ -2106,7 +2106,7 @@ svga_is_format_supported(struct pipe_screen *screen,
|
|||||||
|
|
||||||
if (!ss->sws->have_vgpu10 &&
|
if (!ss->sws->have_vgpu10 &&
|
||||||
util_format_is_srgb(format) &&
|
util_format_is_srgb(format) &&
|
||||||
(bindings & PIPE_BIND_DISPLAY_TARGET)) {
|
(bindings & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_RENDER_TARGET))) {
|
||||||
/* We only support sRGB rendering with vgpu10 */
|
/* We only support sRGB rendering with vgpu10 */
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
@@ -770,6 +770,7 @@ dri2_allocate_textures(struct dri_context *ctx,
|
|||||||
whandle.handle = buf->name;
|
whandle.handle = buf->name;
|
||||||
whandle.stride = buf->pitch;
|
whandle.stride = buf->pitch;
|
||||||
whandle.offset = 0;
|
whandle.offset = 0;
|
||||||
|
whandle.modifier = DRM_FORMAT_MOD_INVALID;
|
||||||
if (screen->can_share_buffer)
|
if (screen->can_share_buffer)
|
||||||
whandle.type = DRM_API_HANDLE_TYPE_SHARED;
|
whandle.type = DRM_API_HANDLE_TYPE_SHARED;
|
||||||
else
|
else
|
||||||
|
@@ -810,6 +810,10 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
|
|||||||
|
|
||||||
const unsigned loop_label = l++;
|
const unsigned loop_label = l++;
|
||||||
|
|
||||||
|
/* Declare all light constants to allow indirect adressing */
|
||||||
|
for (i = 32; i < 96; i++)
|
||||||
|
ureg_DECL_constant(ureg, i);
|
||||||
|
|
||||||
ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */
|
ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */
|
||||||
ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f));
|
ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f));
|
||||||
ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f));
|
ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f));
|
||||||
@@ -1935,7 +1939,7 @@ nine_ff_load_lights(struct NineDevice9 *device)
|
|||||||
dst[38 + l * 8].x = cosf(light->Theta * 0.5f);
|
dst[38 + l * 8].x = cosf(light->Theta * 0.5f);
|
||||||
dst[38 + l * 8].y = cosf(light->Phi * 0.5f);
|
dst[38 + l * 8].y = cosf(light->Phi * 0.5f);
|
||||||
dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y);
|
dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y);
|
||||||
dst[39 + l * 8].w = (l + 1) == context->ff.num_lights_active;
|
dst[39 + l * 8].w = (float)((l + 1) == context->ff.num_lights_active);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2474,7 +2478,7 @@ nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M)
|
|||||||
M->m[2][0] * D->m[0][2] +
|
M->m[2][0] * D->m[0][2] +
|
||||||
M->m[3][0] * D->m[0][3];
|
M->m[3][0] * D->m[0][3];
|
||||||
|
|
||||||
if (det < 1e-30) {/* non inversible */
|
if (fabsf(det) < 1e-30) {/* non inversible */
|
||||||
*D = *M; /* wine tests */
|
*D = *M; /* wine tests */
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@@ -1079,7 +1079,7 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
|
|||||||
case D3DSMO_FACE:
|
case D3DSMO_FACE:
|
||||||
if (ureg_src_is_undef(tx->regs.vFace)) {
|
if (ureg_src_is_undef(tx->regs.vFace)) {
|
||||||
if (tx->face_is_sysval_integer) {
|
if (tx->face_is_sysval_integer) {
|
||||||
tmp = tx_scratch(tx);
|
tmp = ureg_DECL_temporary(ureg);
|
||||||
tx->regs.vFace =
|
tx->regs.vFace =
|
||||||
ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
|
ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
|
||||||
|
|
||||||
|
@@ -454,7 +454,7 @@ nine_state_copy_common_all(struct NineDevice9 *device,
|
|||||||
|
|
||||||
/* Textures */
|
/* Textures */
|
||||||
if (1) {
|
if (1) {
|
||||||
for (i = 0; i < device->caps.MaxSimultaneousTextures; i++)
|
for (i = 0; i < NINE_MAX_SAMPLERS; i++)
|
||||||
NineStateBlock9_BindTexture(device, apply, &dst->texture[i], src->texture[i]);
|
NineStateBlock9_BindTexture(device, apply, &dst->texture[i], src->texture[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -89,7 +89,13 @@ static struct VADriverVTable vtable =
|
|||||||
&vlVaQuerySurfaceAttributes,
|
&vlVaQuerySurfaceAttributes,
|
||||||
&vlVaAcquireBufferHandle,
|
&vlVaAcquireBufferHandle,
|
||||||
&vlVaReleaseBufferHandle,
|
&vlVaReleaseBufferHandle,
|
||||||
#if 0
|
#if VA_CHECK_VERSION(1, 1, 0)
|
||||||
|
NULL, /* vaCreateMFContext */
|
||||||
|
NULL, /* vaMFAddContext */
|
||||||
|
NULL, /* vaMFReleaseContext */
|
||||||
|
NULL, /* vaMFSubmit */
|
||||||
|
NULL, /* vaCreateBuffer2 */
|
||||||
|
NULL, /* vaQueryProcessingRate */
|
||||||
&vlVaExportSurfaceHandle,
|
&vlVaExportSurfaceHandle,
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
@@ -923,7 +923,7 @@ vlVaQueryVideoProcPipelineCaps(VADriverContextP ctx, VAContextID context,
|
|||||||
return VA_STATUS_SUCCESS;
|
return VA_STATUS_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if VA_CHECK_VERSION(1, 1, 0)
|
||||||
VAStatus
|
VAStatus
|
||||||
vlVaExportSurfaceHandle(VADriverContextP ctx,
|
vlVaExportSurfaceHandle(VADriverContextP ctx,
|
||||||
VASurfaceID surface_id,
|
VASurfaceID surface_id,
|
||||||
|
@@ -30,6 +30,7 @@ if with_ld_version_script
|
|||||||
xa_link_depends += files('xa.sym')
|
xa_link_depends += files('xa.sym')
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
_xa_version = '.'.join(xa_version)
|
||||||
|
|
||||||
libxatracker = shared_library(
|
libxatracker = shared_library(
|
||||||
'xatracker',
|
'xatracker',
|
||||||
@@ -49,12 +50,13 @@ libxatracker = shared_library(
|
|||||||
dependencies : [
|
dependencies : [
|
||||||
dep_thread, driver_nouveau, driver_i915, driver_svga, driver_freedreno,
|
dep_thread, driver_nouveau, driver_i915, driver_svga, driver_freedreno,
|
||||||
],
|
],
|
||||||
|
version : _xa_version,
|
||||||
install : true,
|
install : true,
|
||||||
)
|
)
|
||||||
|
|
||||||
pkg.generate(
|
pkg.generate(
|
||||||
name : 'xatracker',
|
name : 'xatracker',
|
||||||
description : 'Xorg gallium3D acceleration library',
|
description : 'Xorg gallium3D acceleration library',
|
||||||
version : '.'.join(xa_version),
|
version : _xa_version,
|
||||||
libraries : libxatracker,
|
libraries : libxatracker,
|
||||||
)
|
)
|
||||||
|
@@ -41,7 +41,6 @@ gbm_bo_get_user_data
|
|||||||
gbm_bo_destroy
|
gbm_bo_destroy
|
||||||
gbm_surface_create
|
gbm_surface_create
|
||||||
gbm_surface_create_with_modifiers
|
gbm_surface_create_with_modifiers
|
||||||
gbm_surface_needs_lock_front_buffer
|
|
||||||
gbm_surface_lock_front_buffer
|
gbm_surface_lock_front_buffer
|
||||||
gbm_surface_release_buffer
|
gbm_surface_release_buffer
|
||||||
gbm_surface_has_free_buffers
|
gbm_surface_has_free_buffers
|
||||||
|
@@ -386,8 +386,6 @@ gbm_surface_create_with_modifiers(struct gbm_device *gbm,
|
|||||||
uint32_t format,
|
uint32_t format,
|
||||||
const uint64_t *modifiers,
|
const uint64_t *modifiers,
|
||||||
const unsigned int count);
|
const unsigned int count);
|
||||||
int
|
|
||||||
gbm_surface_needs_lock_front_buffer(struct gbm_surface *surface);
|
|
||||||
|
|
||||||
struct gbm_bo *
|
struct gbm_bo *
|
||||||
gbm_surface_lock_front_buffer(struct gbm_surface *surface);
|
gbm_surface_lock_front_buffer(struct gbm_surface *surface);
|
||||||
|
@@ -816,6 +816,8 @@ fs_inst::size_read(int arg) const
|
|||||||
case SHADER_OPCODE_TYPED_ATOMIC:
|
case SHADER_OPCODE_TYPED_ATOMIC:
|
||||||
case SHADER_OPCODE_TYPED_SURFACE_READ:
|
case SHADER_OPCODE_TYPED_SURFACE_READ:
|
||||||
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
|
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
|
||||||
|
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
|
||||||
|
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
|
||||||
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
|
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
|
||||||
case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
|
case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
|
||||||
case SHADER_OPCODE_BYTE_SCATTERED_READ:
|
case SHADER_OPCODE_BYTE_SCATTERED_READ:
|
||||||
|
@@ -332,6 +332,31 @@ public:
|
|||||||
opcode != BRW_OPCODE_IF &&
|
opcode != BRW_OPCODE_IF &&
|
||||||
opcode != BRW_OPCODE_WHILE));
|
opcode != BRW_OPCODE_WHILE));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool reads_g0_implicitly() const
|
||||||
|
{
|
||||||
|
switch (opcode) {
|
||||||
|
case SHADER_OPCODE_TEX:
|
||||||
|
case SHADER_OPCODE_TXL:
|
||||||
|
case SHADER_OPCODE_TXD:
|
||||||
|
case SHADER_OPCODE_TXF:
|
||||||
|
case SHADER_OPCODE_TXF_CMS_W:
|
||||||
|
case SHADER_OPCODE_TXF_CMS:
|
||||||
|
case SHADER_OPCODE_TXF_MCS:
|
||||||
|
case SHADER_OPCODE_TXS:
|
||||||
|
case SHADER_OPCODE_TG4:
|
||||||
|
case SHADER_OPCODE_TG4_OFFSET:
|
||||||
|
case SHADER_OPCODE_SAMPLEINFO:
|
||||||
|
case VS_OPCODE_PULL_CONSTANT_LOAD:
|
||||||
|
case GS_OPCODE_SET_PRIMITIVE_ID:
|
||||||
|
case GS_OPCODE_GET_INSTANCE_ID:
|
||||||
|
case SHADER_OPCODE_GEN4_SCRATCH_READ:
|
||||||
|
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -1267,6 +1267,9 @@ vec4_instruction_scheduler::calculate_deps()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (inst->reads_g0_implicitly())
|
||||||
|
add_dep(last_fixed_grf_write, n);
|
||||||
|
|
||||||
if (!inst->is_send_from_grf()) {
|
if (!inst->is_send_from_grf()) {
|
||||||
for (int i = 0; i < inst->mlen; i++) {
|
for (int i = 0; i < inst->mlen; i++) {
|
||||||
/* It looks like the MRF regs are released in the send
|
/* It looks like the MRF regs are released in the send
|
||||||
|
@@ -688,8 +688,11 @@ vec4_visitor::pack_uniform_registers()
|
|||||||
* the next part of our packing algorithm.
|
* the next part of our packing algorithm.
|
||||||
*/
|
*/
|
||||||
int reg = inst->src[0].nr;
|
int reg = inst->src[0].nr;
|
||||||
for (unsigned i = 0; i < vec4s_read; i++)
|
int channel_size = type_sz(inst->src[0].type) / 4;
|
||||||
|
for (unsigned i = 0; i < vec4s_read; i++) {
|
||||||
chans_used[reg + i] = 4;
|
chans_used[reg + i] = 4;
|
||||||
|
channel_sizes[reg + i] = MAX2(channel_sizes[reg + i], channel_size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1943,6 +1946,30 @@ is_align1_df(vec4_instruction *inst)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Three source instruction must have a GRF/MRF destination register.
|
||||||
|
* ARF NULL is not allowed. Fix that up by allocating a temporary GRF.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
vec4_visitor::fixup_3src_null_dest()
|
||||||
|
{
|
||||||
|
bool progress = false;
|
||||||
|
|
||||||
|
foreach_block_and_inst_safe (block, vec4_instruction, inst, cfg) {
|
||||||
|
if (inst->is_3src(devinfo) && inst->dst.is_null()) {
|
||||||
|
const unsigned size_written = type_sz(inst->dst.type);
|
||||||
|
const unsigned num_regs = DIV_ROUND_UP(size_written, REG_SIZE);
|
||||||
|
|
||||||
|
inst->dst = retype(dst_reg(VGRF, alloc.allocate(num_regs)),
|
||||||
|
inst->dst.type);
|
||||||
|
progress = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (progress)
|
||||||
|
invalidate_live_intervals();
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
vec4_visitor::convert_to_hw_regs()
|
vec4_visitor::convert_to_hw_regs()
|
||||||
{
|
{
|
||||||
@@ -2694,6 +2721,8 @@ vec4_visitor::run()
|
|||||||
OPT(scalarize_df);
|
OPT(scalarize_df);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fixup_3src_null_dest();
|
||||||
|
|
||||||
bool allocated_without_spills = reg_allocate();
|
bool allocated_without_spills = reg_allocate();
|
||||||
|
|
||||||
if (!allocated_without_spills) {
|
if (!allocated_without_spills) {
|
||||||
|
@@ -159,6 +159,7 @@ public:
|
|||||||
void opt_set_dependency_control();
|
void opt_set_dependency_control();
|
||||||
void opt_schedule_instructions();
|
void opt_schedule_instructions();
|
||||||
void convert_to_hw_regs();
|
void convert_to_hw_regs();
|
||||||
|
void fixup_3src_null_dest();
|
||||||
|
|
||||||
bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
|
bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
|
||||||
bool lower_simd_width();
|
bool lower_simd_width();
|
||||||
|
@@ -1325,13 +1325,15 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (modules[MESA_SHADER_TESS_EVAL]) {
|
if (modules[MESA_SHADER_TESS_EVAL]) {
|
||||||
anv_pipeline_compile_tcs_tes(pipeline, cache, pCreateInfo,
|
result = anv_pipeline_compile_tcs_tes(pipeline, cache, pCreateInfo,
|
||||||
modules[MESA_SHADER_TESS_CTRL],
|
modules[MESA_SHADER_TESS_CTRL],
|
||||||
pStages[MESA_SHADER_TESS_CTRL]->pName,
|
pStages[MESA_SHADER_TESS_CTRL]->pName,
|
||||||
pStages[MESA_SHADER_TESS_CTRL]->pSpecializationInfo,
|
pStages[MESA_SHADER_TESS_CTRL]->pSpecializationInfo,
|
||||||
modules[MESA_SHADER_TESS_EVAL],
|
modules[MESA_SHADER_TESS_EVAL],
|
||||||
pStages[MESA_SHADER_TESS_EVAL]->pName,
|
pStages[MESA_SHADER_TESS_EVAL]->pName,
|
||||||
pStages[MESA_SHADER_TESS_EVAL]->pSpecializationInfo);
|
pStages[MESA_SHADER_TESS_EVAL]->pSpecializationInfo);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto compile_fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (modules[MESA_SHADER_GEOMETRY]) {
|
if (modules[MESA_SHADER_GEOMETRY]) {
|
||||||
|
@@ -2282,6 +2282,10 @@ anv_image_aspect_get_planes(VkImageAspectFlags aspect_mask)
|
|||||||
if (aspect_mask & VK_IMAGE_ASPECT_PLANE_2_BIT_KHR)
|
if (aspect_mask & VK_IMAGE_ASPECT_PLANE_2_BIT_KHR)
|
||||||
planes++;
|
planes++;
|
||||||
|
|
||||||
|
if ((aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0 &&
|
||||||
|
(aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0)
|
||||||
|
planes++;
|
||||||
|
|
||||||
return planes;
|
return planes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -875,10 +875,10 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
|
|
||||||
struct anv_image_view *iview = framebuffer->attachments[i];
|
struct anv_image_view *iview = framebuffer->attachments[i];
|
||||||
anv_assert(iview->vk_format == att->format);
|
anv_assert(iview->vk_format == att->format);
|
||||||
anv_assert(iview->n_planes == 1);
|
|
||||||
|
|
||||||
union isl_color_value clear_color = { .u32 = { 0, } };
|
union isl_color_value clear_color = { .u32 = { 0, } };
|
||||||
if (att_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
|
if (att_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
|
||||||
|
anv_assert(iview->n_planes == 1);
|
||||||
assert(att_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
|
assert(att_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
|
||||||
color_attachment_compute_aux_usage(cmd_buffer->device,
|
color_attachment_compute_aux_usage(cmd_buffer->device,
|
||||||
state, i, begin->renderArea,
|
state, i, begin->renderArea,
|
||||||
|
@@ -164,11 +164,9 @@ driCreateNewScreen2(int scrn, int fd,
|
|||||||
|
|
||||||
api = API_OPENGL_COMPAT;
|
api = API_OPENGL_COMPAT;
|
||||||
if (_mesa_override_gl_version_contextless(&consts, &api, &version)) {
|
if (_mesa_override_gl_version_contextless(&consts, &api, &version)) {
|
||||||
if (api == API_OPENGL_CORE) {
|
psp->max_gl_core_version = version;
|
||||||
psp->max_gl_core_version = version;
|
if (api == API_OPENGL_COMPAT)
|
||||||
} else {
|
|
||||||
psp->max_gl_compat_version = version;
|
psp->max_gl_compat_version = version;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
psp->api_mask = 0;
|
psp->api_mask = 0;
|
||||||
|
@@ -1896,7 +1896,7 @@ init_oa_configs(struct brw_context *brw, const char *sysfs_dev_dir)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
register_oa_config(brw, query, config_id);
|
register_oa_config(brw, query, ret);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -339,8 +339,11 @@ grow_buffer(struct brw_context *brw,
|
|||||||
/* We can't safely use realloc, as it may move the existing buffer,
|
/* We can't safely use realloc, as it may move the existing buffer,
|
||||||
* breaking existing pointers the caller may still be using. Just
|
* breaking existing pointers the caller may still be using. Just
|
||||||
* malloc a new copy and memcpy it like the normal BO path.
|
* malloc a new copy and memcpy it like the normal BO path.
|
||||||
|
*
|
||||||
|
* Use bo->size rather than new_size because the bufmgr may have
|
||||||
|
* rounded up the size, and we want the shadow size to match.
|
||||||
*/
|
*/
|
||||||
grow->map = malloc(new_size);
|
grow->map = malloc(new_bo->size);
|
||||||
} else {
|
} else {
|
||||||
grow->map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE);
|
grow->map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE);
|
||||||
}
|
}
|
||||||
@@ -1060,7 +1063,7 @@ brw_batch_references(struct intel_batchbuffer *batch, struct brw_bo *bo)
|
|||||||
static uint64_t
|
static uint64_t
|
||||||
emit_reloc(struct intel_batchbuffer *batch,
|
emit_reloc(struct intel_batchbuffer *batch,
|
||||||
struct brw_reloc_list *rlist, uint32_t offset,
|
struct brw_reloc_list *rlist, uint32_t offset,
|
||||||
struct brw_bo *target, uint32_t target_offset,
|
struct brw_bo *target, int32_t target_offset,
|
||||||
unsigned int reloc_flags)
|
unsigned int reloc_flags)
|
||||||
{
|
{
|
||||||
assert(target != NULL);
|
assert(target != NULL);
|
||||||
|
@@ -384,10 +384,16 @@ intel_image_format_lookup(int fourcc)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static boolean intel_lookup_fourcc(int dri_format, int *fourcc)
|
static boolean
|
||||||
|
intel_image_get_fourcc(__DRIimage *image, int *fourcc)
|
||||||
{
|
{
|
||||||
|
if (image->planar_format) {
|
||||||
|
*fourcc = image->planar_format->fourcc;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(intel_image_formats); i++) {
|
for (unsigned i = 0; i < ARRAY_SIZE(intel_image_formats); i++) {
|
||||||
if (intel_image_formats[i].planes[0].dri_format == dri_format) {
|
if (intel_image_formats[i].planes[0].dri_format == image->dri_format) {
|
||||||
*fourcc = intel_image_formats[i].fourcc;
|
*fourcc = intel_image_formats[i].fourcc;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -574,6 +580,7 @@ intel_create_image_from_texture(__DRIcontext *context, int target,
|
|||||||
intel_setup_image_from_mipmap_tree(brw, image, iobj->mt, level, zoffset);
|
intel_setup_image_from_mipmap_tree(brw, image, iobj->mt, level, zoffset);
|
||||||
image->dri_format = driGLFormatToImageFormat(image->format);
|
image->dri_format = driGLFormatToImageFormat(image->format);
|
||||||
image->has_depthstencil = iobj->mt->stencil_mt? true : false;
|
image->has_depthstencil = iobj->mt->stencil_mt? true : false;
|
||||||
|
image->planar_format = iobj->planar_format;
|
||||||
if (image->dri_format == MESA_FORMAT_NONE) {
|
if (image->dri_format == MESA_FORMAT_NONE) {
|
||||||
*error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
|
*error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
|
||||||
free(image);
|
free(image);
|
||||||
@@ -865,7 +872,7 @@ intel_query_image(__DRIimage *image, int attrib, int *value)
|
|||||||
case __DRI_IMAGE_ATTRIB_FD:
|
case __DRI_IMAGE_ATTRIB_FD:
|
||||||
return !brw_bo_gem_export_to_prime(image->bo, value);
|
return !brw_bo_gem_export_to_prime(image->bo, value);
|
||||||
case __DRI_IMAGE_ATTRIB_FOURCC:
|
case __DRI_IMAGE_ATTRIB_FOURCC:
|
||||||
return intel_lookup_fourcc(image->dri_format, value);
|
return intel_image_get_fourcc(image, value);
|
||||||
case __DRI_IMAGE_ATTRIB_NUM_PLANES:
|
case __DRI_IMAGE_ATTRIB_NUM_PLANES:
|
||||||
if (isl_drm_modifier_has_aux(image->modifier)) {
|
if (isl_drm_modifier_has_aux(image->modifier)) {
|
||||||
assert(!image->planar_format || image->planar_format->nplanes == 1);
|
assert(!image->planar_format || image->planar_format->nplanes == 1);
|
||||||
|
@@ -501,6 +501,28 @@ debug_clear_group(struct gl_debug_state *debug)
|
|||||||
debug->Groups[gstack] = NULL;
|
debug->Groups[gstack] = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete the oldest debug messages out of the log.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
debug_delete_messages(struct gl_debug_state *debug, int count)
|
||||||
|
{
|
||||||
|
struct gl_debug_log *log = &debug->Log;
|
||||||
|
|
||||||
|
if (count > log->NumMessages)
|
||||||
|
count = log->NumMessages;
|
||||||
|
|
||||||
|
while (count--) {
|
||||||
|
struct gl_debug_message *msg = &log->Messages[log->NextMessage];
|
||||||
|
|
||||||
|
debug_message_clear(msg);
|
||||||
|
|
||||||
|
log->NumMessages--;
|
||||||
|
log->NextMessage++;
|
||||||
|
log->NextMessage %= MAX_DEBUG_LOGGED_MESSAGES;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Loop through debug group stack tearing down states for
|
* Loop through debug group stack tearing down states for
|
||||||
* filtering debug messages. Then free debug output state.
|
* filtering debug messages. Then free debug output state.
|
||||||
@@ -514,6 +536,7 @@ debug_destroy(struct gl_debug_state *debug)
|
|||||||
}
|
}
|
||||||
|
|
||||||
debug_clear_group(debug);
|
debug_clear_group(debug);
|
||||||
|
debug_delete_messages(debug, debug->Log.NumMessages);
|
||||||
free(debug);
|
free(debug);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -648,28 +671,6 @@ debug_fetch_message(const struct gl_debug_state *debug)
|
|||||||
return (log->NumMessages) ? &log->Messages[log->NextMessage] : NULL;
|
return (log->NumMessages) ? &log->Messages[log->NextMessage] : NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Delete the oldest debug messages out of the log.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
debug_delete_messages(struct gl_debug_state *debug, int count)
|
|
||||||
{
|
|
||||||
struct gl_debug_log *log = &debug->Log;
|
|
||||||
|
|
||||||
if (count > log->NumMessages)
|
|
||||||
count = log->NumMessages;
|
|
||||||
|
|
||||||
while (count--) {
|
|
||||||
struct gl_debug_message *msg = &log->Messages[log->NextMessage];
|
|
||||||
|
|
||||||
debug_message_clear(msg);
|
|
||||||
|
|
||||||
log->NumMessages--;
|
|
||||||
log->NextMessage++;
|
|
||||||
log->NextMessage %= MAX_DEBUG_LOGGED_MESSAGES;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct gl_debug_message *
|
static struct gl_debug_message *
|
||||||
debug_get_group_message(struct gl_debug_state *debug)
|
debug_get_group_message(struct gl_debug_state *debug)
|
||||||
{
|
{
|
||||||
|
@@ -837,8 +837,8 @@ clear_teximage_fields(struct gl_texture_image *img)
|
|||||||
* Fills in the fields of \p img with the given information.
|
* Fills in the fields of \p img with the given information.
|
||||||
* Note: width, height and depth include the border.
|
* Note: width, height and depth include the border.
|
||||||
*/
|
*/
|
||||||
static void
|
void
|
||||||
init_teximage_fields_ms(struct gl_context *ctx,
|
_mesa_init_teximage_fields_ms(struct gl_context *ctx,
|
||||||
struct gl_texture_image *img,
|
struct gl_texture_image *img,
|
||||||
GLsizei width, GLsizei height, GLsizei depth,
|
GLsizei width, GLsizei height, GLsizei depth,
|
||||||
GLint border, GLenum internalFormat,
|
GLint border, GLenum internalFormat,
|
||||||
@@ -950,8 +950,8 @@ _mesa_init_teximage_fields(struct gl_context *ctx,
|
|||||||
GLint border, GLenum internalFormat,
|
GLint border, GLenum internalFormat,
|
||||||
mesa_format format)
|
mesa_format format)
|
||||||
{
|
{
|
||||||
init_teximage_fields_ms(ctx, img, width, height, depth, border,
|
_mesa_init_teximage_fields_ms(ctx, img, width, height, depth, border,
|
||||||
internalFormat, format, 0, GL_TRUE);
|
internalFormat, format, 0, GL_TRUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -5853,9 +5853,9 @@ texture_image_multisample(struct gl_context *ctx, GLuint dims,
|
|||||||
|
|
||||||
if (_mesa_is_proxy_texture(target)) {
|
if (_mesa_is_proxy_texture(target)) {
|
||||||
if (samplesOK && dimensionsOK && sizeOK) {
|
if (samplesOK && dimensionsOK && sizeOK) {
|
||||||
init_teximage_fields_ms(ctx, texImage, width, height, depth, 0,
|
_mesa_init_teximage_fields_ms(ctx, texImage, width, height, depth, 0,
|
||||||
internalformat, texFormat,
|
internalformat, texFormat,
|
||||||
samples, fixedsamplelocations);
|
samples, fixedsamplelocations);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* clear all image fields */
|
/* clear all image fields */
|
||||||
@@ -5882,9 +5882,9 @@ texture_image_multisample(struct gl_context *ctx, GLuint dims,
|
|||||||
|
|
||||||
ctx->Driver.FreeTextureImageBuffer(ctx, texImage);
|
ctx->Driver.FreeTextureImageBuffer(ctx, texImage);
|
||||||
|
|
||||||
init_teximage_fields_ms(ctx, texImage, width, height, depth, 0,
|
_mesa_init_teximage_fields_ms(ctx, texImage, width, height, depth, 0,
|
||||||
internalformat, texFormat,
|
internalformat, texFormat,
|
||||||
samples, fixedsamplelocations);
|
samples, fixedsamplelocations);
|
||||||
|
|
||||||
if (width > 0 && height > 0 && depth > 0) {
|
if (width > 0 && height > 0 && depth > 0) {
|
||||||
if (memObj) {
|
if (memObj) {
|
||||||
|
@@ -130,6 +130,14 @@ _mesa_init_teximage_fields(struct gl_context *ctx,
|
|||||||
GLsizei width, GLsizei height, GLsizei depth,
|
GLsizei width, GLsizei height, GLsizei depth,
|
||||||
GLint border, GLenum internalFormat,
|
GLint border, GLenum internalFormat,
|
||||||
mesa_format format);
|
mesa_format format);
|
||||||
|
extern void
|
||||||
|
_mesa_init_teximage_fields_ms(struct gl_context *ctx,
|
||||||
|
struct gl_texture_image *img,
|
||||||
|
GLsizei width, GLsizei height, GLsizei depth,
|
||||||
|
GLint border, GLenum internalFormat,
|
||||||
|
mesa_format format,
|
||||||
|
GLuint numSamples,
|
||||||
|
GLboolean fixedSampleLocations);
|
||||||
|
|
||||||
|
|
||||||
extern mesa_format
|
extern mesa_format
|
||||||
|
@@ -304,7 +304,8 @@ initialize_texture_fields(struct gl_context *ctx,
|
|||||||
struct gl_texture_object *texObj,
|
struct gl_texture_object *texObj,
|
||||||
GLint levels,
|
GLint levels,
|
||||||
GLsizei width, GLsizei height, GLsizei depth,
|
GLsizei width, GLsizei height, GLsizei depth,
|
||||||
GLenum internalFormat, mesa_format texFormat)
|
GLenum internalFormat, mesa_format texFormat,
|
||||||
|
GLuint numSamples, GLboolean fixedSampleLocations)
|
||||||
{
|
{
|
||||||
const GLuint numFaces = _mesa_num_tex_faces(target);
|
const GLuint numFaces = _mesa_num_tex_faces(target);
|
||||||
GLint level, levelWidth = width, levelHeight = height, levelDepth = depth;
|
GLint level, levelWidth = width, levelHeight = height, levelDepth = depth;
|
||||||
@@ -326,9 +327,10 @@ initialize_texture_fields(struct gl_context *ctx,
|
|||||||
return GL_FALSE;
|
return GL_FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
_mesa_init_teximage_fields(ctx, texImage,
|
_mesa_init_teximage_fields_ms(ctx, texImage,
|
||||||
levelWidth, levelHeight, levelDepth,
|
levelWidth, levelHeight, levelDepth,
|
||||||
0, internalFormat, texFormat);
|
0, internalFormat, texFormat,
|
||||||
|
numSamples, fixedSampleLocations);
|
||||||
}
|
}
|
||||||
|
|
||||||
_mesa_next_mipmap_level_size(target, 0,
|
_mesa_next_mipmap_level_size(target, 0,
|
||||||
@@ -676,7 +678,9 @@ texture_view(struct gl_context *ctx, struct gl_texture_object *origTexObj,
|
|||||||
|
|
||||||
if (!initialize_texture_fields(ctx, target, texObj, newViewNumLevels,
|
if (!initialize_texture_fields(ctx, target, texObj, newViewNumLevels,
|
||||||
width, height, depth,
|
width, height, depth,
|
||||||
internalformat, texFormat)) {
|
internalformat, texFormat,
|
||||||
|
origTexImage->NumSamples,
|
||||||
|
origTexImage->FixedSampleLocations)) {
|
||||||
return; /* Already recorded error */
|
return; /* Already recorded error */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -134,16 +134,26 @@ create_version_string(struct gl_context *ctx, const char *prefix)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Override the context's version and/or API type if the
|
* Override the context's version and/or API type if the environment variables
|
||||||
* environment variable MESA_GL_VERSION_OVERRIDE is set.
|
* MESA_GL_VERSION_OVERRIDE or MESA_GLES_VERSION_OVERRIDE are set.
|
||||||
*
|
*
|
||||||
* Example uses of MESA_GL_VERSION_OVERRIDE:
|
* Example uses of MESA_GL_VERSION_OVERRIDE:
|
||||||
*
|
*
|
||||||
* 2.1: select a compatibility (non-Core) profile with GL version 2.1
|
* 2.1: select a compatibility (non-Core) profile with GL version 2.1.
|
||||||
* 3.0: select a compatibility (non-Core) profile with GL version 3.0
|
* 3.0: select a compatibility (non-Core) profile with GL version 3.0.
|
||||||
* 3.0FC: select a Core+Forward Compatible profile with GL version 3.0
|
* 3.0FC: select a Core+Forward Compatible profile with GL version 3.0.
|
||||||
* 3.1: select a Core profile with GL version 3.1
|
* 3.1: select GL version 3.1 with GL_ARB_compatibility enabled per the driver default.
|
||||||
* 3.1FC: select a Core+Forward Compatible profile with GL version 3.1
|
* 3.1FC: select GL version 3.1 with forward compatibility and GL_ARB_compatibility disabled.
|
||||||
|
* 3.1COMPAT: select GL version 3.1 with GL_ARB_compatibility enabled.
|
||||||
|
* X.Y: override GL version to X.Y without changing the profile.
|
||||||
|
* X.YFC: select a Core+Forward Compatible profile with GL version X.Y.
|
||||||
|
* X.YCOMPAT: select a Compatibility profile with GL version X.Y.
|
||||||
|
*
|
||||||
|
* Example uses of MESA_GLES_VERSION_OVERRIDE:
|
||||||
|
*
|
||||||
|
* 2.0: select GLES version 2.0.
|
||||||
|
* 3.0: select GLES version 3.0.
|
||||||
|
* 3.1: select GLES version 3.1.
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
_mesa_override_gl_version_contextless(struct gl_constants *consts,
|
_mesa_override_gl_version_contextless(struct gl_constants *consts,
|
||||||
@@ -157,17 +167,12 @@ _mesa_override_gl_version_contextless(struct gl_constants *consts,
|
|||||||
if (version > 0) {
|
if (version > 0) {
|
||||||
*versionOut = version;
|
*versionOut = version;
|
||||||
|
|
||||||
/* If the API is a desktop API, adjust the context flags. We may also
|
/* Modify the API and context flags as needed. */
|
||||||
* need to modify the API depending on the version. For example, Mesa
|
|
||||||
* does not support a GL 3.3 compatibility profile.
|
|
||||||
*/
|
|
||||||
if (*apiOut == API_OPENGL_CORE || *apiOut == API_OPENGL_COMPAT) {
|
if (*apiOut == API_OPENGL_CORE || *apiOut == API_OPENGL_COMPAT) {
|
||||||
if (version >= 30 && fwd_context) {
|
if (version >= 30 && fwd_context) {
|
||||||
*apiOut = API_OPENGL_CORE;
|
*apiOut = API_OPENGL_CORE;
|
||||||
consts->ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT;
|
consts->ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT;
|
||||||
} else if (version >= 31 && !compat_context) {
|
} else if (compat_context) {
|
||||||
*apiOut = API_OPENGL_CORE;
|
|
||||||
} else {
|
|
||||||
*apiOut = API_OPENGL_COMPAT;
|
*apiOut = API_OPENGL_COMPAT;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -732,6 +732,6 @@ endif
|
|||||||
if with_glx == 'xlib'
|
if with_glx == 'xlib'
|
||||||
subdir('drivers/x11')
|
subdir('drivers/x11')
|
||||||
endif
|
endif
|
||||||
if with_tests
|
if with_tests and dri_drivers != []
|
||||||
subdir('main/tests')
|
subdir('main/tests')
|
||||||
endif
|
endif
|
||||||
|
@@ -7049,6 +7049,11 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||||||
} while (progress);
|
} while (progress);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Do this again to lower ir_binop_vector_extract introduced
|
||||||
|
* by optimization passes.
|
||||||
|
*/
|
||||||
|
do_vec_index_to_cond_assign(ir);
|
||||||
|
|
||||||
validate_ir_tree(ir);
|
validate_ir_tree(ir);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -27,7 +27,7 @@
|
|||||||
#ifndef U_ENDIAN_H
|
#ifndef U_ENDIAN_H
|
||||||
#define U_ENDIAN_H
|
#define U_ENDIAN_H
|
||||||
|
|
||||||
#if defined(__GLIBC__) || defined(ANDROID) || defined(__CYGWIN__)
|
#ifdef HAVE_ENDIAN_H
|
||||||
#include <endian.h>
|
#include <endian.h>
|
||||||
|
|
||||||
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||||
|
Reference in New Issue
Block a user