Compare commits

..

241 Commits

Author SHA1 Message Date
Eric Anholt
9baf1ff8fc vc4: Use NEON to speed up utile stores on Pi2+.
Improves 1024x1024 TexSubImage2D by 41.2371% +/- 3.52799% (n=10).
2017-01-26 12:50:05 -08:00
Eric Anholt
4d30024238 vc4: Use NEON to speed up utile loads on Pi2.
We had a lot of memcpy call overhead because gpu_stride wasn't being
inlined.  But if you split out the stride==8 and stride==16 cases like
this code does while still using memcpy, you'd no longer have glibc's
NEON memcpy applied at which point we'd be doing 16 uncached reads
instead of 64/(NEON memcpy granularity), for about a 30% performance
hit.  By hand writing the assembly, we can get a whole cacheline
loaded at a time.

Unfortunately, NEON intrinsics turned out to be unusable -- they
didn't have the vldm instruction available.

Note that, for now, the NEON code is only enabled when building for ARMv7
(Pi 2+).  We may want to do runtime detection for the Raspbian case, in
the future.

Improves 1024x1024 GetTexImage by 208.256% +/- 7.07029% (n=10).
2017-01-26 12:48:10 -08:00
Eric Anholt
347b69e7d7 vc4: Move LT tiling code to a separate file.
This paves the way for building it twice, with NEON assembly or not.
2017-01-26 12:23:31 -08:00
Eric Anholt
14cf5c60b8 vc4: Use unreachable() in an unreachable codepath for tiling. 2017-01-26 12:23:31 -08:00
Samuel Pitoiset
eca96ea308 gallium/radeon: add VRAM-vis-usage HUD query
This new query returns the current visible usage of VRAM accessed
by the CPU. It will return 0 on radeon because it's unimplemented.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tested-by: Edmondo Tommasina <edmondo.tommasina@gmail.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
2017-01-26 19:40:52 +01:00
Samuel Pitoiset
9f087e1c7c gallium/radeon: query the CPU accessible size of VRAM
R600_DEBUG="info" can be used to display that size, as well as
the total amount of VRAM/GTT.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tested-by: Edmondo Tommasina <edmondo.tommasina@gmail.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
2017-01-26 19:40:14 +01:00
Ian Romanick
13439031c8 mesa: Arrange validate_uniform_parameters parameters to match call sites
Saves a measly 20 bytes on IA32 and nothing on x64.  Depending on
exactly when this is applied, a lot of variation is possible due to
function alignment.

   text	   data	    bss	    dec	    hex	filename
6670131	 228340	  22552	6921023	 699b3f	lib/i965_dri.so before
6670111	 228340	  22552	6921003	 699b2b	lib/i965_dri.so after
6342932	 293872	  29880	6666684	 65b9bc	lib64/i965_dri.so before
6342932	 293872	  29880	6666684	 65b9bc	lib64/i965_dri.so after

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
2017-01-26 09:46:18 -08:00
Ian Romanick
9be5fd3c87 mesa: Arrange _mesa_uniform parameters to match the call sites
By putting the parameters first that match the parameters to the call
site, 4 (of 14) instructions are saved at _mesa_Uniform4fv on x64.  On
IA32, the details of the instructions change, but it is the same count
and mix of instructions.

Before:

0000000000000830 <_mesa_Uniform4fv>:
     830:       48 83 ec 10             sub    $0x10,%rsp
     834:       49 89 d0                mov    %rdx,%r8
     837:       48 8b 15 00 00 00 00    mov    0x0(%rip),%rdx        # 83e <_mesa_Uniform4fv+0xe>
     83e:       89 f8                   mov    %edi,%eax
     840:       89 f1                   mov    %esi,%ecx
     842:       41 b9 02 00 00 00       mov    $0x2,%r9d
     848:       64 48 8b 3a             mov    %fs:(%rdx),%rdi
     84c:       48 8b 97 c8 01 02 00    mov    0x201c8(%rdi),%rdx
     853:       48 8b 72 70             mov    0x70(%rdx),%rsi
     857:       6a 04                   pushq  $0x4
     859:       89 c2                   mov    %eax,%edx
     85b:       e8 00 00 00 00          callq  860 <_mesa_Uniform4fv+0x30>
     860:       48 83 c4 18             add    $0x18,%rsp
     864:       c3                      retq

After:

00000000000007f0 <_mesa_Uniform4fv>:
     7f0:       48 83 ec 10             sub    $0x10,%rsp
     7f4:       48 8b 05 00 00 00 00    mov    0x0(%rip),%rax        # 7fb <_mesa_Uniform4fv+0xb>
     7fb:       41 b9 02 00 00 00       mov    $0x2,%r9d
     801:       64 48 8b 08             mov    %fs:(%rax),%rcx
     805:       48 8b 81 c8 01 02 00    mov    0x201c8(%rcx),%rax
     80c:       6a 04                   pushq  $0x4
     80e:       4c 8b 40 70             mov    0x70(%rax),%r8
     812:       e8 00 00 00 00          callq  817 <_mesa_Uniform4fv+0x27>
     817:       48 83 c4 18             add    $0x18,%rsp
     81b:       c3                      retq

Saves a measly 416 bytes of text on x64.  Depending on exactly when this
is applied, a lot of variation is possible due to function alignment.

   text	   data	    bss	    dec	    hex	filename
6670131	 228340	  22552	6921023	 699b3f	lib/i965_dri.so before
6670131	 228340	  22552	6921023	 699b3f	lib/i965_dri.so after
6343348	 293872	  29880	6667100	 65bb5c	lib64/i965_dri.so before
6342932	 293872	  29880	6666684	 65b9bc	lib64/i965_dri.so after

There is likely to be no performance change with just this patch.
_mesa_uniform immediately calls validate_uniform_parameters with
parameters in the "wrong" (different from the call site) order.

v2: Rebase on GL_ARB_gpu_shader_fp64.

v3: Rebase on GL_ARB_gpu_shader_int64.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
2017-01-26 09:46:14 -08:00
Ian Romanick
9f7ac45ce4 mesa: Arrange _mesa_uniform_matrix parameters to match the call sites
By putting the parameters first that match the parameters to the call
site, 4 (of 16) instructions are saved at _mesa_UniformMatrix4fv on
x64.  On IA32, the details of the instructions change, but it is the
same count and mix of instructions.

Before:

0000000000001380 <_mesa_UniformMatrix4fv>:
    1380:       48 83 ec 10             sub    $0x10,%rsp
    1384:       48 8b 05 00 00 00 00    mov    0x0(%rip),%rax        # 138b <_mesa_UniformMatrix4fv+0xb>
    138b:       41 89 f8                mov    %edi,%r8d
    138e:       41 89 f1                mov    %esi,%r9d
    1391:       0f b6 d2                movzbl %dl,%edx
    1394:       64 48 8b 38             mov    %fs:(%rax),%rdi
    1398:       48 8b b7 c8 01 02 00    mov    0x201c8(%rdi),%rsi
    139f:       48 8b 76 70             mov    0x70(%rsi),%rsi
    13a3:       68 06 14 00 00          pushq  $0x1406
    13a8:       51                      push   %rcx
    13a9:       52                      push   %rdx
    13aa:       b9 04 00 00 00          mov    $0x4,%ecx
    13af:       ba 04 00 00 00          mov    $0x4,%edx
    13b4:       e8 00 00 00 00          callq  13b9 <_mesa_UniformMatrix4fv+0x39>
    13b9:       48 83 c4 28             add    $0x28,%rsp
    13bd:       c3                      retq

After:

0000000000001360 <_mesa_UniformMatrix4fv>:
    1360:       48 83 ec 10             sub    $0x10,%rsp
    1364:       48 8b 05 00 00 00 00    mov    0x0(%rip),%rax        # 136b <_mesa_UniformMatrix4fv+0xb>
    136b:       0f b6 d2                movzbl %dl,%edx
    136e:       64 4c 8b 00             mov    %fs:(%rax),%r8
    1372:       49 8b 80 c8 01 02 00    mov    0x201c8(%r8),%rax
    1379:       68 06 14 00 00          pushq  $0x1406
    137e:       6a 04                   pushq  $0x4
    1380:       6a 04                   pushq  $0x4
    1382:       4c 8b 48 70             mov    0x70(%rax),%r9
    1386:       e8 00 00 00 00          callq  138b <_mesa_UniformMatrix4fv+0x2b>
    138b:       48 83 c4 28             add    $0x28,%rsp
    138f:       c3                      retq

Saves a measly 576 bytes of text on x64.

   text	   data	    bss	    dec	    hex	filename
6670131	 228340	  22552	6921023	 699b3f	lib/i965_dri.so before
6670131	 228340	  22552	6921023	 699b3f	lib/i965_dri.so after
6343924	 293872	  29880	6667676	 65bd9c	lib64/i965_dri.so before
6343348	 293872	  29880	6667100	 65bb5c	lib64/i965_dri.so after

v2: Rebase on GL_ARB_gpu_shader_fp64.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
2017-01-26 09:46:09 -08:00
Ian Romanick
874393186b mesa: Trivial clean-ups in uniform_query.cpp
This is C++, so we can mix code and declarations.  Doing so allows
constification.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Plamena Manolova <plamena.manolova@intel.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
2017-01-26 09:46:07 -08:00
Lionel Landwerlin
bbe8705c57 spirv: handle undefined components for OpVectorShuffle
Fixes:
   dEQP-VK.spirv_assembly.instruction.compute.opspecconstantop.vector_related
   dEQP-VK.spirv_assembly.instruction.graphics.opspecconstantop.vector_related*

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Cc: "17.0 13.0" <mesa-stable@lists.freedesktop.org>
2017-01-26 17:31:21 +00:00
Lionel Landwerlin
df7063cba3 spirv: handle OpUndef as part of the variable parsing pass
Looking at the following bit of SPIRV shader :

...
%zero        = OpConstant %i32 0
%ivec3_0     = OpConstantComposite %ivec3 %zero %zero %zero
%vec3_undef  = OpUndef %ivec3
%sc_0        = OpSpecConstant %i32 0
%sc_1        = OpSpecConstant %i32 0
%sc_2        = OpSpecConstant %i32 0
...

Our compiler currently stops parsing variables & types on the OpUndef
and switches to instructions, leaving the following sc_[0-2] variables
untreated.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Cc: "17.0 13.0" <mesa-stable@lists.freedesktop.org>
2017-01-26 17:29:29 +00:00
Lionel Landwerlin
c3421106ec anv: fix descriptor pool internal size allocation
The size of the pool is slightly smaller than the size of the
structure containing the whole pool. We need to take that into account
when setting up the internals.

Fixes a crash due to out of bound memory access in:
   dEQP-VK.api.descriptor_pool.out_of_pool_memory

v2: Drop debug traces (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Cc: "17.0 13.0" <mesa-stable@lists.freedesktop.org>
2017-01-26 17:24:21 +00:00
Kenneth Graunke
f8f7ea508b i965: Make intelEmitCopyBlit not truncate large strides.
When trying to blit larger tiled surfaces, the pitch can be larger than
32768 bytes, which means it won't fit in a GLshort.  Passing it in will
truncate the stride to 0, which has...surprising results.

The pitch can be up to 32,768 DWords, or 128kB.  We measure it in bytes,
but divide by 4 when programming it.  So we need to handle values up to
131,072.  Switch from GLshort to int32_t to avoid the truncation.

Fixes GL45-CTS.gtf30.GL3Tests.depth_texture.depth_texture_copyteximage
at widths greater than 8192.

v2: Use int32_t as negative values can be used (Jason).

Cc: "17.0" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2017-01-26 01:43:20 -08:00
Kenneth Graunke
fcf723b647 i965: Use a UW source type for CS_OPCODE_CS_TERMINATE.
SIMD16 compute shaders use a send(16) with mlen 1 for the EOT message,
using a source of g127 for the single register.  With a UD type, this
supposedly could read g128, which doesn't exist, causing the simulator
to get cranky.  Use a UW type to avoid this.

Cc: "17.0" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
2017-01-26 00:52:52 -08:00
Iago Toral Quiroga
9b25769da6 anv/lower_input_attachments: honor sample index parameter to subpassLoad()
According to GL_KHR_vulkan_glsl, the signature of subpassLoad() is:

gvec4 subpassLoad(gsubpassInput   subpass);
gvec4 subpassLoad(gsubpassInputMS subpass, int sample);

So the multisampled case always receives an explicit sample index that we
should use. The current implementation was ignoring this parameter
and using gl_SampleID value instead.

Fixes:
dEQP-VK.pipeline.multisample_shader_builtin.sample_id.*

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Cc: "17.0" <mesa-stable@lists.freedesktop.org>
2017-01-26 08:11:21 +01:00
Kenneth Graunke
5106df85da i965: Fix fast depth clears for surfaces with a dimension of 16384.
I hadn't bothered to set this bit because I figured it would just
paper over us getting the rectangle wrong.  But it turns out that
there is a legitimate reason to use it, so let's do so.

The alternative would be to chop up 16k clears to multiple 8k clears,
which is pointlessly painful.

Cc: "17.0" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Nanley Chery <nanley.g.chery@intel.com>
2017-01-25 22:24:08 -08:00
Chad Versace
022e5c7e5a anv: Implement VK_KHR_get_physical_device_properties2
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-25 19:18:47 -08:00
Chad Versace
cd03021c83 anv: Refactor anv_GetPhysicalDeviceQueueFamilyProperties()
Add a helper function, anv_get_queue_family_properties(), which fills the
struct.  This patch reduces churn in the following patch that implements
vkGetPhysicalDeviceQueueFamilyProperties2KHR.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-25 19:18:46 -08:00
Chad Versace
5826190095 anv: Refactor anv_GetPhysicalDeviceFormatProperties()
Add a helper function, anv_get_image_format_properties(), which does all
the work and has a VkPhysicalDeviceImageFormatInfo2KHR parameter. This
patch reduces churn in the following patch that implements
vkGetPhysicalDeviceImageFormatProperties2KHR.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-25 19:18:43 -08:00
Chad Versace
b2de77a07d anv: Revive struct anv_common
The struct was deleted by:
  commit efe9d1cde3
  Author: Edward O'Callaghan <funfunctor@folklore1984.net>
  Subject: anv: Clean up some unused variables

Unlike the original anv_common, the new one has a non-const pNext
pointer because we will use it for the output structs of
VK_KHR_get_physical_device_properties2.

v2:
  - Retype pNext from void* to struct anv_common*.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-25 19:18:33 -08:00
Chad Versace
c5d99c9983 anv: Define macro anv_debug()
This is a printf-like macro that prints a debug message to stderr when
built with DEBUG.  If no DEBUG, then do nothing.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-25 19:17:45 -08:00
Ian Romanick
fd43bee0ea mesa: Fix copy-and-paste bug in _mesa_(Program|)Uniform[1234](i|ui)64vARB functions
All of the functions were passing 1 to _mesa_uniform instead of passing
count.

Fixes 16 unused parameter warnings like:

main/uniforms.c: In function ‘_mesa_Uniform1i64vARB’:
main/uniforms.c:1692:47: warning: unused parameter ‘count’ [-Wunused-parameter]
 _mesa_Uniform1i64vARB(GLint location, GLsizei count, const GLint64 *value)
                                               ^~~~~

This is why I build with extra warnings enabled.  Unfortunately, there
are so many unused parameter warnings in Mesa that I didn't notice these
added warnings for over 6 months. :(

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-25 09:28:40 -08:00
Lionel Landwerlin
173dd60ced spirv: bump headers to SPIRV 1.1
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2017-01-25 17:22:23 +00:00
Lionel Landwerlin
05e2d99bf2 spirv: add default handler for new enums
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2017-01-25 17:22:23 +00:00
Lionel Landwerlin
4fd54d611f spirv: fix typos
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2017-01-25 17:21:15 +00:00
Lionel Landwerlin
25e21cb8d0 anv: set command buffer to NULL when allocations fail
The spec section 5.2 says:

   "vkAllocateCommandBuffers can be used to create multiple command
   buffers. If the creation of any of those command buffers fails, the
   implementation must destroy all successfully created command buffer
   objects from this command, set all entries of the pCommandBuffers
   array to VK_NULL_HANDLE and return the error."

Fixes:
   dEQP-VK.api.object_management.alloc_callback_fail_multiple.command_buffer_primary
   dEQP-VK.api.object_management.alloc_callback_fail_multiple.command_buffer_secondary

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Cc: "13.0 17.0" <mesa-stable@lists.freedesktop.org>
2017-01-25 17:15:30 +00:00
Jason Ekstrand
d6397dd625 vulkan/wsi: Lower the maximum image sizes
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: "17.0" <mesa-dev@lists.freedesktop.org>
2017-01-25 09:05:30 -08:00
Jason Ekstrand
659edd9f5c vulkan/wsi/wayland: Handle VK_INCOMPLETE for GetPresentModes
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: "17.0" <mesa-dev@lists.freedesktop.org>
2017-01-25 09:05:25 -08:00
Jason Ekstrand
dc578ef060 vulkan/wsi/wayland: Handle VK_INCOMPLETE for GetFormats
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: "17.0" <mesa-dev@lists.freedesktop.org>
2017-01-25 09:04:56 -08:00
George Kyriazis
e259efd805 swr: Update fs texture & sampler state logic
In swr_update_derived() update texture and sampler state on a new fragment
shader.  GALLIUM_HUD can update fs using a previously bound texture and
sampler.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
2017-01-25 10:02:50 -06:00
Samuel Pitoiset
cff199ceb7 gallium/radeon: add a new HUD query for the number of mapped buffers
Useful when debugging applications which map a ton of buffers
and also because we used to run into Linux's limit on the number
of simultaneous mmap() calls.

v2: - update the commit message

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-25 15:19:21 +01:00
Iago Toral Quiroga
56495080ed spirv: handle gl_SampleMask
SPIR-V maps both gl_SampleMask and gl_SampleMaskIn to the same
builtin (SampleMask). The only way to tell which one we are dealing with
is to check if it is an input or an output.

Fixes:
dEQP-VK.pipeline.multisample_shader_builtin.sample_mask.write.*

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-25 08:08:16 +01:00
Iago Toral Quiroga
9467d78d38 spirv: acknowledge multisampled input attachments
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-25 08:07:09 +01:00
Dave Airlie
2ab2be092d radv: program a default point size.
Along the lines of what
3b804819 anv: Default PointSize to 1.0 if not written by the shader
does for anv, program a default point size in the hw of 1.0.

This preemptively fixes a bunch of geom shader tests.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Cc: "17.0" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-01-25 09:58:38 +10:00
Marek Olšák
eac7df43ca radeonsi: handle first_non_void correctly in si_create_vertex_elements
This fixes R11G11B10_FLOAT, because it's in the category of "OTHER",
meaning that it doesn't have any channel description.

Cc: 17.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-24 23:52:01 +01:00
Marek Olšák
d9ef549238 st/mesa: destroy pipe_context before destroying st_context (v2)
If radeonsi starts compiling an optimized shader variant asynchronously
with a GL debug callback set and the application destroys the GL context,
radeonsi crashes when trying to write shader stats into the debug output
of a non-existent context after compilation, because st/mesa was destroyed
before pipe_context.

Firefox with WebGL2 enabled hits this bug.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99456

v2: protect against a double destroy in st_create_context_priv and callers.

Cc: 17.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-24 23:52:01 +01:00
Timothy Arceri
dd65f0efc9 nir: bump loop max unroll limit
The original number was chosen in an attempt to match the limits applied to
GLSL IR.

A look at the git history of why these limits were chosen for GLSL IR
shows it was more to do with the slow speed of unrolling large loops in
GLSL IR than anything else. The speed of loop unrolling in NIR is not a
problem so we may wish to bump this even higher in future.

No shader-db change, however a future change that will disable the GLSL IR
optimisation loop in the i965 backend results in 4 loops from The Talos
Principle failing to unroll. Bumping the limit allows them to unroll which
results in the instruction count matching the previous output from when the
GLSL IR opts were still enabled.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2017-01-25 09:43:29 +11:00
Timothy Arceri
34ab9b0947 glsl: lower constant arrays to uniform arrays before optimisation loop
Previously the constant array would not get copy propagated until the backend
did its GLSL IR opt loop. I plan on removing that from i965 shortly which
caused huge regressions in Deus-ex and Tomb Raider which have large
constant arrays. Moving lowering before the opt loop in the GLSL linker
fixes this and unexpectedly improves some compute shaders also.

shader-db results BDW:

instructions helped:   shaders/closed/steam/deus-ex-mankind-divided/374.shader_test CS SIMD16: 204 -> 194 (-4.90%)
instructions helped:   shaders/closed/steam/deus-ex-mankind-divided/318.shader_test CS SIMD8: 1010 -> 741 (-26.63%)
instructions helped:   shaders/closed/steam/deus-ex-mankind-divided/144.shader_test CS SIMD8: 542 -> 385 (-28.97%)

cycles helped:   shaders/closed/steam/deus-ex-mankind-divided/318.shader_test CS SIMD8: 1831382 -> 1818492 (-0.70%)
cycles helped:   shaders/closed/steam/deus-ex-mankind-divided/144.shader_test CS SIMD8: 216238 -> 206180 (-4.65%)
cycles helped:   shaders/closed/steam/deus-ex-mankind-divided/374.shader_test CS SIMD16: 18484 -> 16644 (-9.95%)

total instructions in shared programs: 13060313 -> 13059877 (-0.00%)
instructions in affected programs: 1756 -> 1320 (-24.83%)
helped: 3
HURT: 0

total cycles in shared programs: 256586698 -> 256561910 (-0.01%)
cycles in affected programs: 2066104 -> 2041316 (-1.20%)
helped: 3
HURT: 0

V3: only call the opt loop if lowering progressed (Suggested by Eric)

V2: call opts before and after lowering (Suggested by Ken)

Reviewed-by: Eric Anholt <eric@anholt.net>
2017-01-25 09:07:30 +11:00
Ian Romanick
c4a0c1efff mesa: Don't advertise GL_OES_read_format in core profile
OpenGL ES implementations are not allowed to ship ARB extensions, and
OpenGL implementations are not allowed to ship OES extensions.

The functionality is also included in GL_ARB_ES2_compatibility.  Every
OpenGL core-profile driver currently exposes both extensions.  I don't
know of any applications that explicitly check for GL_OES_read_format,
so removing it seems very unlikely to cause problems.  No functionality
is removed.

I have left this extension in place for compatibility profile.  There
are still OpenGL 1.x drivers in Mesa, and adding code to check for
compatibility profile and not GL_ARB_ES2_compatibility for
GL_IMPLEMENTATION_COLOR_READ_TYPE and GL_IMPLEMENTATION_COLOR_READ_FORMAT
just feels dumb.

Three other alternatives considered:

 - Remove the string from compatibility profile drivers but leave the
   functionality in place.

 - Add a flag to expose the extension string, and set it in every OpenGL
   driver that does not expose GL_ARB_ES2_compatibility (and those
   drivers only).  I tried this.  You can't have two instances of an
   extension in the extension table (one dummy_true for ES1 and one with
   a flag for compatibility profile), so the implementation requires a
   bit of effort.

 - Only expose the extension in compatibility if the version is less
   than 2.0.  I didn't see an easy way to do this.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Cc: mesa-stable@lists.freedesktop.org
2017-01-24 13:39:26 -08:00
Brian Paul
b87eedd405 docs: fix incorrect link to 12.0.6 release notes
Trivial.
2017-01-24 14:30:44 -07:00
Jason Ekstrand
a435991d3c anv: Expose VK_KHR_maintenance1
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-24 12:27:48 -08:00
Jason Ekstrand
756533520e anv: Return better errors from AllocateDescriptorSets
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-24 12:27:48 -08:00
Jason Ekstrand
99bb4c22a5 anv: Allow selecting the slice of a 3D image
As per VK_KHR_maintenance1, clients can render to a slice of a 3D image
by creating a VK_IMAGE_VIEW_TYPE_2D view of it.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-24 12:27:48 -08:00
Jason Ekstrand
6d79111834 anv: Report FORMAT_FEATURE_TRANSFER_SRC/DST_BIT_KHR
As of VK_KHR_maintenance1, these are supposed to be reported for any
formats on which we support transfer operations.  For us, this is
anything that we can texture from.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-24 12:27:48 -08:00
Jason Ekstrand
8a8630486b anv: Add trivial support for TrimCommandPoolKHR
Our command buffers already efficiently use a global pool so trimming
doesn't really need to do anything.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-24 12:27:48 -08:00
Jason Ekstrand
5edcc96bf6 anv: Set viewport extents correctly when height is negative
As per VK_KHR_maintenance1, setting a negative height in the viewport
can be used to get flipped coordinates.  This is, apparently, very useful
when porting D3D apps to Vulkan.  All we need to do to support this is
to make sure we actually set the min and max correctly.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-24 12:27:48 -08:00
Matt Turner
045f38a507 vulkan: Don't install vk_platform.h or vulkan.h.
These files belong to the vulkan loader.

Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
2017-01-24 11:27:20 -08:00
Roland Scheidegger
aceae09ef0 glsl: fix compile errors with mingw due to missing PRIx64 definitions
define __STDC_FORMAT_MACROS and include <inttypes.h> (same as
ir_builder_print_visitor.cpp already does).

Otherwise, some mingw build errors out (since
8e7e1ae036 and
bbce1c538d presumably) with:
src/compiler/glsl/ir_print_visitor.cpp:479:40: error: expected ‘)’ before ‘PRIu64’
   case GLSL_TYPE_UINT64:fprintf(f, "%" PRIu64, ir->value.u64[i]); break;

(Note even with that fix I get other format specifier warnings:
src/compiler/glsl/ir_print_visitor.cpp:473:47:
warning: unknown conversion type character ‘a’ in format [-Wformat=]
                fprintf(f, "%a", ir->value.f[i]);
                                               ^
src/compiler/glsl/ir_print_visitor.cpp:473:47:
warning: too many arguments for format [-Wformat-extra-args]
but it still compiles at least)

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
2017-01-24 19:12:46 +01:00
Roland Scheidegger
f4df21ed95 gallivm: don't try to use fast rcp for fdiv
The use of fast rcp instruction is disabled, and will always fall back
to use a division instead (1 / x). Hence, if we get a division opcode,
it doesn't make much sense trying to split that into rcp/mul.

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
2017-01-24 19:12:46 +01:00
Roland Scheidegger
25208949d7 gallivm: (trivial) fix ddiv cpu implementation
we can't use the cpu implementation of fdiv, as this one uses different
lp_build_context, which causes assertion failure.
Just use default fdiv action (there is no fast rcp for doubles which we
could potentially use anyway).

Cc: 17.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
2017-01-24 19:12:46 +01:00
Roland Scheidegger
3b575a955c tgsi: implement ddiv opcode
softpipe (along with llvmpipe) claims to support arb_gpu_shader_fp64,
so we really need to support that opcode.

Cc: 17.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
2017-01-24 19:12:46 +01:00
Jason Ekstrand
4c180f9633 i965/blorp: Use the correct ISL format for combined depth/stencil
In brw_blorp_copyteximage, we use the format from the render buffer.
This could be a combined depth/stencil format.  In this case, we handle
stencil properly but we give blorp the wrong ISL format.  Specifically,
we would give blorp ISL_FORMAT_R32G32B32A32_FLOAT which is the wrong
size and was causing GPU hangs.

Fixes: GL45-CTS.gtf30.GL3Tests.packed_depth_stencil.packed_depth_stencil_copyteximage

Reviewed-by: Chad Versace <chadversary@chromium.org>
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Cc: "13.0 17.0" <mesa-stable@lists.freedesktop.org>
2017-01-24 10:06:07 -08:00
Samuel Pitoiset
0054dded03 st/glsl_to_tgsi: fix compilation warnings since int64 types
state_tracker/st_glsl_to_tgsi.cpp:302:28: warning: ‘glsl_to_tgsi_instruction::tex_type’
	is too small to hold all values of ‘enum glsl_base_type’
    glsl_base_type tex_type:4;

Fixes: 8ce53d4a2f ("glsl: Add basic ARB_gpu_shader_int64 types")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-24 12:45:39 +01:00
Samuel Pitoiset
d90d37db73 gallium/radeon: undef the very specific UPDATE_COUNTER macro
Also, wrap this into a do { ... } while (0). Suggested by Nicolai.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-24 11:17:25 +01:00
Topi Pohjolainen
ba6399df94 i965/blorp: Add also depth and stencil buffers to render cache
v2 (Jason, Curro): Add stencil also even though it is not
                   enabled yet.

Cc: 17.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Signed-off-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
2017-01-24 10:41:58 +02:00
Ben Widawsky
e63ab36d0e gbm: Fix width height getters return type (trivial)
v2: Other way round... to make consistent, make both return type have
the fixed width - uint32_t.

Cc: Daniel Stone <daniel@fooishbar.org>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Eric Engestrom <eric.engestrom@imgtec.com>
Acked-by: Daniel Stone <daniels@collabora.com>
2017-01-23 21:43:38 -08:00
Ben Widawsky
bb9ff98b4c gbm: Move getters to match order in header file (trivial)
Other things are out of order, but I need to add a getter so I'm just
fixing those.

This helps people adding to GBM know where the right place to put things
is.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Eric Engestrom <eric.engestrom@imgtec.com>
Acked-by: Daniel Stone <daniels@collabora.com>
2017-01-23 21:43:34 -08:00
Emil Velikov
530cd248f5 docs: add news item and link release notes for 12.0.6
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
2017-01-24 02:15:30 +00:00
Emil Velikov
9b16bd8b6c docs: use correct year for the 12.0.6 release notes
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
(cherry picked from commit 13953f012d)
2017-01-24 02:15:30 +00:00
Emil Velikov
c16e7e0a60 docs: add sha256 checksums for 12.0.6
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
(cherry picked from commit 36e3f2542d)
2017-01-24 02:15:30 +00:00
Emil Velikov
b1137cb9de docs: add release notes for 12.0.6
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
(cherry picked from commit 555885a0bf)
2017-01-24 02:15:30 +00:00
Emil Velikov
9924cdecd9 docs/releasing: remove stray "cd"
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
2017-01-24 02:15:29 +00:00
Ilia Mirkin
b755f2f233 nv50: add support for MUL_ZERO_WINS property
This is simply keyed off the vertex shader, as that's guaranteed to be
present in any pipeline.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
2017-01-23 20:37:14 -05:00
Ilia Mirkin
8c764a2321 nvc0: add support for MUL_ZERO_WINS property
This sets the dnz flag on all the relevant multiplication operations. At
emission time, this will only be supported by nvc0+, so nv50 will need a
different solution.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
2017-01-23 20:37:14 -05:00
Ilia Mirkin
e1346f25bf st/nine: set the MUL_ZERO_WINS flag when supported
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Axel Davy <axel.davy@ens.fr>
2017-01-23 20:37:10 -05:00
Ilia Mirkin
6e40938fbc gallium: add PIPE_CAP_TGSI_MUL_ZERO_WINS
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Reviewed-by: Axel Davy <axel.davy@ens.fr>
2017-01-23 20:36:47 -05:00
Ilia Mirkin
a2b2cd81d1 gallium: add TGSI_PROPERTY_MUL_ZERO_WINS
This will be useful for proper D3D9 emulation, where this behavior is
expected by some shaders.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Reviewed-by: Axel Davy <axel.davy@ens.fr>
2017-01-23 20:35:55 -05:00
Marek Olšák
573bf0940a radeonsi: always set the TCL1_ACTION_ENA when invalidating L2
Some CIK-VI docs say this is the default behavior on SI. That doesn't
answer whether it's also the default behavior on CIK-VI.

Cc: 17.0 13.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-23 23:43:38 +01:00
Marek Olšák
5d3dd70cab radeonsi: don't declare LDS in TES
not used since we started using the offchip tess ring

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-23 23:43:38 +01:00
Marek Olšák
59c5da40ed radeonsi: preload PS inputs only if KILL is used
so that most shaders can get lower VGPR usage thanks to lazy input loading.
I think this is a more accurate constraint that prevents the black transitions
in Witcher 2.

Affected shaders (7758):
Max Waves: 57437 -> 58231 (1.38 %)

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-23 23:43:38 +01:00
Marek Olšák
7b32ae4df5 gallium/radeon: adjust the rule for using the LINEAR_ALIGNED layout
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-23 23:43:38 +01:00
Marek Olšák
e248390e93 winsys/amdgpu: drop all IBs if at least one was rejected within the context
The corruption is inevitable and hangs are possible too.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-23 23:43:38 +01:00
Marek Olšák
1840800860 winsys/amdgpu: report a rejected IB as a lost context
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-23 23:43:38 +01:00
Dave Airlie
dcfcb3047c vulkan: import latest registry for 1.0.39 extensions.
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-01-24 08:13:37 +10:00
Dave Airlie
e38bee34bf vulkan: bump vulkan.h to 1.0.39 version
This introduces a bunch of new extension defines.

Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-01-24 08:13:23 +10:00
Grazvydas Ignotas
f65b3641c3 radv: don't resubmit the same cs over and over while tracing
Fixes: 97dfff54 ("radv: Dump command buffer on hang.")
Signed-off-by: Grazvydas Ignotas <notasas@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
CC: <mesa-stable@lists.freedesktop.org>
2017-01-23 22:27:05 +01:00
Samuel Pitoiset
aa2ace8e49 gallium/radeon: add HUD queries for monitoring some hw blocks
It's also possible to monitor them via performance counters but
the hardware can only use two counters simultaneously. It seems
easier to re-use the existing code which reads from MMIO instead
of writing a multi-pass approach.

v2: - add new lines after ':'

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
2017-01-23 21:19:49 +01:00
Samuel Pitoiset
a704f19247 gallium/radeon: refactor the GRBM counters path
This will allow to expose more queries in order to know which
blocks are busy/idle.

v2: - add new lines after ':'

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
2017-01-23 21:19:49 +01:00
George Kyriazis
00847e4f14 swr: Align query results allocation
Some query results struct contents are declared as cache line aligned.
Use aligned malloc, and align the whole struct, to be safe.

Fixes crash when compiling with clang.

CC: <mesa-stable@lists.freedesktop.org>

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
2017-01-23 14:15:54 -06:00
Bruce Cherniak
b829206b07 swr: Prune empty nodes in CalculateProcessorTopology.
CalculateProcessorTopology tries to figure out system topology by
parsing /proc/cpuinfo to determine the number of threads, cores, and
NUMA nodes.  There are some architectures where the "physical id" begins
with 1 rather than 0, which was creating an empty "0" node and causing a
crash in CreateThreadPool.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97102
Reviewed-By: George Kyriazis <george.kyriazis@intel.com>
CC: <mesa-stable@lists.freedesktop.org>
2017-01-23 13:52:26 -06:00
Matt Turner
d349449a16 i965: Use UNUSED to silence unused variable (used in assert). 2017-01-23 10:50:20 -08:00
Rainer Hochecker
09b140abb5 dri: allow 16bit R/GR images to be exported via drm buffers
This allows eglCreateImageKHR to access P010 surfaces created by vaapi

Signed-off-by: Rainer Hochecker <fernetmenta@online.de>
Acked-by: Ben Widawsky <ben@bwidawsk.net>
2017-01-23 08:47:15 -08:00
Christian König
1338d912f5 st/va: make sure that we call begin_frame() only once v2
This fixes "st/va: delay calling begin_frame until we have all parameters".

v2: call begin frame after decoder (re)creation as well.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Tested-by: Andy Furniss <adf.lists@gmail.com>
2017-01-23 17:00:04 +01:00
Eric Engestrom
50141e131a drirc: remove spurious tabs
Signed-off-by: Eric Engestrom <eric@engestrom.ch>
Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-23 16:34:58 +01:00
Nicolai Hähnle
cfabbbcfd7 st/glsl_to_tgsi: use DDIV instead of DRCP + DMUL
Fixes GL45-CTS.gpu_shader_fp64.built_in_functions.

v2: use DDIV unconditionally (Roland)

Reviewed-by: Roland Scheidegger <sroland@vmware.com> (v1)
Reviewed-by: Marek Olšák <marek.olsak@amd.com> (v1)
Tested-by: Glenn Kennard <glenn.kennard@gmail.com>
Tested-by: James Harvey <lothmordor@gmail.com>
Cc: 17.0 <mesa-stable@lists.freedesktop.org>
2017-01-23 16:17:26 +01:00
Nicolai Hähnle
b71c415c3d glsl: split DIV_TO_MUL_RCP into single- and double-precision flags
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Tested-by: Glenn Kennard <glenn.kennard@gmail.com>
Tested-by: James Harvey <lothmordor@gmail.com>
Cc: 17.0 <mesa-stable@lists.freedesktop.org>
2017-01-23 16:17:19 +01:00
Nicolai Hähnle
e4f8f9a638 r600: implement DDIV
Tested-by: Glenn Kennard <glenn.kennard@gmail.com>
Tested-by: James Harvey <lothmordor@gmail.com>
Cc: 17.0 <mesa-stable@lists.freedesktop.org>
2017-01-23 16:17:15 +01:00
Nicolai Hähnle
488560cfe6 r600: factor out cayman_emit_unary_double_raw
We will use it for DDIV.

Tested-by: Glenn Kennard <glenn.kennard@gmail.com>
Tested-by: James Harvey <lothmordor@gmail.com>
Cc: 17.0 <mesa-stable@lists.freedesktop.org>
2017-01-23 16:17:12 +01:00
Nicolai Hähnle
76b02d2fe1 r600: double multiply can handle only one multiply at a time
It seems clear that trying to multiply two pairs of doubles would result
in the temporary register getting overwritten by the second pair. So
make the code more explicit.

Tested-by: Glenn Kennard <glenn.kennard@gmail.com>
Tested-by: James Harvey <lothmordor@gmail.com>
Cc: 17.0 <mesa-stable@lists.freedesktop.org>
2017-01-23 16:15:45 +01:00
Timothy Arceri
f3f9207786 glsl: fix tes linking regression
Fixes regression caused by cbeba6bd48. I accidentally pushed the
wrong version of the patch.
2017-01-23 19:07:22 +11:00
Timothy Arceri
38a67f020d mesa: remove unused gl_shader_info field from gl_linked_shader
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-23 14:48:04 +11:00
Timothy Arceri
79f07e87c9 mesa/glsl: set and get cs layouts to and from shader_info
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-23 14:48:04 +11:00
Timothy Arceri
b96bddae67 mesa/glsl: set and get gs layouts directly to and from shader_info
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-23 14:48:04 +11:00
Timothy Arceri
cbeba6bd48 mesa/glsl/i965: set and get tes layouts directly to and from shader_info
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-23 14:48:04 +11:00
Timothy Arceri
64e201ab8f glsl: use last_vert_prog to get last {clip,cull}_distance_array_size
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-23 14:48:04 +11:00
Timothy Arceri
fc707f570f mesa/glsl: set {clip,cull}_distance_array_size directly in gl_program
There are some line wrapping violations here but those lines will get
deleted in the following patch.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-23 14:48:04 +11:00
Timothy Arceri
f86d15ed94 st/mesa/glsl: change xfb_program field to last_vert_prog
Now that the i965 backend doesn't depend on this field we can
make it more generic and short circuit a bunch of code paths.

The new field will be used in a following patch for another
clean-up.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-23 14:48:04 +11:00
Timothy Arceri
c505d6d852 mesa: use gl_program for CurrentProgram rather than gl_shader_program
This makes much more sense and should be more performant in some
critical paths such as SSO validation which is called at draw time.

Previously the CurrentProgram array could have contained multiple
pointers to the same struct which was confusing and we would often
need to fish out the information we were really after from the
gl_program anyway.

Also it was error prone to depend on the _LinkedShader array for
programs in current use because a failed linking attempt will lose
the information about the current program in use which is still
valid.

V2: fix validate_io() to compare linked_stages rather than the
consumer and producer to decide if we are looking at inward
facing shader interfaces which don't need validation.

Acked-by: Edward O'Callaghan <funfunctor@folklore1984.net>

To avoid build regressions the following 2 patches were squashed in to
this commit:

mesa/meta: rewrite _mesa_shader_program_use() and _mesa_program_use()

These are rewritten to do what the function name suggests, that is
_mesa_shader_program_use() sets the use of all stages and
_mesa_program_use() sets the use of a single stage.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Edward O'Callaghan <funfunctor@folklore1984.net>

mesa: update active relinked program

This likely fixes a subroutine bug where
_mesa_shader_program_init_subroutine_defaults() would never have been
called for the relinked program as we previously just set
_NEW_PROGRAM as dirty and never called the _mesa_use* functions when
linking.

Acked-by: Edward O'Callaghan <funfunctor@folklore1984.net>
2017-01-23 14:48:04 +11:00
Rob Clark
31daeb5bf1 freedreno/a5xx: set frag shader threadsize
Signed-off-by: Rob Clark <robdclark@gmail.com>
Cc: "17.0" <mesa-stable@lists.freedesktop.org>
2017-01-22 14:12:05 -05:00
Rob Clark
8d6af93e76 freedreno/a5xx: set fragcoordxy properly
What a3xx docs call IJPERSPCENTERREGID.. the xy coord passed into
bary.f.  We were incorrectly setting both this and gl_FragCoord.xy to
the same register resulting in all sorts of hilarity.

Fixes stk, vdrift, 0ad, probably a bunch others.

Signed-off-by: Rob Clark <robdclark@gmail.com>
Cc: "17.0" <mesa-stable@lists.freedesktop.org>
2017-01-22 14:11:43 -05:00
Rob Clark
278b97946f freedreno/ir3: setup var locations in standalone compiler
Signed-off-by: Rob Clark <robdclark@gmail.com>
2017-01-22 14:11:26 -05:00
Rob Clark
6cc93bedc1 freedreno/a5xx: fix psize
Note spritelist (POINTLIST_PSIZE) seems not to be a thing anymore on
a5xx.

Signed-off-by: Rob Clark <robdclark@gmail.com>
Cc: "17.0" <mesa-stable@lists.freedesktop.org>
2017-01-22 14:11:15 -05:00
Rob Clark
141a4f86d6 freedreno/a5xx: srgb fix
Signed-off-by: Rob Clark <robdclark@gmail.com>
Cc: "17.0" <mesa-stable@lists.freedesktop.org>
2017-01-22 14:11:04 -05:00
Rob Clark
69fbb458cf freedreno/a5xx: fix int vbos
Signed-off-by: Rob Clark <robdclark@gmail.com>
Cc: "17.0" <mesa-stable@lists.freedesktop.org>
2017-01-22 14:10:54 -05:00
Rob Clark
16671e9704 freedreno/a5xx: fix clear for uint/sint formats
Signed-off-by: Rob Clark <robdclark@gmail.com>
Cc: "17.0" <mesa-stable@lists.freedesktop.org>
2017-01-22 14:10:42 -05:00
Rob Clark
4d9aa4f67d freedreno/a5xx: fix cull state
Signed-off-by: Rob Clark <robdclark@gmail.com>
Cc: "17.0" <mesa-stable@lists.freedesktop.org>
2017-01-22 14:10:28 -05:00
Rob Clark
4c39458460 freedreno: update generated headers
Signed-off-by: Rob Clark <robdclark@gmail.com>
Cc: "17.0" <mesa-stable@lists.freedesktop.org>
2017-01-22 14:09:45 -05:00
Lionel Landwerlin
494b63f525 anv: descriptors: don't update immutables samplers with anything but their immutable value
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2017-01-21 19:22:27 +00:00
Jason Ekstrand
bb96b03461 nir/search: Use the correct bit size for integer comparisons
The previous code always compared integers as 64-bit.  Due to variations
in sign-extension in the code generated by nir_opt_algebraic.py, this
meant that nir_search doesn't always do what you want.  Instead, 32-bit
values should be matched as 32-bit and 64-bit values should be matched
as 64-bit.  While we're here we unify the unsigned and signed paths.
Now that we're using the right bit size, they should be the same since
the only difference we had before was sign extension.

This gets the UE4 bitfield_extract optimization working again.  It had
stopped working due to the constant 0xff00ff00 getting sign-extended
when it shouldn't have.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Cc: "17.0 13.0" <mesa-stable@lists.freedesktop.org>
2017-01-21 10:34:21 -08:00
Jason Ekstrand
817f9e3b17 intel/blorp/copy: Properly handle clear colors for CCS_E images
In order to handle CCS_E, we stomp the image format to a UINT format and
then do some bitcasting logic in the shader.  This works fine since SKL
render compression only considers the channel layout of the format and
not the format itself.  In order for this to work on images that have
been fast-cleared, we need to also convert the clear color so that, when
interpreted as UINT, it provides the same bit value as it would have in
the original format.  This fixes a bunch of OpenGL ES CTS tests for
copy_image when we start using CCS more aggressively.

Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Cc: "17.0" <mesa-stable@lists.freedesktop.org>
2017-01-21 10:34:09 -08:00
Kenneth Graunke
bb5db5564f glsl: Rename [u]int64_t tokens.
basetsd.h on Windows defines INT64 and UINT64 typedefs which conflict
with these.  Append "_TOK" to avoid conflicts.

Should fix the Windows build.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 19:39:20 -08:00
Matt Turner
892781d6c7 Revert "i965: Really don't emit Q or UQ moves on Gen < 8"
This reverts commit c95380c404.

Acked-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-20 19:12:31 -08:00
Matt Turner
d871f8e820 i965: Select DF type for 64-bit integers on Gen < 8.
Gen8 adds Q/UQ types. We attempted to change the types back to DF in the
generator (commit c95380c40), but an assertion added in the FP64 series
(commit e481dcc3) triggers before that code has a chance to execute.

In fact, using Q/UQ in the IR and then changing to DF in the generator
would not work in the presence of source modifiers, etc.

Fixes: d6fcede6 ("i965: Return Q and UQ types for int64 and uint64")
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-20 19:12:24 -08:00
Ian Romanick
db6d23cfd2 i965: Enable ARB_gpu_shader_int64 on Gen8+
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
fc16bf125f i965: Split SIMD16 CMP of Q and UQ instructions
This is basically the same as happens for doubles.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
51807c6493 i965: Enable 64-bit integer support for almost all unary and binary operations
Integer comparison functions (e.g., nir_op_ilt) are handled in the next
commit.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
821d7cece8 i965: Enable uploading 64-bit integer uniforms
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
e0579c5017 i965: Add 64-bit integer support for conversions and bitcasts
v2 (idr): Make the "from" type in a cast unsized.  This reduces the
number of required cast operations at the expense of slightly more
complex code.  However, this will be a dramatic improvement when other
sized integer types are added.  Suggested by Connor.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
f2fa510594 i965: Enable emitting Q and UQ instructions in the fs backend
v2: Fixup assertion in brw_reg_type_to_hw_type to allow
BRW_REGISTER_TYPE_{UQ,Q} on Gen8+.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
409e0b2d48 i965: Add support for constant evaluation on Q and UQ types
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
d6fcede60f i965: Return Q and UQ types for int64 and uint64
It seems like maybe this should return a different type based on Gen.  Q
and UQ only exist on Gen8+, but, based on the old comment, I believe
previous Gens can generate 64-bit moves.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
c95380c404 i965: Really don't emit Q or UQ moves on Gen < 8
It's much easier to do this in the generator rather than while coming
out of NIR.  brw_type_for_nir_type doesn't know the Gen, so we'd have to
add a bunch of plumbing.  The alternate fix is to not emit int64 moves
for doubles in the first place... but that seems even more difficult.

This change won't catch non-MOV instructions that try to use 64-bit
integer types on Gen < 8.  This may convert certain kinds of bugs in to
different kinds of bugs that are more difficult to detect (since the
assertions in the function won't catch them).

NOTE: I don't think anything can emit mixed-type 64-bit moves until the
same platform supports both ARB_gpu_shader_fp64 and
ARB_gpu_shader_int64.  When we enable int64 on Gen < 8, we can solve
this problem other ways.

This prevents regressions on HSW in the next patch.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
30164d501d nir: Add support for 64-bit integer types to split_var_copies_block
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
3c9b35372b nir: Enable 64-bit integer support for almost all unary and binary operations
v2: Don't up-convert the shift count parameter of shift instructions.
Suggested by Connor.  Add type_is_signed() function.  This will make
adding 8- and 16-bit types easier.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Cc: Jason Ekstrand <jason@jlekstrand.net>
2017-01-20 15:41:23 -08:00
Ian Romanick
fda33e09d8 nir: Shift count for shift opcodes is always 32-bits
Previously both sources were unsized.  This caused problems when the
thing being shifted was 64-bit but the shift count was 32-bit.  The
expectation in NIR is that all unsized sources (and destination) will
ultimately have the same size.

The changes in nir_opt_algebraic.py are to prevent errors like:

 Failed to parse transformation:
03:12:25   (('extract_i8', 'a', 'b'), ('ishr', ('ishl', 'a', ('imul', ('isub', 3, 'b'), 8)), 24), 'options->lower_extract_byte')
03:12:25 Traceback (most recent call last):
03:12:25   File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 610, in __init__
03:12:25     xform = SearchAndReplace(xform)
03:12:25   File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 495, in __init__
03:12:25     BitSizeValidator(varset).validate(self.search, self.replace)
03:12:25   File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 311, in validate
03:12:25     validate_dst_class = self._validate_bit_class_up(replace)
03:12:25   File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 414, in _validate_bit_class_up
03:12:25     src_class = self._validate_bit_class_up(val.sources[i])
03:12:25   File "/home/jenkins/workspace/Leeroy_2/repos/mesa/src/compiler/nir/nir_algebraic.py", line 420, in _validate_bit_class_up
03:12:25     assert src_class == src_type_bits
03:12:25 AssertionError

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Suggested-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Cc: Jason Ekstrand <jason@jlekstrand.net>
2017-01-20 15:41:23 -08:00
Ian Romanick
8ad74a2745 nir: Lower packing and unpacking of 64-bit integer types
This change makes me wonder whether double packing should be
reimplemented as int64BitsToDouble(packInt2x32(v)).  I'm a little on the
fence since not all platforms that support fp64 natively support int64.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
3460d05a71 nir: Add 64-bit integer support for conversions and bitcasts
v2 (idr): "cut them down later" => Remove ir_unop_b2u64 and
ir_unop_u642b.  Handle these with extra i2u or u2i casts just like
uint(bool) and bool(uint) conversion is done.

v3 (idr): Make the "from" type in a cast unsized.  This reduces the
number of required cast operations at the expense of slightly more
complex code.  However, this will be a dramatic improvement when other
sized integer types are added.  Suggested by Connor.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
3ca0029a0d nir: Add 64-bit integer constant support
v2: Rebase on 19a541f (nir: Get rid of nir_constant_data)

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com> [v1]
2017-01-20 15:41:23 -08:00
Ian Romanick
48e122244b nir: Add GLSL_TYPE_INT64 and GLSL_TYPE_UINT64 to glsl_get_bit_size
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
81952814a3 glsl: Optimize redundant pack(unpack()) and unpack(pack()) combinations
The lowering passes for 64-bit integer operations will generate a lot of
these.

v2: Modify the HANDLE_PACK_UNPACK_INVERSE so that the breaks apply to
the switch instead of the 'do { } while(true)' loop.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
7122d851aa glsl: Add a lowering pass for 64-bit integer modulus
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
695b04f7eb glsl: Add "built-in" functions to do 64%64 => 64 modulus
These functions are directly available in shaders.  A #define is added
to detect the presence.  This allows these functions to be tested using
piglit regardless of whether the driver uses them for lowering.  The
GLSL spec says that functions and macros beginning with __ are reserved
for use by the implementation... hey, that's us!

v2: Use function inlining.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
82c31f3eb9 glsl: Add a lowering pass for 64-bit integer division
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
012f2995c3 glsl: Add "built-in" functions to do 64/64 => 64 division
These functions are directly available in shaders.  A #define is added
to detect the presence.  This allows these functions to be tested using
piglit regardless of whether the driver uses them for lowering.  The
GLSL spec says that functions and macros beginning with __ are reserved
for use by the implementation... hey, that's us!

v2: Use function inlining.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
50d52df278 glsl: Add a lowering pass for 64-bit integer sign()
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
6b03b345eb glsl: Add "built-in" function for 64-bit integer sign()
These functions are directly available in shaders.  A #define is added
to detect the presence.  This allows these functions to be tested using
piglit regardless of whether the driver uses them for lowering.  The
GLSL spec says that functions and macros beginning with __ are reserved
for use by the implementation... hey, that's us!

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
6c3af04363 glsl: Add a lowering pass for 64-bit integer multiplication
v2: Rename lower_64bit.cpp and lower_64bit_test.cpp to lower_int64.
Suggested by Matt.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
330fc2413c glsl: Add "built-in" functions to do 64x64 => 64 multiplication
These functions are directly available in shaders.  A #define is added
to detect the presence.  This allows these functions to be tested using
piglit regardless of whether the driver uses them for lowering.  The
GLSL spec says that functions and macros beginning with __ are reserved
for use by the implementation... hey, that's us!

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
aa38bf1e59 glsl: Move builtin_function related prototypes to a separate file
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
8358e58f25 glsl/standalone: Enable ARB_gpu_shader_int64
v2: Add missing break in GLSL_TYPE_INT64 case.  Noticed by Matt.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
8dfea5348c i965: Avoid int64 warnings.
Just add operations to the switch statement here.

v2 (idr): "cut them down later" => Remove ir_unop_b2u64 and
ir_unop_u642b.  Handle these with extra i2u or u2i casts just like
uint(bool) and bool(uint) conversion is done.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
c101cee2ba i965: Avoid int64 induced warnings
Just add types into unsupported or double equivalent spots.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
a53f315ad8 mesa/program: Add unused ir operations.
v2 (idr): "cut them down later" => Remove ir_unop_b2u64 and
ir_unop_u642b.  Handle these with extra i2u or u2i casts just like
uint(bool) and bool(uint) conversion is done.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
f82ced5af3 glsl: Allow GLSL_TYPE_INT64 for ir_unop_abs and ir_unop_sign
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
8e7e1ae036 glsl: Print GLSL_TYPE_UINT64 and GLSL_TYPE_INT64 values
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Ian Romanick
0d14fec345 glsl: Add interaction between ARB_gpu_shader_int64 and ARB_shader_clock
If ARB_gpu_shader_int64 is supported, ARB_shader_clock also adds
clockARB() that returns a uint64_t.  Rather than add new opcodes and
intrinsics for this, just wrap the existing intrinsic with a
packUint2x32.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
bfc4080d38 glsl: Add 64-bit integer functions
These are all the allowed 64-bit functions from ARB_gpu_shader_int64
spec.

v2: restrict int64/double functions better.

v3 (idr): Delete spurious blank lines.  Suggested by Matt.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
050f38ef0b glsl/varying_packing: Add 64-bit integer support
As for the double code, but using the 64-bit integer conversions.

v2 (idr): Remove some spurious u2i() and i2u() operations when packing
and unpacking, respectively, int64_t varyings.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> [v1]
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
923aebdd46 glsl/ast: Add 64-bit integer support in some places.
Just add support in two more places in ast parsing.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
9ba9a7f854 glsl: Add 64-bit integer support to some operations.
This adds 64-bit integer support to some AST and IR operations where
it is needed.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
25c7a61b28 glsl/ir_builder: Add support for some 64-bit bitcasts.
We need builder support to implement some of the builtins.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
78cc44280e glsl/ast: Add 64-bit integer support to conversion functions
This adds support to call the new operations on conversions.

v2 (idr): Delete an unnecessary break-statement.  Noticed by Matt.  Add
a missing blank line.  Noticed by Ian.

v3 (idr): "cut them down later" => Remove ir_unop_b2u64 and
ir_unop_u642b.  Handle these with extra i2u or u2i casts just like
uint(bool) and bool(uint) conversion is done.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> [v1]
Reviewed-by: Matt Turner <mattst88@gmail.com> [v2]
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
85faf5082f glsl: Add 64-bit integer support for constant expressions
This just adds the new operations and add 64-bit integer support to all
the existing cases where it is needed.

v2: fix some issues found in testing.
v2.1: add unreachable (Ian), add missing int/uint pack/unpack (Dave).

v3 (idr): Rebase on top of idr's series to generate
ir_expression_operation_constant.h. In addition, this version:

    Adds missing support for ir_unop_bit_not, ir_binop_all_equal,
    ir_binop_any_nequal, ir_binop_vector_extract,
    ir_triop_vector_insert, and ir_quadop_vector.

    Removes support for uint64_t from ir_unop_abs and ir_unop_sign.

v4 (idr): "cut them down later" => Remove ir_unop_b2u64 and
ir_unop_u642b.  Handle these with extra i2u or u2i casts just like
uint(bool) and bool(uint) conversion is done.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> [v2]
Reviewed-by: Matt Turner <mattst88@gmail.com> [v3]
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
a68b6ee063 glsl/ir: Add support for 64-bit integer conversions.
This adds all the conversions in the world, I'm not 100% sure if all of
these are needed, but add all of them and we can cut them down later.

v2: fix issue with packing output types.

v3 (idr): Rebase on top of idr's series to generate
ir_expression_operation_constant.h.  Fix transposed ir_validate
assertions for ir_unop_u642i64 and ir_unop_i642u64.  Add missing
automatic type setup for ir_unop_u642i64 and ir_unop_i642u64.

v4 (idr): "cut them down later" => Remove ir_unop_b2u64 and
ir_unop_u642b.  Handle these with extra i2u or u2i casts just like
uint(bool) and bool(uint) conversion is done.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> [v2]
Reviewed-by: Matt Turner <mattst88@gmail.com> [v3]
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
7dd63c10c3 glsl: Add 64-bit integer support to uniform initialiser code
Just add support to the double case, same code should work.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
8df5287c23 glsl/varyings: Add 64-bit integer support.
This adds 64-bit ints to the link_varyings 64-bit support.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
bbce1c538d glsl/ast/ir: Add 64-bit integer constant support
This adds support for 64-bit integer constants to the parser,
ast and ir.

v2: fix a few issues found in testing.

v3: Add missing ir_constant copy constructor support.

v4: Use PRIu64 and PRId64 in printfs in glsl_parser_extras.cpp.
Suggested by Nicolai.  Rebase on Marek's linalloc changes.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> [v2]
Reviewed-by: Matt Turner <mattst88@gmail.com> [v3]
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
249007d13c mesa: Add support for 64-bit integer uniforms
This hooks up the API to the internals for 64-bit integer uniforms.

v2: update to use non-strict aliased alternatives

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
8ce53d4a2f glsl: Add basic ARB_gpu_shader_int64 types
This adds the builtins and the lexer support.

To avoid too many warnings, it adds basic support to the type in a few
other places in mesa, mostly in the trivial places.

It also adds a query, to be used later, for whether a type is a 32-bit or 64-bit integer.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
e90830bb8e glsl: Add ARB_gpu_shader_int64 boilerplate.
This just adds the basic boilerplate support.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
839ce21143 mesa: Add ARB_gpu_shader_int64 extension bits
This just adds the usual boilerplate in mesa core.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Dave Airlie
150f2fa789 mapi: Add support for ARB_gpu_shader_int64.
Just add the boilerplate xml code.

v2 (idr): Update dispatch_sanity.  Only add extension functions in core
profile.

v3 (idr): Remove comment line from gl_API.xml.  Suggested by Matt.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> [v1]
Reviewed-by: Matt Turner <mattst88@gmail.com>
2017-01-20 15:41:23 -08:00
Lionel Landwerlin
74c23bde5b anv: don't require render target isl bit for depth/stencil surfaces
Blorp can deal with depth/stencil surfaces blits/copies without the
render target requirement. Also having both render target and
depth/stencil requirement is incompatible from isl's point of view.

This fixes an image creation issue in the high level quality settings
of the Unity3D player, which requires a depth texture with src/dst
transfer & 4x multisampling.

v2: Simplify aspect checking condition (Jason)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Cc: 13.0 17.0 <mesa-stable@lists.freedesktop.org>
2017-01-20 21:39:51 +00:00
Lionel Landwerlin
8a28e764d0 spirv: don't assert with location decorations on non i/o variables
Some applications might add location decoration to samplers. Rather
than raising an error it seems it would make more sense to just
discard these decorations.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Cc: 17.0 <mesa-stable@lists.freedesktop.org>
2017-01-20 21:39:46 +00:00
Matt Turner
f57bdd4849 i965: Validate "Special Cases for Byte Operations"
Do this in general_restrictions_based_on_operand_types() because the two
rules that "Special Cases for Byte Operations" relax are checked there.
2017-01-20 11:40:52 -08:00
Matt Turner
75b7f5a269 i965: Validate "Region Alignment Rules" 2017-01-20 11:40:52 -08:00
Matt Turner
f817d132c1 i965: Validate "General Restrictions Based on Operand Types" 2017-01-20 11:40:52 -08:00
Matt Turner
83696b2234 i965: Validate "General Restrictions on Regioning Parameters" 2017-01-20 11:40:52 -08:00
Matt Turner
df0b7bcdfd i965: Replace reg_type_size[] with a function.
A function is necessary to handle immediate types.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-20 11:40:52 -08:00
Matt Turner
ada891d472 i965: Validate math instruction sources.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-20 11:40:52 -08:00
Matt Turner
fce0612fc2 i965: Claim that SEND/math has two sources.
src1 must be a descriptor (including the information to determine that
the SEND is doing an extended math operation), but src0 can actually be
null since it serves as the source of the implicit GRF -> MRF move.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-20 11:40:52 -08:00
Matt Turner
c9724682b5 i965: Simplify num_sources_from_inst().
desc will always be non-NULL, because brw_validate_instructions() does
not attempt to validate any instructions that fail the
is_unsupported_inst() check.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-20 11:40:52 -08:00
Matt Turner
9fd12666d0 i965: Factor out send_restrictions() function.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-20 11:40:52 -08:00
Matt Turner
7abc65dd7c i965: Factor out sources_not_null() validation function.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-20 11:40:52 -08:00
Matt Turner
a693305b61 i965: Structure code so unsupported inst will not generate more errors.
We want to rely on brw_opcode_desc() always returning non-NULL in other
validation functions. Other validation functions will be in the else
case of the block added in this patch.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-20 11:40:52 -08:00
Matt Turner
f0429359cc i965: Add a test for the EU assembly validator.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-20 11:40:52 -08:00
Matt Turner
ae9c69e1cf i965: Add a CHECK macro to call more complicated validation funcs.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-20 11:40:52 -08:00
Matt Turner
25448e4b7e i965: Make ERROR_IF usable from other functions.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-20 11:40:52 -08:00
Matt Turner
f9a4fc9b15 i965: Mark error annotation on correct SIMD16 inst.
inst, whose assignment can be seen in the last line of context pointed
to the correct instruction in the SIMD16 program, but src_offset was the
offset from the beginning of the SIMD16 program.

So if an instruction at offset 0x100 in the SIMD16 program was illegal,
we would mark an error on the instruction at offset 0x100 (which is
likely in the SIMD8 program).

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-20 11:40:52 -08:00
Matt Turner
59003f3447 i965/vec4: Use UW-typed operands when dest is UW.
Using a UD-typed operand makes the execution size D, and if the size of
the execution type is greater than the size of the destination type, the
destination must be appropriately strided.

We actually just want UW-types all around.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-20 11:40:52 -08:00
Matt Turner
68bcbfa9e4 i965: Use W-typed immediate in brw_F32TO16().
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-20 11:40:52 -08:00
Matt Turner
3eada948a0 gtest: Update to 1.8.0.
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-20 11:40:52 -08:00
Matt Turner
cbc39e541f i965: Don't change F->VF if dest type is DF.
We change the immediate source type to VF to allow instruction
compaction, but there are no entries in the compaction table for DF, so
there's no point in doing this.

Additionally, mixing floating-point types is now allowed except for
F and VF.
2017-01-20 11:40:52 -08:00
Lionel Landwerlin
a72dea9483 anv: fix comment typo
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2017-01-20 16:46:32 +00:00
Lionel Landwerlin
0c3d058723 spirv: fix warn string typo
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2017-01-20 16:46:29 +00:00
Lionel Landwerlin
bac6fe5c77 blorp: remove unnecessary struct declaration
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2017-01-20 16:46:21 +00:00
Marek Olšák
74f40d1570 Revert "radeonsi: reject invalid vertex element formats"
This reverts commit 9e4d1d8a7c.

It broke arb_vertex_type_10f_11f_11f_rev-draw-vertices, which has
first_non_void == -1.
2017-01-20 16:02:45 +01:00
Philipp Zabel
a37cf630b4 gallium: add pipe_screen::resource_changed callback wrappers
Add resource_changed to the ddebug, rbug, and trace wrappers. Since it
is optional, there is no need to add it to noop.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Suggested-by: Nicolai Hähnle <nhaehnle@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
2017-01-20 15:30:30 +01:00
Philipp Zabel
97de7e6586 st/mesa: ask pipe driver to recreate derived internal resources when (re-)binding external textures
Use the resource_changed callback to invalidate internal resources
derived from external textures when they are (re-)bound. This is needed
to comply with the requirement from the GL_OES_EGL_image_external
extension that a call to glBindTexture guarantees that all further
sampling will return values that correspond to the values in the
external texture at or after the time that glBindTexture was called.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
2017-01-20 15:30:30 +01:00
Philipp Zabel
9bab714c61 mesa: update external textures when (re-)binding
To comply with the requirement from the GL_OES_EGL_image_external
extension that a call to glBindTexture guarantees that all further
sampling will return values that correspond to the values in the
external texture at or after the time that glBindTexture was called,
do not bail out early from mesa_BindTextures if the target is
external.
This will later allow the state tracker to instruct the pipe driver
to invalidate internal resources derived from the external texture.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
2017-01-20 15:30:30 +01:00
Philipp Zabel
c70ed79e79 etnaviv: implement resource_changed to invalidate internal resources derived from imported buffers
Implement the resource_changed pipe callback to invalidate internal
resources derived from imported buffers. This is needed to update the
texture for re-imported renderables.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
2017-01-20 15:30:30 +01:00
Philipp Zabel
362edc868c etnaviv: initialize seqno of imported resources
Imported resources already have contents that we want to be copied to
texture resources derived from them. Set initial seqno of imported
resources to 1, just as if it had already been rendered to.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
2017-01-20 15:30:29 +01:00
Philipp Zabel
2c95d6dac3 st/dri: ask the driver to update its internal copies on reimport
For imported buffers that can't be used directly as a source to the
texture samplers, the pipe driver might need to create an internal
copy, for example in a different tiling layout. When buffers are
reimported they may contain new image data, so the driver internal
copies need to be recreated.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
2017-01-20 15:30:29 +01:00
Philipp Zabel
30853f55a3 gallium: add pipe_screen::resource_changed
Add a hook to tell drivers that an imported resource may have changed
and they need to update their internal derived resources.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
2017-01-20 15:30:29 +01:00
Emil Velikov
5872850b88 configure.ac: move require_dri_shared_libs_and_glapi() before its users
Otherwise we'll get a lovely message as below:
"require_dri_shared_libs_and_glapi: command not found"

Cc: Steven Newbury <steve@snewbury.org.uk>
Reported-by: Steven Newbury <steve@snewbury.org.uk>
Fixes: da410e6afa "configure: explicitly require shared glapi for
enable-dri"
Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Tested-by: Steven Newbury <steve@snewbury.org.uk>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-20 14:27:08 +00:00
Samuel Pitoiset
383fc8e9f3 gallium/hud: add missing break in hud_cpufreq_graph_install()
Fixes: e99b9395be "gallium/hud: Add support for CPU frequency monitoring"
Cc: mesa-stable@lists.freedesktop.org
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
2017-01-20 10:33:47 +01:00
Tapani Pälli
4148881513 android: correct typo in build
Fixes: 63c58dfc65
Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
2017-01-20 07:49:10 +02:00
Elie TOURNIER
9fdaeb7776 nir: add min/max optimisation
Add the following optimisations:

min(x, -x) = -abs(x)
min(x, -abs(x)) = -abs(x)
min(x, abs(x)) = x
max(x, -abs(x)) = x
max(x, abs(x)) = abs(x)
max(x, -x) = abs(x)

shader-db:

total instructions in shared programs: 13067779 -> 13067775 (-0.00%)
instructions in affected programs: 249 -> 245 (-1.61%)
helped: 4
HURT: 0

total cycles in shared programs: 252054838 -> 252054806 (-0.00%)
cycles in affected programs: 504 -> 472 (-6.35%)
helped: 2
HURT: 0

Signed-off-by: Elie Tournier <tournier.elie@gmail.com>
Reviewed-by: Plamena Manolova <plamena.manolova@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2017-01-19 21:44:28 -08:00
Jason Ekstrand
f22ee14644 nir/algebraic: Only include nir_search_helpers once
We were including it once per value, so probably around 10k times.
Let's not cause the compiler any more work than we have to.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
2017-01-19 21:40:30 -08:00
Anuj Phogat
6de293284b i965: Remove unnecessary mt->compressed checks
It's harmless to use ALIGN_NPOT() for uncompressed formats
because they have block width/height = 1.

Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
2017-01-19 14:28:18 -08:00
Anuj Phogat
c7e37a0cb8 i965: Fix indentation in brw_miptree_layout_2d()
Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
2017-01-19 14:28:18 -08:00
Anuj Phogat
47d9b3a9dd i965: Fix comment to include 3d textures
Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
2017-01-19 14:28:18 -08:00
Chad Versace
de0b0a3a9c i965: Delete pending CCS and HiZ ops in intel_miptree_make_shareable()
Fixes crash in piglit
`egl_khr_gl_renderbuffer_image-clear-shared-image GL_DEPTH_COMPONENT24`
on Skylake.

The crash happened because blorp attempted to execute a pending hiz
clear after the hiz buffer was deleted. Deleting the pending hiz ops
when the hiz buffer gets deleted fixes the crash.

For good measure, this patch also deletes all pending CCS/MCS ops when
the CCS/MCS buffer gets deleted. I'm not aware of any bugs
caused by the dangling ops, but deleting them is clearly the right thing
to do.

Cc: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99265
2017-01-19 13:47:57 -08:00
Andres Rodriguez
e0674e740b vulkan/wsi: clarify the severity of lack of DRI3 v2
The current message sounds like a small warning, clarify that it can
result in lack of presentation support and application crashes.

v2: add "if they do" (Bas)

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98263
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
2017-01-19 15:41:42 +00:00
Andres Rodriguez
a3ad6a34c6 radv: fix include order for installed headers v2
In situations where libdrm_amdgpu and mesa are installed to the same
location, the mesa installed headers will take precedence over the git
source headers.

This is due to the AMDGPU_CFLAGS containing the install directory.

This situation can cause build errors if the git version of a header is
newer than the currently installed version of a header (e.g. git pull
updates vulkan.h)

Note: using the same install prefix for mesa and libdrm is probably a
common occurrence since it is described in the radeonBuildHowTo wiki:
https://www.x.org/wiki/radeonBuildHowTo/

v2: added sign-off

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
2017-01-19 15:41:38 +00:00
Emil Velikov
0f8afde7ba docs/releasing: document post branch version bump
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
2017-01-19 15:38:30 +00:00
Emil Velikov
49e4204b12 mesa: Bump version to 17.1.0-devel
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
2017-01-19 15:38:30 +00:00
Marek Olšák
9e4d1d8a7c radeonsi: reject invalid vertex element formats
This should fix a coverity defect.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
2017-01-19 16:38:37 +01:00
Marek Olšák
e490b7812c radeonsi: don't forget to add HTILE to the buffer list for texturing
This fixes VM faults. Discovered by Samuel Pitoiset.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98975
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99450

Cc: 17.0 13.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
2017-01-19 16:38:37 +01:00
Nayan Deshmukh
31908d6a4a st/vdpau: only send buffers with B8G8R8A8 format to X
PresentPixmap only works if the pixmap depth matches with the
window depth, otherwise it returns a BadMatch protocol error.
Even if the depths match, the result won't look correctly
if the VDPAU RGB component order doesn't match the X11 one so
we only allow the X11 format.
For other buffers we copy them to a buffer which is send to X.

v2: only send buffers with format VDP_RGBA_FORMAT_B8G8R8A8
v3: reword commit message
v4: add comment explaining the code

Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
2017-01-19 15:34:02 +01:00
Nicolai Hähnle
3cd092c415 radeonsi: fix texture gather on stencil textures
At least on VI, texture gather doesn't work with a 24_8 data format, so
use 8_8_8_8 and a modified swizzle instead.

A bit of background: When creating a GL_STENCIL_INDEX8 texture, we select
the X24S8 pipe format because we don't support stencil-only render targets
properly. With mip-mapping this can lead to a setup where the tiling is
incompatible with stencil texturing, and a flushed stencil texture is
used. For the flushed stencil, a literal X24S8 is used because there were
issues with an 8bpp DB->CB copy.

Longer term, it would be good if we could get away from these workarounds,
i.e. properly support an S8 format for stencil-only rendering and flushed
stencil. Since stencil texturing is somewhat rare, it's not a high
priority.

Fixes GL45-CTS.texture_cube_map_array.sampling.

Cc: 17.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Acked-by: Edward O'Callaghan <funfunctor@folklore1984.net>
2017-01-19 15:02:57 +01:00
Alejandro Piñeiro
905961452a mesa/main: Fix FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE for NONE attachment type
When the attachment type is NONE (att->Type),
FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE should be NONE always.

Note that technically, the current behaviour follows the spec. From
OpenGL 4.5 spec, Section 9.2.3 "Framebuffer Object Queries":

   "If the value of FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE is NONE, then
    either no framebuffer is bound to target; or the default
    framebuffer is bound, attachment is DEPTH or STENCIL, and the
    number of depth or stencil bits, respectively, is zero."

Reading literally this paragraph, for the default framebuffer, NONE
should be only returned if attachment is DEPTH and STENCIL without
being allocated.

But it doesn't make much sense to return DEFAULT_FRAMEBUFFER if
the attachment type is NONE. For example, this can happen if the
attachment is FRONT_RIGHT when running in monoscopic mode, as that attachment
is only available on stereo mode.

With the current behaviour, defensive querying of the object type
would not work properly. So you could query the object type checking
for NONE, get DEFAULT_FRAMEBUFFER, and then get an INVALID_OPERATION
when requesting other pnames (like RED_SIZE), as the real attachment
type is NONE.

This fixes:
GL45-CTS.direct_state_access.framebuffers_get_attachment_parameters

v2: don't change the behaviour for att->Type != GL_NONE, as caused
    some ES CTS regressions
v3: simplify condition (Iago)

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
2017-01-19 11:55:41 -02:00
Zachary Michaels
d7d32b3bfe radeonsi: Always leave poly_offset in a valid state
This commit makes si_update_poly_offset set poly_offset to NULL if
uses_poly_offset is false. This way poly_offset either points into the
currently queued rasterizer, or it is NULL.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99451
Cc: "13.0 17.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2017-01-19 10:50:16 +01:00
Nicolai Hähnle
a7c635ec65 mesa/main: fix meta caller of _mesa_ClampColor
Since _mesa_ClampColor properly checks for support of the API function
now, its meta callers need to check support as well.

Fixes: 963311b71f ("mesa/main: fix version/extension checks in _mesa_ClampColor")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99401
Tested-by: Mark Janes <mark.a.janes@intel.com>
Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Cc: "17.0" <mesa-stable@lists.freedesktop.org>
2017-01-19 09:13:25 +01:00
Timothy Arceri
4d65f68a9b mesa/glsl: move TransformFeedbackBufferStride to gl_shader
Here we remove the single use of this field in gl_linked_shader
which allows us to move the field out of gl_shader_info

While we are at it we rewrite link_xfb_stride_layout_qualifiers()
to be more clear.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-19 17:05:26 +11:00
Timothy Arceri
e603cf1841 glsl: exit loop early if we find xfb layout qualifiers
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-19 17:05:26 +11:00
Timothy Arceri
7983ed5f65 glsl: set InnerCoverage directly in gl_program
Also move out of the shared gl_shader_info.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-19 17:05:26 +11:00
Timothy Arceri
1f141eaef6 glsl: tidy up PostDepthCoverage shader field
There is no reason for this to be in the shared gl_shader_info or
to copy it to gl_program at the end of linking (it's already there).

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-19 17:05:26 +11:00
Timothy Arceri
3d41f4b990 mesa/glsl: move pixel_center_integer to gl_shader
This is only used by gl_linked_shader as a temp during linking
so use a temp there instead.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-19 17:05:26 +11:00
Timothy Arceri
0a9d102ddc mesa/glsl: move origin_upper_left to gl_shader
This is only used by gl_linked_shader as a temp during linking
so use a temp there instead.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-19 17:05:26 +11:00
Timothy Arceri
ceeedb9bb0 mesa/glsl: move uses_gl_fragcoord to gl_shader
This is only used by gl_linked_shader as a temp during linking
so use a temp there instead.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-19 17:05:26 +11:00
Timothy Arceri
66a6050ad8 mesa/glsl: move redeclares_gl_fragcoord to gl_shader
This is never used in gl_linked_shader other than as a temp
during linking so just use a temp instead.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-19 17:05:26 +11:00
Timothy Arceri
cc7ecce253 mesa/glsl: move ARB_fragment_coord_conventions_enable field
This is only used by gl_shader not gl_linked_shader so move it
there.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-19 17:05:26 +11:00
Timothy Arceri
ae28c5a60c st/mesa/glsl: set early_fragment_tests directly in shader_info
We also move EarlyFragmentTests out of the gl_shader_info struct
as it is now only used by gl_shader.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-19 17:05:26 +11:00
Timothy Arceri
5c93d27423 mesa/glsl/i965: set and use tcs vertices_out directly
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-19 17:05:26 +11:00
Timothy Arceri
4cd709e2bc i965: get outputs_written from gl_program
There is no need to go via the pointer in nir_shader. This change
is required for the shader cache as we don't create a nir_shader.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-19 17:05:26 +11:00
Dave Airlie
ef71b867ee gallivm: use #ifdef not #if for PIPE_ARCH_BIG_ENDIAN
This fixes the build on ppc/s390.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Cc: "17.0" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2017-01-19 16:00:53 +10:00
Timothy Arceri
3fe8d04a6d mesa: don't always set _NEW_PROGRAM when linking
We only need to set it when linking was successful and the program
being linked is currently active.

The programs_in_use mask is just used as a flag for now but in
a future change we will use it to update the CurrentProgram array.

V2: make sure to flush vertices before linking (suggested by Marek)

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
2017-01-19 15:55:02 +11:00
Timothy Arceri
aad93402c0 mesa: change init subroutine defaults helper to work per gl_program
A later patch will result in SSO programs calling this helper
per gl_program rather than per gl_shader_program.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-19 15:55:02 +11:00
Timothy Arceri
90d950038f mesa/glsl: move ProgramResourceList to gl_shader_program_data
We also move NumProgramResourceList at the same time.

GLES does interface validation on SSO at runtime so we need to move
this to be able to switch to storing gl_program pointers in
CurrentProgram.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
2017-01-19 15:55:02 +11:00
Timothy Arceri
62f718bfcb glsl: store number of explicit uniform locations in gl_shader_program
This allows us to cleanup the functions that pass this count around,
but more importantly we will be able to call the uniform linking
functions from the backend's linker without having to pass this
information to the backend directly via Driver.LinkShader().

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-19 15:55:02 +11:00
Timothy Arceri
c054bbf0d4 glsl: create a new link_and_validate_uniforms() helper
Currently this just breaks up the linking code a bit but in the
future i965 will call this from the backend via Driver.LinkShader()
so that we can do NIR optimisations before assigning uniform
locations.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-19 15:55:02 +11:00
Timothy Arceri
ce4fb3c8a1 glsl: make a bunch of varying linking functions static
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-19 15:55:02 +11:00
Timothy Arceri
90fffd1770 glsl: move more varying linking code to link_varyings.cpp
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-19 15:55:02 +11:00
Topi Pohjolainen
180653c357 i965/blorp: Make post draw flush more explicit
Blits do not need any special treatment as the target buffer
object is added to render cache just as one does for normal draw.
Color clears and resolves in turn require explicit "end of pipe
synchronization". It is not clear what this means exactly but the
assumption is that render cache flush with command stream stall
should be sufficient.

Signed-off-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2017-01-18 22:42:47 +02:00
Topi Pohjolainen
46b346899d i965/gen6: Issue direct depth stall and flush after depth clear
instead of calling unconditionally brw_emit_mi_flush() which
does:

   brw_emit_pipe_control_flush(brw,
                                PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                PIPE_CONTROL_CS_STALL);

   brw_emit_pipe_control_flush(brw,
                                PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                PIPE_CONTROL_CONST_CACHE_INVALIDATE);

Signed-off-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2017-01-18 22:42:47 +02:00
Topi Pohjolainen
e6da6943fe i965: Make depth clear flushing more explicit
Current blorp logic issues unconditional "flush everything"
(see brw_emit_mi_flush()) after each render. For example, all
blits issue this unconditionally which shouldn't be needed if
they set render cache properly so that subsequent renders do
necessary flushing before drawing.

In case of piglit:

ext_framebuffer_multisample-accuracy all_samples depth_draw small

intel_hiz_exec() is always preceded by blorp blit and the
unconditional flush looks to hide the lack of stall and flushes
in depth clears. By removing the brw_emit_mi_flush() I get gpu
hangs.

This patch adds the stalls and flushes mandated by the spec
and gets rid of those hangs.

v2 (Jason, Ken): Document the rationale for separating
                 depth cache flush and stall on Gen7.

Signed-off-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2017-01-18 22:42:47 +02:00
Topi Pohjolainen
4840a53e90 i965/blorp: Use the render cache mechanism instead of explicit flushing
by replacing brw_emit_mi_flush() with brw_render_cache_set_check_flush().
The latter splits the flush in two:

   brw_emit_pipe_control_flush(brw,
                               PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                               PIPE_CONTROL_RENDER_TARGET_FLUSH |
                               PIPE_CONTROL_CS_STALL);

   brw_emit_pipe_control_flush(brw,
                               PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                               PIPE_CONTROL_CONST_CACHE_INVALIDATE);

instead of

   int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH;
   if (brw->gen >= 6) {
      flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
               PIPE_CONTROL_CONST_CACHE_INVALIDATE |
               PIPE_CONTROL_DEPTH_CACHE_FLUSH |
               PIPE_CONTROL_VF_CACHE_INVALIDATE |
               PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
               PIPE_CONTROL_CS_STALL;
   }
   brw_emit_pipe_control_flush(brw, flags);

v2 (Jason): Check that destination exists before trying to add to
            render cache. Depth clears and resolves don't have it.

Signed-off-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2017-01-18 22:42:47 +02:00
Emil Velikov
ea8b2624c8 utils: really remove the __END_DECLS macro
Fixes: d1efa09d34 "util: import sha1 implementation from OpenBSD"
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
2017-01-18 20:09:57 +00:00
Emil Velikov
9f8dc3bf03 utils: build sha1/disk cache only with Android/Autoconf
Earlier commit imported a SHA1 implementation and relaxed the SHA1 and
disk cache handling, breaking the Windows builds.

Restrict things for now until we get to a proper fix.

Fixes: d1efa09d34 "util: import sha1 implementation from OpenBSD"
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
2017-01-18 20:09:01 +00:00
463 changed files with 14398 additions and 7762 deletions

View File

@@ -1,11 +1,26 @@
language: c
sudo: false
sudo: true
dist: trusty
cache:
apt: true
ccache: true
directories:
- $HOME/.ccache
addons:
apt:
packages:
- libdrm-dev
- x11proto-xf86vidmode-dev
- libexpat1-dev
- libxcb-dri2-0-dev
- libx11-xcb-dev
- llvm-3.5-dev
# llvm-config is not in the dev package?
- llvm-3.5
# LLVM packaging is broken and misses this dep.
- libedit-dev
- scons
env:
global:
@@ -21,266 +36,18 @@ env:
- XCBPROTO_VERSION=xcb-proto-1.11
- LIBXCB_VERSION=libxcb-1.11
- LIBXSHMFENCE_VERSION=libxshmfence-1.2
- LIBTXC_DXTN_VERSION=libtxc_dxtn-1.0.1
- LIBVDPAU_VERSION=libvdpau-1.1
- LIBVA_VERSION=libva-1.6.2
- LIBWAYLAND_VERSION=wayland-1.11.1
- PKG_CONFIG_PATH=$HOME/prefix/lib/pkgconfig
- LD_LIBRARY_PATH="$HOME/prefix/lib:$LD_LIBRARY_PATH"
matrix:
include:
- env:
- LABEL="make loaders/classic DRI"
- BUILD=make
- MAKEFLAGS="-j4"
- MAKE_CHECK_COMMAND="make check"
- DRI_LOADERS="--enable-glx --enable-gbm --enable-egl --with-egl-platforms=x11,drm,surfaceless,wayland --enable-osmesa"
- DRI_DRIVERS="i915,i965,radeon,r200,swrast,nouveau"
- GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
- GALLIUM_DRIVERS=""
- VULKAN_DRIVERS=""
addons:
apt:
packages:
- xz-utils
- x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- env:
# NOTE: Building SWR is 2x (yes two) times slower than all the other
# gallium drivers combined.
# Start this early so that it doesn't hinder the run time.
- LABEL="make Gallium Drivers SWR"
- BUILD=make
- MAKEFLAGS="-j4"
- MAKE_CHECK_COMMAND="true"
- LLVM_VERSION=3.9
- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
- OVERRIDE_CC="gcc-5"
- OVERRIDE_CXX="g++-5"
- DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
- DRI_DRIVERS=""
- GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
- GALLIUM_DRIVERS="swr"
- VULKAN_DRIVERS=""
addons:
apt:
sources:
- ubuntu-toolchain-r-test
- llvm-toolchain-trusty-3.9
packages:
# LLVM packaging is broken and misses these dependencies
- libedit-dev
# From sources above
- g++-5
- llvm-3.9-dev
# Common
- xz-utils
- x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
- env:
- LABEL="make Gallium Drivers Other"
- BUILD=make
- MAKEFLAGS="-j4"
- MAKE_CHECK_COMMAND="true"
- LLVM_VERSION=3.9
- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
- DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
- DRI_DRIVERS=""
- GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
- GALLIUM_DRIVERS="i915,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,etnaviv,imx"
- VULKAN_DRIVERS=""
addons:
apt:
sources:
- llvm-toolchain-trusty-3.9
packages:
# LLVM packaging is broken and misses these dependencies
- libedit-dev
# From sources above
- llvm-3.9-dev
# Common
- xz-utils
- x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
- env:
# NOTE: Analogous to SWR above, building Clover is quite slow.
- LABEL="make Gallium ST Clover"
- BUILD=make
- MAKEFLAGS="-j4"
- MAKE_CHECK_COMMAND="true"
- LLVM_VERSION=3.6
- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
- OVERRIDE_CC=gcc-4.7
- OVERRIDE_CXX=g++-4.7
- DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
- DRI_DRIVERS=""
- GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
# i915 most likely doesn't work with OpenCL.
# Regardless - we're doing a quick build test here.
- GALLIUM_DRIVERS="i915"
- VULKAN_DRIVERS=""
addons:
apt:
sources:
- llvm-toolchain-trusty-3.6
packages:
- libclc-dev
# LLVM packaging is broken and misses these dependencies
- libedit-dev
- g++-4.7
# From sources above
- llvm-3.6-dev
- clang-3.6
- libclang-3.6-dev
# Common
- xz-utils
- x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
- env:
- LABEL="make Gallium ST Other"
- BUILD=make
- MAKEFLAGS="-j4"
- MAKE_CHECK_COMMAND="true"
- DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
- DRI_DRIVERS=""
- GALLIUM_ST="--enable-dri --disable-opencl --enable-xa --enable-nine --enable-xvmc --enable-vdpau --enable-va --enable-omx --enable-gallium-osmesa"
# We need swrast for osmesa and nine.
# i915 most likely doesn't work with most ST.
# Regardless - we're doing a quick build test here.
- GALLIUM_DRIVERS="i915,swrast"
- VULKAN_DRIVERS=""
addons:
apt:
packages:
# Nine requires gcc 4.6... which is the one we have right ?
- libxvmc-dev
# Build locally, for now.
#- libvdpau-dev
#- libva-dev
- libomxil-bellagio-dev
# LLVM packaging is broken and misses these dependencies
- libedit-dev
# Common
- xz-utils
- x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
- env:
- LABEL="make Vulkan"
- BUILD=make
- MAKEFLAGS="-j4"
- MAKE_CHECK_COMMAND="make -C src/gtest check && make -C src/intel check"
- LLVM_VERSION=3.9
- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
# XXX: we want to test the WSI, but those are enabled via the EGL toggles
# XXX: Platform X11 dependencies are checked when --enable-glx is set
- DRI_LOADERS="--enable-glx --disable-gbm --enable-egl --with-egl-platforms=x11,wayland"
# i965 is needed for Intel's Vulkan driver (anv)
- DRI_DRIVERS="i965"
# XXX: enable DRI for EGL above
- GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
- GALLIUM_DRIVERS=""
- VULKAN_DRIVERS="intel,radeon"
addons:
apt:
sources:
- llvm-toolchain-trusty-3.9
packages:
# LLVM packaging is broken and misses these dependencies
- libedit-dev
# From sources above
- llvm-3.9-dev
# Common
- xz-utils
- x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
- env:
- LABEL="scons"
- BUILD=scons
- SCONSFLAGS="-j4"
# Explicitly disable.
- SCONS_TARGET="llvm=0"
# Keep it symmetrical to the make build.
- SCONS_CHECK_COMMAND="scons llvm=0 check"
addons:
apt:
packages:
- scons
# Common
- xz-utils
- x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
- env:
- LABEL="scons LLVM"
- BUILD=scons
- SCONSFLAGS="-j4"
- SCONS_TARGET="llvm=1"
# Keep it symmetrical to the make build.
- SCONS_CHECK_COMMAND="scons llvm=1 check"
- LLVM_VERSION=3.3
- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
addons:
apt:
packages:
- scons
# LLVM packaging is broken and misses these dependencies
- libedit-dev
- llvm-3.3-dev
# Common
- xz-utils
- x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
- env:
- LABEL="scons SWR"
- BUILD=scons
- SCONSFLAGS="-j4"
- SCONS_TARGET="swr=1"
- LLVM_VERSION=3.9
- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
# Keep it symmetrical to the make build. There's no actual SWR, yet.
- SCONS_CHECK_COMMAND="true"
- OVERRIDE_CC="gcc-5"
- OVERRIDE_CXX="g++-5"
addons:
apt:
sources:
- ubuntu-toolchain-r-test
- llvm-toolchain-trusty-3.9
packages:
- scons
# LLVM packaging is broken and misses these dependencies
- libedit-dev
# From sources above
- g++-5
- llvm-3.9-dev
# Common
- xz-utils
- x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
matrix:
- BUILD=make
- BUILD=scons
install:
- export PATH="/usr/lib/ccache:$PATH"
- pip install --user mako
# Since libdrm gets updated in configure.ac regularly, try to pick up the
# latest version from there.
- for line in `grep "^LIBDRM.*_REQUIRED=" configure.ac`; do
- for line in `grep "^LIBDRM_.*_REQUIRED=" configure.ac`; do
old_ver=`echo $LIBDRM_VERSION | sed 's/libdrm-//'`;
new_ver=`echo $line | sed 's/.*REQUIRED=//'`;
if `echo "$old_ver,$new_ver" | tr ',' '\n' | sort -Vc 2> /dev/null`; then
@@ -325,70 +92,21 @@ install:
- wget http://dri.freedesktop.org/libdrm/$LIBDRM_VERSION.tar.bz2
- tar -jxvf $LIBDRM_VERSION.tar.bz2
- (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 --enable-freedreno --enable-etnaviv-experimental-api && make install)
- (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 --enable-etnaviv-experimental-api && make install)
- wget $XORG_RELEASES/lib/$LIBXSHMFENCE_VERSION.tar.bz2
- tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2
- (cd $LIBXSHMFENCE_VERSION && ./configure --prefix=$HOME/prefix && make install)
# libtxc-dxtn uses the patented S3 Texture Compression
# algorithm. Therefore, we don't want to use this library but it is
# still possible through setting the USE_TXC_DXTN variable to yes in
# the travis web UI.
#
# According to Wikipedia, the patent expires on October 2, 2017:
# https://en.wikipedia.org/wiki/S3_Texture_Compression#Patent
- if test "x$USE_TXC_DXTN" = xyes; then
wget https://people.freedesktop.org/~cbrill/libtxc_dxtn/$LIBTXC_DXTN_VERSION.tar.bz2;
tar -jxvf $LIBTXC_DXTN_VERSION.tar.bz2;
(cd $LIBTXC_DXTN_VERSION && ./configure --prefix=$HOME/prefix && make install);
fi
- wget http://people.freedesktop.org/~aplattner/vdpau/$LIBVDPAU_VERSION.tar.bz2
- tar -jxvf $LIBVDPAU_VERSION.tar.bz2
- (cd $LIBVDPAU_VERSION && ./configure --prefix=$HOME/prefix && make install)
- wget http://www.freedesktop.org/software/vaapi/releases/libva/$LIBVA_VERSION.tar.bz2
- tar -jxvf $LIBVA_VERSION.tar.bz2
- (cd $LIBVA_VERSION && ./configure --prefix=$HOME/prefix --disable-wayland --disable-dummy-driver && make install)
- wget http://wayland.freedesktop.org/releases/$LIBWAYLAND_VERSION.tar.xz
- tar -axvf $LIBWAYLAND_VERSION.tar.xz
- (cd $LIBWAYLAND_VERSION && ./configure --prefix=$HOME/prefix --enable-libraries --without-host-scanner --disable-documentation --disable-dtd-validation && make install)
# Generate the header since one is missing on the Travis instance
- mkdir -p linux
- printf "%s\n" \
"#ifndef _LINUX_MEMFD_H" \
"#define _LINUX_MEMFD_H" \
"" \
"#define __NR_memfd_create 319" \
"#define SYS_memfd_create __NR_memfd_create" \
"" \
"#define MFD_CLOEXEC 0x0001U" \
"#define MFD_ALLOW_SEALING 0x0002U" \
"" \
"#endif /* _LINUX_MEMFD_H */" > linux/memfd.h
script:
- if test "x$BUILD" = xmake; then
test -n "$OVERRIDE_CC" && export CC="$OVERRIDE_CC";
test -n "$OVERRIDE_CXX" && export CXX="$OVERRIDE_CXX";
export CC="$CC -isystem`pwd`";
./autogen.sh --enable-debug
$DRI_LOADERS
--with-dri-drivers=$DRI_DRIVERS
$GALLIUM_ST
--with-gallium-drivers=$GALLIUM_DRIVERS
--with-vulkan-drivers=$VULKAN_DRIVERS
--with-egl-platforms=x11,drm
--with-dri-drivers=i915,i965,radeon,r200,swrast,nouveau
--with-gallium-drivers=svga,swrast,vc4,virgl,r300,r600,etnaviv,imx
--disable-llvm-shared-libs
&&
make && eval $MAKE_CHECK_COMMAND;
fi
- if test "x$BUILD" = xscons; then
test -n "$OVERRIDE_CC" && export CC="$OVERRIDE_CC";
test -n "$OVERRIDE_CXX" && export CXX="$OVERRIDE_CXX";
scons $SCONS_TARGET && eval $SCONS_CHECK_COMMAND;
;
make && make check;
elif test x$BUILD = xscons; then
scons;
fi

View File

@@ -78,22 +78,10 @@ endif
ifeq ($(MESA_ENABLE_LLVM),true)
LOCAL_CFLAGS += \
-DHAVE_LLVM=0x0305 -DMESA_LLVM_VERSION_PATCH=2 \
-D__STDC_CONSTANT_MACROS \
-D__STDC_FORMAT_MACROS \
-D__STDC_LIMIT_MACROS
ifeq ($(MESA_ANDROID_MAJOR_VERSION),5)
LOCAL_CFLAGS += -DHAVE_LLVM=0x0305 -DMESA_LLVM_VERSION_PATCH=2
ELF_INCLUDES := external/elfutils/0.153/libelf
endif
ifeq ($(MESA_ANDROID_MAJOR_VERSION),6)
LOCAL_CFLAGS += -DHAVE_LLVM=0x0307 -DMESA_LLVM_VERSION_PATCH=0
ELF_INCLUDES := external/elfutils/src/libelf
endif
ifeq ($(MESA_ANDROID_MAJOR_VERSION),7)
LOCAL_CFLAGS += -DHAVE_LLVM=0x0308 -DMESA_LLVM_VERSION_PATCH=0
ELF_INCLUDES := external/elfutils/libelf
endif
endif
ifneq ($(LOCAL_IS_HOST_MODULE),true)

View File

@@ -1 +1 @@
17.0.7
17.1.0-devel

View File

@@ -1,107 +0,0 @@
# The commit addressed an earlier commit ccdd5b3738e which did not land in branch.
d49d275c415d60ae08dc3e52d8db11f19a44010f anv/blorp: Don't sanitize the swizzle for blorp_clear
# The commit addressed an earlier commit 0567ab0407e which did not land in branch.
# A stable specific backported patch will follow later ...
bc5d587a80b64fb3e0a5ea8067e6317fbca2bbc5 radv: Invalidate L2 for TRANSFER_WRITE barriers
# stable: the commit depends on earlier commit 656e30b6860 which did
# not land in branch, and some other dependencies.
d2d6cf6c8387ac06c8559027f57683a61b48671b anv: Add the pci_id into the shader cache UUID
# stable: 17.1 nomination only. Fixes earlier commit 0b613c20 which
# did not land in branch.
d4601b0efc7f5e24e3f39fefa8e29e79560245ce freedreno: fix crash if ctx torn down with no rendering
# stable: 17.1 nomination only. the commit depends on earlier commit
# 7748c3f5eb1 which did not land in branch and the part that
# could be merged is not really providing anything relevant to
# stable.
b81d85f1754928139f9f01474495e024946aa1b4 configure.ac: Fix typos.
# extra: this reverts a commit in stable only because a new feature in
# master replaces it.
e75001811e3b66986b4ede165a0fdde703d4f05b Revert "etnaviv: Cannot render to rb-swapped formats"
# extra: the commit depends on earlier commits which did not land in
# branch.
dee31311eb024a636466e359b43d3a67b0135f32 Revert "i965/fs: Don't emit SEL instructions for type-converting MOVs."
# fixes: finds 2 commit references but it is really only one, and only
# in master.
2b3c490e23a6cd7f4f6bb489267c2b9b4fde5b20 radv: fix typo in a2b10g10r10 fast clear calculation
# fixes: removes unused symbols but it doesn't really add nor fix
# anything relevant.
9807e9dea67a60ab8ee04741c75b748eade582ee anv: remove unused anv_dispatch_table dtable
# fixes: removes unused symbols but it doesn't really add nor fix
# anything relevant.
320561bd831b81843e732d38d2b254aa3b25f2f6 radv: remove unused radv_dispatch_table dtable
# fixes: it just makes a local function static.
8b79f0ed0812a3a80dd91e47112fd722bd2d6978 radv: make radv_resolve_entrypoint static
# fixes: reported only because it refers to fixes in 2 commits, but they
# are not in stable.
3f2cb699cfe0481f214c709b5a1375caf0a9c7fe android: vulkan: add support for libmesa_vulkan_util
# fixes: reported only because it refers to fixes in 2 commits, but they
# are not in stable.
61c38d14b7b1d0ccacc440340ee0b1bc4454eca2 android: r600: fix libmesa_amd_common dependency
# fixes: removes unused symbols but it doesn't really add nor fix
# anything relevant.
b69a03e12aa38ed5d8b60a056630ed445f6097dc i965: remove dead brw_new_shader() declaration
# fixes: it just cleans .gitignore.
f282ace67862c0633d0a8135e4808867740d0d39 i965: remove i965_symbols_test reference from .gitignore
# fixes: reported only because it refers to fixes in 2 commits, but they
# are not in stable.
15603055fb36a630cf3023219e9dd5c4a49c0596 anv: automake: ensure that the destination directory is created
# fixes: reported only because it refers to fixes in 2 commits, but they
# are not in stable.
a6840efc096bd26e92bcee1a50515d845988a2f9 anv: provide required gem stubs for the tests
# stable: 17.1 nomination only.
65e0c3fba74ee98cacadbba4bd005b930609b65e radeonsi/gfx9: fix texture buffer objects and image buffers with IDXEN==0
5c94779585e24e8bd1bd41707521584af4251de3 radeonsi/gfx9: fix most things wrong with shader images
482e6b07cc6ce4b2ceac8188be19dbf252eaecde radeonsi/gfx9: fix 1D array shader images
8e8570a9e8bae7f4d3ad623475dfadc715a828d7 radeonsi/gfx9: add a workaround for viewing a slice of 3D as a 2D image
60a20e6879e4ce0911b12848ffd9e372f096590e radeonsi/gfx9: set MAX_PRIMGRP_IN_WAVE in the correct register
80814819c28353a38c03d4cdba39983b8cf260ac radeonsi/gfx9: don't set deprecated field PARTIAL_ES_WAVE_ON
f466683cb07796fa89f96ef87a6f076218ae6db8 radeonsi/gfx9: fix gl_ViewportIndex
283a1d1e27b5456cfda848a54b9d74be0993e038 radeonsi/gfx9: make some PA & DB registers match the closed Vulkan driver
# stable: 17.1 nomination only. Fixes earlier commit 651ec926fc1 which
# did not land in branch.
c43b4bc85eddba8bc31665cfee5928bed8343516 anv: Don't place scratch buffers above the 32-bit boundary
# stable: 17.1 nomination only. Disk cache doesn't feature yet in branch.
ce412371513c90bf9156f22c3567ee57750ef264 disk_cache: reduce default cache size to 5% of filesystem
4e1f3afea9bdeddb0d21f00d25319bce580d80c3 disk_cache: use block size rather than file size
22fa3d90a92c1628215d0f5fccbe1116d4f5147f util/disk_cache: remove percentage based max cache limit
# stable: 17.1 nomination only. Fixes earlier commit 68d6d097f15 which
# did not land in branch.
69e6eab6533ff48f72223cd21ef640242c52598b winsys/amdgpu: fix Polaris12 (RX 550) breakage
# stable: 17.1 nomination only. Contributes to earlier commit
# 3f0d69af20e which did not land in branch.
2add79a73291e40621081b9a12938ac1931b9e96 radv: apply the tess+GS hang workaround to Polaris12 as well
# stable: 17.1 nomination only. Reverts earlier commit
# c5bf7cb52942cb7df9f5d73746ffbf3c102d12cc which did not land
# in branch.
1456da91c8a14f77dd347981e2bc95e0644e0fd2 Revert "mesa: Require mipmap completeness for glCopyImageSubData(), sometimes."
# stable: 17.1 nomination only. Fixes a crash triggered by earlier
# commit 7c3b8ed87859bfdfb985d21685115a729f9cd138 which did
# not land in branch.
bc074a45180eddf30ea723bbdf89895e2c7684ca i965: Don't try to unmap NULL program cache BO.
# stable: 17.1 nomination only. Fixes a regression caused by commit
# bdd644976952 which did not land in branch.
854ed47f3e1501e4cc87bf9f19c6d4a1ad2bab08 radeonsi: mark fast-cleared textures as compressed when dirtying
# stable: 17.1 nomination only. MSAA was enabled by 1832ef6cd9b which
# did not land in branch.
f52e63069a3fad23e03d42306a42bd20f0159da3 swr: move msaa resolve to generalized StoreTile
# stable: 17.1 nomination only. 17.0 doesn't support
# VK_KHR_push_descriptor, so it doesn't need this patch.
5ff48581119258214801de24a327bdd6a29e0ccc radv/meta: fix restoring a push descriptor set
# fixes: Fixes earlier commit 126d5ad which did not land in branch.
9da104593386f6e8ddec8f0d9d288aceb8908fe1 radv: fix regression in descriptor set freeing.
# stable: Fixes earlier commit 4d30024238e which did not land in
# branch.
d884d1a6540ec0f60768c30df47f0228a37ea61c vc4: Only build the NEON code on arm32.
# stable: Fixes earlier commit 6403e376511 which did not land in
# branch.
6f21b5601cc1260eac53f65c8941b3aa66d0f5e9 i965: Solve Android native fence fd double close
# stable: Depends on earlier commit 5f99c490089 which did not land in
# branch.
0549ea15ec380f3ca6df76ce53ff4c30bfc21dbf radeonsi: fix primitive ID in fragment shader when using tessellation
# stable: Depends on earlier commit 9fd9a7d0ba3 which did not land in
# branch.
b84b631c6381d9b36bca5d0e7cc67dd23af188c1 radeonsi: load patch_id for TES-as-ES when exporting for PS
# stable: The bug that they fixed is probably more benign than a
# possible assertion in the X server.
19b61799e3d06795d783b34fdbbf8474ef1e9a7c st/mesa: don't cast the incomplete framebufer to st_framebuffer
2f0450c627e5158d49aa1320eed9a5f6cb184838 radeonsi: add new vega10 pci ids

View File

@@ -10,28 +10,26 @@
# $ bin/get-extra-pick-list.sh | tee picklist
# Use the last branchpoint as our limit for the search
latest_branchpoint=`git merge-base origin/master HEAD`
# XXX: there should be a better way for this
latest_branchpoint=`git branch | grep \* | cut -c 3-`-branchpoint
# Grep for commits with "cherry picked from commit" in the commit message.
git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
grep "cherry picked from commit" |\
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
# For each cherry-picked commit...
cat already_picked | cut -c -8 |\
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' |\
cut -c -8 |\
while read sha
do
# ... check if it's referenced (fixed by another) patch
# Check if the original commit is referenced in master
git log -n1 --pretty=oneline --grep=$sha $latest_branchpoint..origin/master |\
cut -c -8 |\
while read candidate
do
# And flag up if it hasn't landed in branch yet.
if grep -q ^$candidate already_picked ; then
continue
# Check if the potential fix, hasn't landed in branch yet.
found=`git log -n1 --pretty=oneline --reverse --grep=$candidate $latest_branchpoint..HEAD |wc -l`
if test $found = 0
then
echo Commit $candidate might need to be picked, as it references $sha
fi
echo Commit $candidate references $sha
done
done
rm -f already_picked

View File

@@ -1,61 +0,0 @@
#!/bin/bash
# Script for generating a list of candidates [referenced by a Fixes tag] for
# cherry-picking to a stable branch
#
# Usage examples:
#
# $ bin/get-fixes-pick-list.sh
# $ bin/get-fixes-pick-list.sh > picklist
# $ bin/get-fixes-pick-list.sh | tee picklist
# Use the last branchpoint as our limit for the search
latest_branchpoint=`git merge-base origin/master HEAD`
# List all the commits between day 1 and the branch point...
git log --reverse --pretty=%H $latest_branchpoint > already_landed
# ... and the ones cherry-picked.
git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
grep "cherry picked from commit" |\
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
# Grep for commits with Fixes tag
git log --reverse --pretty=%H -i --grep="fixes:" $latest_branchpoint..origin/master |\
while read sha
do
# For each one try to extract the tag
fixes_count=`git show $sha | grep -i "fixes:" | wc -l`
if [ "x$fixes_count" != x1 ] ; then
echo WARNING: Commit $sha has nore than one Fixes tag
fi
fixes=`git show $sha | grep -i "fixes:" | head -n 1`
# The following sed/cut combination is borrowed from GregKH
id=`echo ${fixes} | sed -e 's/^[ \t]*//' | cut -f 2 -d ':' | sed -e 's/^[ \t]*//' | cut -f 1 -d ' '`
# Bail out if we cannot find suitable id.
# Any specific validation the $id is valid and not some junk, is
# implied with the follow up code
if [ "x$id" = x ] ; then
continue
fi
# Check if the offending commit is in branch.
# Be that cherry-picked ...
# ... or landed before the branchpoint.
if grep -q ^$id already_picked ||
grep -q ^$id already_landed ; then
# Finally nominate the fix if it hasn't landed yet.
if grep -q ^$sha already_picked ; then
continue
fi
echo Commit $sha fixes $id
fi
done
rm -f already_picked
rm -f already_landed

View File

@@ -8,16 +8,13 @@
# $ bin/get-pick-list.sh > picklist
# $ bin/get-pick-list.sh | tee picklist
# Use the last branchpoint as our limit for the search
latest_branchpoint=`git merge-base origin/master HEAD`
# Grep for commits with "cherry picked from commit" in the commit message.
git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
grep "cherry picked from commit" |\
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
# Grep for commits that were marked as a candidate for the stable tree.
git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable' $latest_branchpoint..origin/master |\
git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*mesa-stable\)' HEAD..origin/master |\
while read sha
do
# Check to see whether the patch is on the ignore list.

View File

@@ -12,16 +12,13 @@
# This script intentionally _never_ checks for specific version tag
# Should we consider folding it with the original get-pick-list.sh
# Use the last branchpoint as our limit for the search
latest_branchpoint=`git merge-base origin/master HEAD`
# Grep for commits with "cherry picked from commit" in the commit message.
git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
grep "cherry picked from commit" |\
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
# Grep for commits that were marked as a candidate for the stable tree.
git log --reverse --pretty=%H -i --grep='^CC:.*mesa-dev' $latest_branchpoint..origin/master |\
git log --reverse --pretty=%H -i --grep='^CC:.*mesa-dev' HEAD..origin/master |\
while read sha
do
# Check to see whether the patch is on the ignore list.

View File

@@ -943,31 +943,43 @@ llvm_add_target() {
# $1 is the llvm-config command with arguments.
strip_unwanted_llvm_flags() {
# Use \> (marks the end of the word)
echo " `$1`" | sed \
-e 's/\s\+-m\S*//g' \
-e 's/\s\+-DNDEBUG\>//g' \
-e 's/\s\+-D_GNU_SOURCE\>//g' \
-e 's/\s\+-pedantic\>//g' \
-e 's/\s\+-W\S*//g' \
-e 's/\s\+-O\S*//g' \
-e 's/\s\+-g\S*//g' \
-e 's/-fno-rtti\>/-Fno-rtti/g' \
-e 's/\s\+-f\S*//g' \
-e 's/-Fno-rtti\>/-fno-rtti/g' \
-e 's/^ //'
echo `$1` | sed \
-e 's/-march=\S*//g' \
-e 's/-mtune=\S*//g' \
-e 's/-mcpu=\S*//g' \
-e 's/-DNDEBUG\>//g' \
-e 's/-D_GNU_SOURCE\>//g' \
-e 's/-pedantic\>//g' \
-e 's/-Wcovered-switch-default\>//g' \
-e 's/-O.\>//g' \
-e 's/-g\>//g' \
-e 's/-Wall\>//g' \
-e 's/-Wcast-qual\>//g' \
-e 's/-Woverloaded-virtual\>//g' \
-e 's/-fcolor-diagnostics\>//g' \
-e 's/-fdata-sections\>//g' \
-e 's/-ffunction-sections\>//g' \
-e 's/-fno-exceptions\>//g' \
-e 's/-fomit-frame-pointer\>//g' \
-e 's/-fvisibility-inlines-hidden\>//g' \
-e 's/-fPIC\>//g' \
-e 's/-fstack-protector-strong\>//g'
}
llvm_set_environment_variables() {
if test "x$LLVM_CONFIG" != xno; then
LLVM_VERSION=`$LLVM_CONFIG --version | egrep -o '^[[0-9.]]+'`
LLVM_LDFLAGS=`$LLVM_CONFIG --ldflags`
LLVM_BINDIR=`$LLVM_CONFIG --bindir`
LLVM_CPPFLAGS=`strip_unwanted_llvm_flags "$LLVM_CONFIG --cppflags"`
LLVM_CFLAGS=$LLVM_CPPFLAGS # CPPFLAGS seem to be sufficient
LLVM_CXXFLAGS=`strip_unwanted_llvm_flags "$LLVM_CONFIG --cxxflags"`
LLVM_INCLUDEDIR=`$LLVM_CONFIG --includedir`
LLVM_LIBDIR=`$LLVM_CONFIG --libdir`
# We need to respect LLVM_CPPFLAGS when compiling LLVM headers.
# We need to respect LLVM_CFLAGS when compiling LLVM headers.
save_CFLAGS="$CFLAGS"
CFLAGS="$CFLAGS $LLVM_CPPFLAGS"
CFLAGS="$CFLAGS $LLVM_CFLAGS"
AC_COMPUTE_INT([LLVM_VERSION_MAJOR], [LLVM_VERSION_MAJOR],
[#include "${LLVM_INCLUDEDIR}/llvm/Config/llvm-config.h"])
@@ -989,21 +1001,19 @@ llvm_set_environment_variables() {
LLVM_VERSION_INT="${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR}"
fi
FOUND_LLVM=yes
DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT -DMESA_LLVM_VERSION_PATCH=$LLVM_VERSION_PATCH"
MESA_LLVM=1
else
FOUND_LLVM=no
MESA_LLVM=0
LLVM_VERSION_INT=0
fi
}
require_llvm() {
if test "x$enable_gallium_llvm" != "xyes"; then
AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
llvm_check_version_for() {
if test "x$MESA_LLVM" = x0; then
AC_MSG_ERROR([LLVM $1 or newer is required for $2])
return
fi
}
llvm_require_version() {
require_llvm $2
llvm_target_version_major=`echo $1 | cut -d. -f1 | egrep -o '^[[0-9]]+'`
llvm_target_version_minor=`echo $1 | cut -d. -f2 | egrep -o '^[[0-9]]+'`
@@ -1044,7 +1054,7 @@ radeon_llvm_check() {
amdgpu_llvm_target_name='amdgpu'
fi
llvm_require_version $*
llvm_check_version_for $*
llvm_add_target $amdgpu_llvm_target_name $2
@@ -1721,34 +1731,6 @@ if test -n "$with_dri_drivers"; then
fi
dnl
dnl Gallium LLVM
dnl
dnl With follow-up commits we'll rework --enable-gallium-llvm to --enable-llvm
dnl Since that is too invasive for stable, do the more conservative thing for now
dnl and consider it as a global LLVM toggle.
dnl
AC_ARG_ENABLE([gallium-llvm],
[AS_HELP_STRING([--enable-gallium-llvm],
[build gallium LLVM support @<:@default=enabled on x86/x86_64@:>@])],
[enable_gallium_llvm="$enableval"],
[enable_gallium_llvm=auto])
if test "x$enable_gallium_llvm" = xauto; then
if test "x$FOUND_LLVM" = xyes; then
case "$host_cpu" in
i*86|x86_64|amd64) enable_gallium_llvm=yes;;
*) enable_gallium_llvm=no;;
esac
else
enable_gallium_llvm=no
fi
fi
if test "x$enable_gallium_llvm" = xyes -a "x$FOUND_LLVM" = xno; then
AC_MSG_ERROR([--enable-gallium-llvm selected but llvm-config is not found])
fi
#
# Vulkan driver configuration
#
@@ -1756,7 +1738,7 @@ fi
AC_ARG_WITH([vulkan-drivers],
[AS_HELP_STRING([--with-vulkan-drivers@<:@=DIRS...@:>@],
[comma delimited Vulkan drivers list, e.g.
"intel,radeon"
"intel"
@<:@default=no@:>@])],
[with_vulkan_drivers="$withval"],
[with_vulkan_drivers="no"])
@@ -2030,21 +2012,7 @@ if test "x$enable_opencl" = xyes; then
AC_MSG_ERROR([Clover requires libelf])
fi
if test "x${ac_cv_cxx_compiler_gnu}" = xyes; then
altivec_enabled=no
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#if !defined(__VEC__) || !defined(__ALTIVEC__)
#error "AltiVec not enabled"
#endif
])], altivec_enabled=yes)
if test "$altivec_enabled" = yes; then
CLOVER_STD_OVERRIDE="-std=gnu++11"
fi
AC_SUBST([CLOVER_STD_OVERRIDE])
fi
llvm_require_version $LLVM_REQUIRED_OPENCL "opencl"
llvm_check_version_for $LLVM_REQUIRED_OPENCL "opencl"
llvm_add_default_components "opencl"
llvm_add_component "all-targets" "opencl"
@@ -2169,6 +2137,24 @@ if ! echo "$egl_platforms" | grep -q 'x11'; then
GL_PC_CFLAGS="$GL_PC_CFLAGS -DMESA_EGL_NO_X11_HEADERS"
fi
dnl
dnl Gallium LLVM
dnl
AC_ARG_ENABLE([gallium-llvm],
[AS_HELP_STRING([--enable-gallium-llvm],
[build gallium LLVM support @<:@default=enabled on x86/x86_64@:>@])],
[enable_gallium_llvm="$enableval"],
[enable_gallium_llvm=auto])
if test -z "$with_gallium_drivers"; then
enable_gallium_llvm=no
fi
if test "x$enable_gallium_llvm" = xauto; then
case "$host_cpu" in
i*86|x86_64|amd64) enable_gallium_llvm=yes;;
esac
fi
dnl Directory for XVMC libs
AC_ARG_WITH([xvmc-libdir],
[AS_HELP_STRING([--with-xvmc-libdir=DIR],
@@ -2216,16 +2202,14 @@ AC_ARG_WITH([d3d-libdir],
AC_SUBST([D3D_DRIVER_INSTALL_DIR])
dnl
dnl r300 doesn't strictly require LLVM, but for performance reasons we
dnl highly recommend LLVM usage. So require it at least on x86 and x86_64
dnl architectures.
dnl Gallium helper functions
dnl
r300_require_llvm() {
case "$host" in *gnux32) return;; esac
case "$host_cpu" in
i*86|x86_64|amd64) require_llvm $1
;;
esac
gallium_require_llvm() {
if test "x$enable_gallium_llvm" = "xyes"; then
llvm_check_version_for $LLVM_REQUIRED_GALLIUM "gallium"
else
AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
fi
}
dnl
@@ -2314,7 +2298,7 @@ if test -n "$with_gallium_drivers"; then
HAVE_GALLIUM_R300=yes
PKG_CHECK_MODULES([RADEON], [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED])
require_libdrm "r300"
r300_require_llvm "r300"
gallium_require_llvm "r300"
;;
xr600)
HAVE_GALLIUM_R600=yes
@@ -2333,9 +2317,7 @@ if test -n "$with_gallium_drivers"; then
PKG_CHECK_MODULES([AMDGPU], [libdrm_amdgpu >= $LIBDRM_AMDGPU_REQUIRED])
require_libdrm "radeonsi"
radeon_gallium_llvm_check $LLVM_REQUIRED_RADEONSI "radeonsi"
if test "x$enable_egl" = xyes; then
require_basic_egl "radeonsi"
fi
require_basic_egl "radeonsi"
;;
xnouveau)
HAVE_GALLIUM_NOUVEAU=yes
@@ -2357,12 +2339,13 @@ if test -n "$with_gallium_drivers"; then
;;
xswrast)
HAVE_GALLIUM_SOFTPIPE=yes
if test "x$enable_gallium_llvm" = xyes; then
if test "x$MESA_LLVM" = x1 && test "x$enable_gallium_llvm" == "xyes"; then
HAVE_GALLIUM_LLVMPIPE=yes
fi
;;
xswr)
llvm_require_version $LLVM_REQUIRED_SWR "swr"
llvm_check_version_for $LLVM_REQUIRED_SWR "swr"
gallium_require_llvm "swr"
swr_require_cxx_feature_flags "C++11" "__cplusplus >= 201103L" \
",-std=c++11" \
@@ -2394,9 +2377,7 @@ if test -n "$with_gallium_drivers"; then
xvirgl)
HAVE_GALLIUM_VIRGL=yes
require_libdrm "virgl"
if test "x$enable_egl" = xyes; then
require_basic_egl "virgl"
fi
require_basic_egl "virgl"
;;
*)
AC_MSG_ERROR([Unknown Gallium driver: $driver])
@@ -2405,8 +2386,8 @@ if test -n "$with_gallium_drivers"; then
done
fi
if test "x$enable_gallium_llvm" == "xyes" -a "$with_gallium_drivers"; then
llvm_require_version $LLVM_REQUIRED_GALLIUM "gallium"
if test "x$enable_gallium_llvm" == "xyes"; then
llvm_check_version_for $LLVM_REQUIRED_GALLIUM "gallium"
llvm_add_default_components "gallium"
fi
@@ -2416,22 +2397,14 @@ if test "x$HAVE_GALLIUM_ETNAVIV" != xyes -a "x$HAVE_GALLIUM_IMX" == xyes ; then
AC_ERROR([Building with imx requires etnaviv])
fi
dnl
dnl Set defines and buildtime variables only when using LLVM.
dnl
if test "x$enable_gallium_llvm" = xyes; then
DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT -DMESA_LLVM_VERSION_PATCH=$LLVM_VERSION_PATCH"
dnl Set LLVM_LIBS - This is done after the driver configuration so
dnl that drivers can add additional components to LLVM_COMPONENTS.
dnl Previously, gallium drivers were updating LLVM_LIBS directly
dnl by calling llvm-config --libs ${DRIVER_LLVM_COMPONENTS}, but
dnl this was causing the same libraries to appear multiple times
dnl in LLVM_LIBS.
LLVM_LDFLAGS=`$LLVM_CONFIG --ldflags`
LLVM_CFLAGS=$LLVM_CPPFLAGS # CPPFLAGS seem to be sufficient
LLVM_CXXFLAGS=`strip_unwanted_llvm_flags "$LLVM_CONFIG --cxxflags"`
dnl Set LLVM_LIBS - This is done after the driver configuration so
dnl that drivers can add additional components to LLVM_COMPONENTS.
dnl Previously, gallium drivers were updating LLVM_LIBS directly
dnl by calling llvm-config --libs ${DRIVER_LLVM_COMPONENTS}, but
dnl this was causing the same libraries to appear multiple times
dnl in LLVM_LIBS.
if test "x$MESA_LLVM" != x0; then
if ! $LLVM_CONFIG --libs ${LLVM_COMPONENTS} >/dev/null; then
AC_MSG_ERROR([Calling ${LLVM_CONFIG} failed])
@@ -2533,7 +2506,8 @@ AM_CONDITIONAL(NEED_RADEON_DRM_WINSYS, test "x$HAVE_GALLIUM_R300" = xyes -o \
AM_CONDITIONAL(NEED_WINSYS_XLIB, test "x$enable_glx" = xgallium-xlib)
AM_CONDITIONAL(NEED_RADEON_LLVM, test x$NEED_RADEON_LLVM = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_COMPUTE, test x$enable_opencl = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_LLVM, test "x$enable_gallium_llvm" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_LLVM, test "x$MESA_LLVM" = x1 -a \
"x$enable_gallium_llvm" = xyes)
AM_CONDITIONAL(USE_VC4_SIMULATOR, test x$USE_VC4_SIMULATOR = xyes)
if test "x$USE_VC4_SIMULATOR" = xyes -a "x$HAVE_GALLIUM_ILO" = xyes; then
AC_MSG_ERROR([VC4 simulator on x86 replaces i965 driver build, so ilo must be disabled.])
@@ -2818,7 +2792,7 @@ else
fi
echo ""
if test "x$enable_gallium_llvm" = xyes; then
if test "x$MESA_LLVM" = x1; then
echo " llvm: yes"
echo " llvm-config: $LLVM_CONFIG"
echo " llvm-version: $LLVM_VERSION"
@@ -2865,7 +2839,7 @@ echo " CFLAGS: $cflags"
echo " CXXFLAGS: $cxxflags"
echo " Macros: $defines"
echo ""
if test "x$enable_gallium_llvm" = xyes; then
if test "x$MESA_LLVM" = x1; then
echo " LLVM_CFLAGS: $LLVM_CFLAGS"
echo " LLVM_CXXFLAGS: $LLVM_CXXFLAGS"
echo " LLVM_CPPFLAGS: $LLVM_CPPFLAGS"

View File

@@ -16,6 +16,17 @@
<h1>News</h1>
<h2>January 23, 2017</h2>
<p>
<a href="relnotes/12.0.6.html">Mesa 12.0.6</a> is released.
This is a bug-fix release.
<br>
NOTE: This is an extra release for the 12.0 stable branch, as per developers'
feedback. It is anticipated that 12.0.6 will be the final release in the 12.0
series. Users of 12.0 are encouraged to migrate to the 13.0 series in order
to obtain future fixes.
</p>
<h2>January 5, 2017</h2>
<p>
<a href="relnotes/13.0.3.html">Mesa 13.0.3</a> is released.

View File

@@ -158,6 +158,9 @@ To setup the branchpoint:
git checkout master # make sure we're in master first
git tag -s X.Y-branchpoint -m "Mesa X.Y branchpoint"
git checkout -b X.Y
git checkout master
$EDITOR VERSION # bump the version number
git commit -as
git push origin X.Y-branchpoint X.Y
</pre>
@@ -342,7 +345,7 @@ Here is one solution that I've been using.
tar -xaf mesa-$__version.tar.xz &amp;&amp; cd mesa-$__version &amp;&amp; scons &amp;&amp; cd ..
# Test the automake binaries
rm -rf cd mesa-$__version
rm -rf mesa-$__version
tar -xaf mesa-$__version.tar.xz &amp;&amp; cd mesa-$__version
./configure \
--with-dri-drivers=i965,swrast \

View File

@@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release.
</p>
<ul>
<li><a href="relnotes/12.0.6.html">12.0.6 release notes</a>
<li><a href="relnotes/13.0.3.html">13.0.3 release notes</a>
<li><a href="relnotes/12.0.5.html">12.0.5 release notes</a>
<li><a href="relnotes/13.0.2.html">13.0.2 release notes</a>

148
docs/relnotes/12.0.6.html Normal file
View File

@@ -0,0 +1,148 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 12.0.6 Release Notes / January 23, 2017</h1>
<p>
Mesa 12.0.6 is a bug fix release which fixes bugs found since the 12.0.5 release.
</p>
<p>
Mesa 12.0.6 implements the OpenGL 4.3 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.3. OpenGL
4.3 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>
<h2>SHA256 checksums</h2>
<pre>
65339ba5d76a45225b8b56f9a1da9db15c569e1d163760faa2921da0a8461741 mesa-12.0.6.tar.gz
7d6da9744c1022a4c2ab6ad01a206984d00443fb691568011d01b3dd97e36448 mesa-12.0.6.tar.xz
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<p>This list is likely incomplete.</p>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92234">Bug 92234</a> - [BDW] GPU hang in Shogun2</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95130">Bug 95130</a> - Derivatives of gl_Color wrong when helper pixels used</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98329">Bug 98329</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.image.render_multiple_contexts.gles2_renderbuffer_depth16_depth_buffer</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99030">Bug 99030</a> - [HSW, regression] transform feedback fails on Linux 4.8</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99354">Bug 99354</a> - [G71] &quot;Assertion `bkref' failed&quot; reproducible with glmark2</li>
</ul>
<h2>Changes</h2>
<p>Chad Versace (3):</p>
<ul>
<li>i965/mt: Disable aux surfaces after making miptree shareable</li>
<li>i965/mt: Disable HiZ when sharing depth buffer externally (v2)</li>
<li>anv: Handle vkGetPhysicalDeviceQueueFamilyProperties with count == 0</li>
</ul>
<p>Emil Velikov (5):</p>
<ul>
<li>docs: add sha256 checksums for 12.0.5</li>
<li>get-typod-pick-list.sh: add new script</li>
<li>automake: use shared llvm libs for make distcheck</li>
<li>egl/wayland: use the destroy_window_callback for swrast</li>
<li>Update version to 12.0.6</li>
</ul>
<p>Fredrik Höglund (1):</p>
<ul>
<li>dri3: Fix MakeCurrent without a default framebuffer</li>
</ul>
<p>Ilia Mirkin (1):</p>
<ul>
<li>nouveau: take extra push space into account for pushbuf_space calls</li>
</ul>
<p>Jason Ekstrand (19):</p>
<ul>
<li>spirv/nir: Fix some texture opcode asserts</li>
<li>spirv/nir: Add support for shadow samplers that return vec4</li>
<li>spirv/nir: Properly handle gather components</li>
<li>anv/pipeline: Set binding_table.gather_texture_start</li>
<li>nir: Add a helper for determining the type of a texture source</li>
<li>nir/lower_tex: Add some helpers for working with tex sources</li>
<li>nir/lower_tex: Add support for lowering coordinate offsets</li>
<li>i965/nir: Enable NIR lowering of txf and rect offsets</li>
<li>i965: Get rid of the do_lower_unnormalized_offsets pass</li>
<li>spirv/nir: Don't increment coord_components for array lod queries</li>
<li>anv/image: Assert that the image format is actually supported</li>
<li>spirv/nir: Move opcode selection higher up in handle_texture</li>
<li>spirv/nir: Refactor type handling in handle_texture</li>
<li>nir/spirv: Refactor coordinate handling in handle_texture</li>
<li>spirv/nir: Handle texture projectors</li>
<li>spirv/nir: Add support for ImageQuerySamples</li>
<li>anv/device: Return the right error for failed maps</li>
<li>anv/device: Implicitly unmap memory objects in FreeMemory</li>
<li>anv/descriptor_set: Write the state offset in the surface state free list.</li>
</ul>
<p>Kenneth Graunke (2):</p>
<ul>
<li>spirv: Move cursor before calling vtn_ssa_value() in phi 2nd pass.</li>
<li>i965: Properly flush in hsw_pause_transform_feedback().</li>
</ul>
<p>Marek Olšák (6):</p>
<ul>
<li>cso: don't release sampler states that are bound</li>
<li>radeonsi: always restore sampler states when unbinding sampler views</li>
<li>radeonsi: fix incorrect FMASK checking in bind_sampler_states</li>
<li>radeonsi: disable CE on SI + AMDGPU</li>
<li>radeonsi: disable the constant engine (CE) on Carrizo and Stoney</li>
<li>gallium/radeon: fix the draw-calls HUD query</li>
</ul>
<p>Matt Turner (3):</p>
<ul>
<li>i965/fs: Rename opt_copy_propagate -&gt; opt_copy_propagation.</li>
<li>i965/fs: Add unit tests for copy propagation pass.</li>
<li>i965/fs: Reject copy propagation into SEL if not min/max.</li>
</ul>
<p>Michel Dänzer (1):</p>
<ul>
<li>cso: Don't restore nr_samplers in cso_restore_fragment_samplers</li>
</ul>
<p>Nicolai Hähnle (1):</p>
<ul>
<li>radeonsi: enable WQM in PS prolog when needed</li>
</ul>
</div>
</body>
</html>

View File

@@ -14,7 +14,7 @@
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 17.0.0 Release Notes / February 13, 2017</h1>
<h1>Mesa 17.0.0 Release Notes / TBD</h1>
<p>
Mesa 17.0.0 is a new development release.
@@ -33,8 +33,7 @@ because compatibility contexts are not supported.
<h2>SHA256 checksums</h2>
<pre>
696578f0b83796470511a88a95fff15a2a25fa201a9e487716f2ca20c177c3ab mesa-17.0.0.tar.gz
39db3d59700159add7f977307d12a7dfe016363e760ad82280ac4168ea668481 mesa-17.0.0.tar.xz
TBD.
</pre>
@@ -63,222 +62,13 @@ Note: some of the new features are only available with certain drivers.
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=70623">Bug 70623</a> - libglx.so: undefined symbol: _glapi_tls_Context</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=72902">Bug 72902</a> - [IVB/HSW/BDW] DOTA2 segfaults unless Mesa is configured with (non-default) --enable-glx-tls</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73778">Bug 73778</a> - _glapi_tls_Dispatch undefined</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77662">Bug 77662</a> - Fail to render to different faces of depth-stencil cube map</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89043">Bug 89043</a> - undefined symbol: _glapi_tls_Dispatch</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91281">Bug 91281</a> - Tonga VCE 2160p encode fails with BO to small for addr</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92234">Bug 92234</a> - [BDW] GPU hang in Shogun2</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92634">Bug 92634</a> - gallium's vl_mpeg12_decoder does not work with st/va</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92760">Bug 92760</a> - Add FP64 support to the i965 shader backends</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92925">Bug 92925</a> - Incorrect GEN for ASTC in Surface Format Table</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93551">Bug 93551</a> - Divinity: Original Sin Enhanced Edition(Native) crash on start</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94512">Bug 94512</a> - X segfaults with glx-tls enabled in a x32 environment</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94900">Bug 94900</a> - HD6950 GPU lockup loop with various steam games (octodad[always], saints row 4[always], dead island[always], grid autosport[sometimes])</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94904">Bug 94904</a> - [vulkan, BSW] dEQP-VK.api.object_management.multithreaded_per_thread_device intermittent crash</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95460">Bug 95460</a> - Please add more drivers (freedreno, virgl) to features.txt status document</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96959">Bug 96959</a> - nop.sat generated by pow workaround?</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97102">Bug 97102</a> - [dri][swr] stack overflow / infinite loop with GALLIUM_DRIVER=swr</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97232">Bug 97232</a> - Line rendering broken in Dolphin when using gl_ClipDistance</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97287">Bug 97287</a> - GL45-CTS.vertex_attrib_binding.basic-inputL-case1 fails</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97321">Bug 97321</a> - Query INFO_LOG_LENGTH for empty info log should return 0</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97420">Bug 97420</a> - &quot;#version 0&quot; crashes glsl_compiler</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97422">Bug 97422</a> - trying to call a number as a function results into a crash</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97447">Bug 97447</a> - GL 3.0 compatibility context exposes GL_ARB_compute_shader</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97473">Bug 97473</a> - Memory corruption when uploading DXT5 cubemap faces</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97715">Bug 97715</a> - [ILK,G45,G965] piglit.spec.arb_separate_shader_objects.misc api error checks</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97779">Bug 97779</a> - [regression, bisected][BDW, GPU hang] stuck on render ring, always reproducible</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97804">Bug 97804</a> - Later precision statement isn't overriding earlier one</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97952">Bug 97952</a> - /usr/include/string.h:518:12: error: exception specification in declaration does not match previous declaration</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97967">Bug 97967</a> - glsl/tests/cache-test regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98005">Bug 98005</a> - VCE dual instance encoding inconsistent since st/va: enable dual instances encode by sync surface</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98012">Bug 98012</a> - [IVB] Segfault when running Dolphin twice with Vulkan</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98134">Bug 98134</a> - dEQP-GLES31.functional.debug.negative_coverage.get_error.buffer.draw_buffers wants a different GL error code</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98172">Bug 98172</a> - Concurrent call to glClientWaitSync results in segfault in one of the waiters.</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98238">Bug 98238</a> - witcher 2: objects are black when changing lod</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98243">Bug 98243</a> - dEQP mismatched UBO precision qualifiers</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98245">Bug 98245</a> - GLES3.1 link negative dEQP &quot;expected linking to fail, but passed.&quot;</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98250">Bug 98250</a> - dEQP-GLES31.functional.debug.negative_coverage.get_error.texture.texparameterIiv/texparameterIuiv failure</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98263">Bug 98263</a> - [radv] The Talos Principle fails to launch with &quot;Fatal error: Cannot set display mode.&quot;</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98297">Bug 98297</a> - Can't configure a desktop with 3x4k monitors in one row</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98299">Bug 98299</a> - Compute shaders generate stupid divides</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98307">Bug 98307</a> - &quot;st/glsl_to_tgsi: explicitly track all input and output declaration&quot; broke flightgear colors on rs780</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98326">Bug 98326</a> - [dEQP, EGL] pbuffer depth/stencil tests fail</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98327">Bug 98327</a> - [dEQP, EGL] dEQP-EGL.functional.resize not supported</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98328">Bug 98328</a> - [dEQP, EGL] luminance tests fail</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98329">Bug 98329</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.image.render_multiple_contexts.gles2_renderbuffer_depth16_depth_buffer</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98330">Bug 98330</a> - [dEQP, EGL] dEQP-EGL.functional.buffer_age.no_preserve fails</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98339">Bug 98339</a> - dEQP-EGL: Got EGL_BAD_MATCH: eglCreateSyncKHR()</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98343">Bug 98343</a> - dEQP-EGL: GL_INVALID_ENUM at teglCreateContextExtTests</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98415">Bug 98415</a> - Vulkan Driver JSON file contains incorrect field</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98421">Bug 98421</a> - src/loader/loader.c:111:40: error: unknown type name drmDevicePtr</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98431">Bug 98431</a> - UnrealEngine v4 demos startup fails to blorp blit assert</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98480">Bug 98480</a> - Support R8 image texture in ES 3.1</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98512">Bug 98512</a> - radeon r600 vdpau: Invalid command stream: texture bo too small</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98518">Bug 98518</a> - [r600g, bisected] regression: NI/Turks MSAA texture corruption with FreeCAD and Wine games</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98526">Bug 98526</a> - glsl/tests/general-ir-test regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98595">Bug 98595</a> - glsl: ralloc assertion &quot;info-&gt;canary == CANARY&quot; failed</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98599">Bug 98599</a> - xterm menus corrupt since tgsi/scan: handle indirect image indexing correctly</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98632">Bug 98632</a> - Fix build on Hurd without PATH_MAX</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98681">Bug 98681</a> - ir_builder_print_visitor.cpp:401:67: error: expected ')' before 'PRIx64'</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98694">Bug 98694</a> - &quot;(5=2)?1:1&quot; as array size decleration crashes glsl_compiler</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98740">Bug 98740</a> - bitcode.cpp:102:8: error: Error is not a member of llvm</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98767">Bug 98767</a> - [swrast] ralloc.c:84: get_header: Assertion `info-&gt;canary == CANARY' failed.</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98774">Bug 98774</a> - glsl/tests/warnings-test regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98815">Bug 98815</a> - [SKL/BDW GT2] large perf regression in TessMark</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98840">Bug 98840</a> - nir clone test fails</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98893">Bug 98893</a> - [SKL] piglit.spec.arb_shader_image_load_store.semantics intermittent</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98914">Bug 98914</a> - mesa-vdpau-drivers: breaks vdpau for mpeg2video</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98917">Bug 98917</a> - [BDW SKL BSW KBL] Tessellation CTS tests regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98975">Bug 98975</a> - Wasteland 2 Directors Cut: Hangs. GPU fault</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99010">Bug 99010</a> - --disable-gallium-llvm no longer recognized</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99013">Bug 99013</a> - [regression, bisected] radeonsi: commit 4c8c13b3 &quot;Use amdgcn intrinsics for fs interpolation&quot; makes system unusable</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99030">Bug 99030</a> - [HSW, regression] transform feedback fails on Linux 4.8</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99038">Bug 99038</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.negative_api.create_pixmap_surface crashes</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99072">Bug 99072</a> - [byt,ivb,snb] ES3-CTS.gtf.GL3Tests.shadow regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99085">Bug 99085</a> - [EGL] dEQP-EGL.functional.sharing.gles2.multithread intermittent</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99097">Bug 99097</a> - [vulkancts] dEQP-VK.image.store regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99100">Bug 99100</a> - [SKL,BDW,BSW,KBL] dEQP-VK.glsl.return.return_in_dynamic_loop_dynamic_vertex regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99119">Bug 99119</a> - swr_fence_work.cpp(42): error: argument of type &quot;std::nullptr_t&quot; is incompatible with parameter of type &quot;unsigned long&quot;</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99144">Bug 99144</a> - Incorrect rendering using glDrawArraysInstancedBaseInstance and first != 0 on Skylake</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99154">Bug 99154</a> - Link time error when using multiple builtin functions</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99158">Bug 99158</a> - vdpau segfaults and gpu locks with kodi on R9285</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99185">Bug 99185</a> - dEQP-EGL.functional.image.modify.tex_rgb5_a1_tex_subimage_rgba8</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99188">Bug 99188</a> - dEQP-EGL.functional.create_context_ext.robust_gl_30.rgb565_no_depth_no_stencil</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99210">Bug 99210</a> - ES3-CTS.functional.texture.mipmap.cube.generate.rgba5551_*</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99214">Bug 99214</a> - Crash in library libswrAVX.so when assigning vertex buffer object pointers with elements of type GL_DOUBLE</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99219">Bug 99219</a> - The Stanley Parable GPU hang when starting a new game</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99229">Bug 99229</a> - [G33] thousands of tests crash</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99231">Bug 99231</a> - [HSW][i965] Crash in upload_3dstate_streamout()</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99287">Bug 99287</a> - piglit.spec.glsl-1_10.execution.vs-nested-return-sibling-loop regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99303">Bug 99303</a> - [REGRESSION][BISECTED] DMs are crashing on start with &quot;radeon&quot;</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99314">Bug 99314</a> - [g33] glsl regressions</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99339">Bug 99339</a> - Blender line rendering broken after removing XY clipping of lines</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99354">Bug 99354</a> - [G71] &quot;Assertion `bkref' failed&quot; reproducible with glmark2</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99389">Bug 99389</a> - Mesa build broken: sid_tables.h</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99391">Bug 99391</a> - [ILK,G45,G965] piglit regressions</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99401">Bug 99401</a> - [g33] regression: piglit.spec.!opengl 1_0.gl-1_0-beginend-coverage</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99419">Bug 99419</a> - Crash(Segmentation fault) si_shader_select in Master Of Orion</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99450">Bug 99450</a> - [amdgpu] Payday 2 visual glitches on some models</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99451">Bug 99451</a> - polygon offset use after free</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99456">Bug 99456</a> - Firefox crashing when opening about:support with WebGL2 enabled</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99631">Bug 99631</a> - segfault with OSVRTrackerView and openscenegraph git master</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99633">Bug 99633</a> - rasterizer/core/clip.h:279:49: error: const struct API_STATE has no member named linkageCount</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99637">Bug 99637</a> - VLC video has corrupted colors when using VDPAU output on Radeon SI</li>
</ul>
<h2>Changes</h2>
<ul>
<li>Building RADV requires --enable-gallium-llvm</li>
<li>The vulkan headers vk_platform.h and vulkan.h are no longer installed</li>
<li>The configure options --with-sha1 and --disable-shader-cache are
removed alongside their respective library requirements</li>
</ul>
TBD.
</div>
</body>

View File

@@ -1,221 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 17.0.1 Release Notes / March 4, 2017</h1>
<p>
Mesa 17.0.1 is a bug fix release which fixes bugs found since the 17.0.0 release.
</p>
<p>
Mesa 17.0.1 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>
<h2>SHA256 checksums</h2>
<pre>
e819bd3e515dac26faf9836d8f27a4ddf05323b9b23afb6c06536d4ac82e2743 mesa-17.0.1.tar.gz
96fd70ef5f31d276a17e424e7e1bb79447ccbbe822b56844213ef932e7ad1b0c mesa-17.0.1.tar.xz
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98869">Bug 98869</a> - Electronic Super Joy graphic artefacts (regression,bisected)</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99532">Bug 99532</a> - Compute shader doesn't give right result under some circumstances</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99677">Bug 99677</a> - heap-use-after-free in glsl</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99692">Bug 99692</a> - [radv] Mostly broken on Hawaii PRO/CIK ASICs</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99850">Bug 99850</a> - Tessellation bug on Carrizo</li>
</ul>
<h2>Changes</h2>
<p>Bas Nieuwenhuizen (4):</p>
<ul>
<li>radv: Never try to create more than max_sets descriptor sets.</li>
<li>radv: Reset emitted compute pipeline when calling secondary cmd buffer.</li>
<li>radv: Only use PKT3_OCCLUSION_QUERY when it doesn't hang.</li>
<li>radv: Use correct size for availability flag.</li>
</ul>
<p>Ben Crocker (3):</p>
<ul>
<li>gallivm: Reenable PPC VSX (v3)</li>
<li>gallivm: Improve debug output (V2)</li>
<li>gallivm: Override getHostCPUName() "generic" w/ "pwr8" (v4)</li>
</ul>
<p>Brendan King (1):</p>
<ul>
<li>egl/dri3: implement query surface hook</li>
</ul>
<p>Christian Gmeiner (2):</p>
<ul>
<li>etnaviv: move pctx initialisation to avoid a null dereference</li>
<li>etnaviv: remove number of pixel pipes validation</li>
</ul>
<p>Connor Abbott (1):</p>
<ul>
<li>anv: fix Get*MemoryRequirements for !LLC</li>
</ul>
<p>Daniel Stone (1):</p>
<ul>
<li>egl/wayland: Don't use DRM format codes for SHM</li>
</ul>
<p>Dave Airlie (6):</p>
<ul>
<li>tgsi: fix memory leak in tgsi sanity check</li>
<li>radv: change base alignment for allocated memory.</li>
<li>radv: fix cik macroModeIndex.</li>
<li>radv: adopt some init config workarounds from radeonsi.</li>
<li>radv: fix depth format in blit2d.</li>
<li>radv: fix txs for sampler buffers</li>
</ul>
<p>Emil Velikov (8):</p>
<ul>
<li>docs: add sha256 checksums for 17.0.0</li>
<li>bin/get-extra-pick-list: use git merge-base to get the branchpoint</li>
<li>bin/get-extra-pick-list: rework to use already_picked list</li>
<li>bin/get-typod-pick-list.sh: limit `git grep ...' to only as needed</li>
<li>bin/get-pick-list.sh: limit `git grep ...' only as needed</li>
<li>bin/get-pick-list.sh: remove ancient way of nominating patches</li>
<li>bin/get-fixes-pick-list.sh: add new script</li>
<li>Update version to 17.0.1</li>
</ul>
<p>Eric Anholt (1):</p>
<ul>
<li>vc4: Avoid emitting small immediates for UBO indirect load address guards.</li>
</ul>
<p>Grazvydas Ignotas (3):</p>
<ul>
<li>r300g: only allow byteswapped formats on big endian</li>
<li>gallium/u_queue: fix a crash with atexit handlers</li>
<li>gallium/u_queue: set num_threads correctly if not all threads start</li>
</ul>
<p>Hans de Goede (1):</p>
<ul>
<li>glx/glvnd: Fix GLXdispatchIndex sorting</li>
</ul>
<p>Ilia Mirkin (4):</p>
<ul>
<li>gm107/ir: fix address offset bitfield for ATOMS</li>
<li>nvc0: set the render condition in the compute object</li>
<li>st/mesa: don't pass compare mode for stencil-sampled textures</li>
<li>nvc0: disable linked tsc mode in compute launch descriptor</li>
</ul>
<p>Jason Ekstrand (10):</p>
<ul>
<li>i965/sampler_state: Clamp min/max LOD to 14 on gen7+</li>
<li>i965/sampler_state: Pass texObj into update_sampler_state</li>
<li>i965/sampler_state: Set the "Base Mip Level" field on Sandy Bridge</li>
<li>intel/blorp: Swizzle clear colors on the CPU</li>
<li>i965/fs: Fix the inline nir_op_pack_double optimization</li>
<li>anv: Add an invalidate_range helper</li>
<li>anv/query: clflush the bo map on non-LLC platforms</li>
<li>genxml: Make MI_STORE_DATA_IMM more consistent</li>
<li>anv/query: Perform CmdResetQueryPool on the GPU</li>
<li>intel/blorp: Explicitly flush all allocated state</li>
</ul>
<p>Jose Maria Casanova Crespo (1):</p>
<ul>
<li>glsl: non-last member unsized array on SSBO must fail compilation on GLSL ES 3.1</li>
</ul>
<p>Kenneth Graunke (1):</p>
<ul>
<li>mesa: Do (TCS &amp;&amp; !TES) draw time validation in ES as well.</li>
</ul>
<p>Leo Liu (1):</p>
<ul>
<li>configure.ac: check require_basic_egl only if egl enabled</li>
</ul>
<p>Lionel Landwerlin (2):</p>
<ul>
<li>anv: wsi: report presentation error per image request</li>
<li>i965/fs: fix uninitialized memory access</li>
</ul>
<p>Marek Olšák (6):</p>
<ul>
<li>radeonsi: fix UNSIGNED_BYTE index buffer fallback with non-zero start (v2)</li>
<li>gallium/util: remove unused u_index_modify helpers</li>
<li>gallium/u_index_modify: don't add PIPE_TRANSFER_UNSYNCHRONIZED unconditionally</li>
<li>gallium/u_queue: fix random crashes when the app calls exit()</li>
<li>radeonsi: fix broken tessellation on Carrizo and Stoney</li>
<li>amd/common: fix ASICREV_IS_POLARIS11_M for Polaris12</li>
</ul>
<p>Mauro Rossi (2):</p>
<ul>
<li>android: radeonsi: fix sid_table.h generated header include path</li>
<li>android: glsl: build shader cache sources</li>
</ul>
<p>Michel Dänzer (1):</p>
<ul>
<li>configure.ac: Drop LLVM compiler flags more radically</li>
</ul>
<p>Nicolai Hähnle (3):</p>
<ul>
<li>winsys/amdgpu: reduce max_alloc_size based on GTT limits</li>
<li>radeonsi: handle MultiDrawIndirect in si_get_draw_start_count</li>
<li>radeonsi: fix UINT/SINT clamping for 10-bit formats on &lt;= CIK</li>
</ul>
<p>Samuel Iglesias Gonsálvez (1):</p>
<ul>
<li>glsl: fix heap-use-after-free in ast_declarator_list::hir()</li>
</ul>
<p>Tapani Pälli (1):</p>
<ul>
<li>android: fix droid_create_image_from_prime_fd_yuv for YV12</li>
</ul>
</div>
</body>
</html>

View File

@@ -1,185 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 17.0.2 Release Notes / March 20, 2017</h1>
<p>
Mesa 17.0.2 is a bug fix release which fixes bugs found since the 17.0.1 release.
</p>
<p>
Mesa 17.0.2 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>
<h2>SHA256 checksums</h2>
<pre>
2e0f41e7974ba7a36ca32bbeaf8ebcd65c8fd4d2dc9872f04d4becbd5e7a8cb5 mesa-17.0.2.tar.gz
f8f191f909e01e65de38d5bdea5fb057f21649a3aed20948be02348e77a689d4 mesa-17.0.2.tar.xz
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=68504">Bug 68504</a> - 9.2-rc1 workaround for clover build failure on ppc/altivec: cannot convert 'bool' to '__vector(4) __bool int' in return</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97988">Bug 97988</a> - [radeonsi] playing back videos with VDPAU exhibits deinterlacing/anti-aliasing issues not visible with VA-API</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99484">Bug 99484</a> - Crusader Kings 2 - Loading bars, siege bars, morale bars, etc. do not render correctly</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99715">Bug 99715</a> - Don't print: &quot;Note: Buggy applications may crash, if they do please report to vendor&quot;</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100049">Bug 100049</a> - &quot;ralloc: Make sure ralloc() allocations match malloc()'s alignment.&quot; causes seg fault in 32bit build</li>
</ul>
<h2>Changes</h2>
<p>Alex Smith (3):</p>
<ul>
<li>radv: Emit pending flushes before executing a secondary command buffer</li>
<li>radv: Flush before copying with PKT3_WRITE_DATA in CmdUpdateBuffer</li>
<li>radv/ac: Fix shared memory offset calculation</li>
</ul>
<p>Bas Nieuwenhuizen (3):</p>
<ul>
<li>radv: Disable HTILE for textures with multiple layers/levels.</li>
<li>radv: Emit cache flushes before CP DMA.</li>
<li>Revert "radv: Emit cache flushes before CP DMA."</li>
</ul>
<p>Dave Airlie (3):</p>
<ul>
<li>radv: drop Z24 support.</li>
<li>radv: disable mip point pre clamping.</li>
<li>radv: setup llvm target data layout</li>
</ul>
<p>Emil Velikov (4):</p>
<ul>
<li>docs: add sha256 checksums for 17.0.1</li>
<li>cherry-ignore: add the swizzle blorp_clear fix</li>
<li>i965: move brw_define.h ifndef guard to the top</li>
<li>Update version to 17.0.2</li>
</ul>
<p>Fredrik Höglund (2):</p>
<ul>
<li>radv: fix the dynamic buffer index in vkCmdBindDescriptorSets</li>
<li>radv/ac: fix multiple descriptor sets with dynamic buffers</li>
</ul>
<p>Gregory Hainaut (1):</p>
<ul>
<li>glapi: fix typo in count_scale</li>
</ul>
<p>Ilia Mirkin (2):</p>
<ul>
<li>nvc0: take extra pushbuf space into account for pushbuf_space calls</li>
<li>nvc0: increase alignment to 256 for texture buffers on fermi</li>
</ul>
<p>Jacob Lifshay (1):</p>
<ul>
<li>vulkan/wsi: Improve the DRI3 error message</li>
</ul>
<p>James Legg (1):</p>
<ul>
<li>radv: Fix using more than 4 bound descriptor sets</li>
</ul>
<p>Jason Ekstrand (7):</p>
<ul>
<li>anv/blorp/clear_subpass: Only set surface clear color for fast clears</li>
<li>anv: Accurately advertise dynamic descriptor limits</li>
<li>anv: Stall before fast-clear operations</li>
<li>anv: Properly handle destroying NULL devices and instances</li>
<li>anv/blorp: Turn off AUX after doing a CCS_D resolve</li>
<li>anv/blorp: Only set a clear color for resolves if fast-cleared</li>
<li>nir/intrinsics: Make load_barycentric_input take a 2-component coord</li>
</ul>
<p>Jonas Pfeil (1):</p>
<ul>
<li>ralloc: Make sure ralloc() allocations match malloc()'s alignment.</li>
</ul>
<p>Kenneth Graunke (1):</p>
<ul>
<li>egl: Ensure ResetNotificationStrategy matches for shared contexts.</li>
</ul>
<p>Marek Olšák (3):</p>
<ul>
<li>st/mesa: reset sample_mask, min_sample, and render_condition for PBO ops</li>
<li>st/mesa: set blend state for PBO readbacks</li>
<li>radeonsi: mark all bound shader buffer ranges as initialized</li>
</ul>
<p>Matt Turner (1):</p>
<ul>
<li>clover: Work around build failure with AltiVec.</li>
</ul>
<p>Nanley Chery (2):</p>
<ul>
<li>anv/pass: Avoid accessing attachment array out of bounds</li>
<li>anv/image: Remove extra dependency on HiZ-specific variable</li>
</ul>
<p>Nicolai Hähnle (2):</p>
<ul>
<li>st/glsl_to_tgsi: avoid iterating past the head of the instruction list</li>
<li>st/mesa: inform the driver of framebuffer changes before compute dispatches</li>
</ul>
<p>Robert Foss (1):</p>
<ul>
<li>mesa: Avoid read of uninitialized variable</li>
</ul>
<p>Samuel Iglesias Gonsálvez (5):</p>
<ul>
<li>i965/fs: mark last DF uniform array element as 64 bit live one</li>
<li>i965/fs: detect different bit size accesses to uniforms to push them in proper locations</li>
<li>i965/fs: fix indirect load DF uniforms on BSW/BXT</li>
<li>i965/fs: fix source type when emitting MOV_INDIRECT to read ICP handles</li>
<li>i965/fs: emit MOV_INDIRECT with the source with the right register type</li>
</ul>
<p>Samuel Pitoiset (1):</p>
<ul>
<li>radeonsi: disable sinking common instructions down to the end block</li>
</ul>
</div>
</body>
</html>

View File

@@ -1,189 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 17.0.3 Release Notes / April 1, 2017</h1>
<p>
Mesa 17.0.3 is a bug fix release which fixes bugs found since the 17.0.2 release.
</p>
<p>
Mesa 17.0.3 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>
<h2>SHA256 checksums</h2>
<pre>
8253edf1bdd7b14ab63d5982349143a5c9ac3767f39a63257cc9d7e7d92f60f1 mesa-17.0.3.tar.gz
ca646f5075a002d60ef9123c8a4331cede155c01712ef945a65c59a5e69fe7ed mesa-17.0.3.tar.xz
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96743">Bug 96743</a> - [BYT, HSW, SKL, BXT, KBL] GPU hangs with GfxBench 4.0 CarChase</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99246">Bug 99246</a> - [d3dadapter+radeonsi &amp; bisect] EVE-Online : hang on wormhole sight</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100061">Bug 100061</a> - LODQ instruction generated with invalid dst mask</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100182">Bug 100182</a> - Flickering in The Talos Principle on Sky Lake GT4.</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100201">Bug 100201</a> - Windows scons build with MSVC toolchain and LLVM 4.0 fails</li>
</ul>
<h2>Changes</h2>
<p>Alex Deucher (1):</p>
<ul>
<li>radeonsi: add new polaris12 pci id</li>
</ul>
<p>Andres Gomez (5):</p>
<ul>
<li>glsl: on UBO/SSBOs link error reset the number of active blocks to 0</li>
<li>cherry-ignore: add the Invalidate L2 for TRANSFER_WRITE barriers fix</li>
<li>cherry-ignore: add the Flush after unmap in gbm/dri fix</li>
<li>cherry-ignore: corrected typo in the Flush after unmap in gbm/dri fix</li>
<li>Update version to 17.0.3</li>
</ul>
<p>Axel Davy (2):</p>
<ul>
<li>st/nine: Resolve deadlock in surface/volume dtors when using csmt</li>
<li>st/nine: Use atomics for available_texture_mem</li>
</ul>
<p>Bas Nieuwenhuizen (1):</p>
<ul>
<li>radv: flush DB cache before and after HTILE decompress.</li>
</ul>
<p>Dave Airlie (1):</p>
<ul>
<li>radv: fix primitive reset index emission</li>
</ul>
<p>Emil Velikov (1):</p>
<ul>
<li>docs: add sha256 checksums for 17.0.2</li>
</ul>
<p>Ilia Mirkin (1):</p>
<ul>
<li>st/mesa: set result writemask based on ir type</li>
</ul>
<p>Jan Vesely (1):</p>
<ul>
<li>clover: use pipe_resource references</li>
</ul>
<p>Jason Ekstrand (9):</p>
<ul>
<li>anv/query: Invalidate the correct range</li>
<li>anv/GetQueryPoolResults: Actually implement the spec</li>
<li>anv/image: Return early when unbinding an image</li>
<li>anv/query: Fix the location of timestamp availability</li>
<li>anv: Make anv_get_layerCount a macro</li>
<li>anv/blorp: Use anv_get_layerCount everywhere</li>
<li>anv/cmd_buffer: Apply flush operations prior to executing secondaries</li>
<li>anv/cmd_buffer: Fix bad indentation</li>
<li>anv: Flush caches prior to PIPELINE_SELECT on all gens</li>
</ul>
<p>José Fonseca (1):</p>
<ul>
<li>c11/threads: Include thr/xtimec.h for xtime definition when building with MSVC.</li>
</ul>
<p>Juan A. Suarez Romero (1):</p>
<ul>
<li>tests/cache_test: allow crossing mount points</li>
</ul>
<p>Karol Herbst (1):</p>
<ul>
<li>nvc0/ir: treat FMA like MAD for operand propagation</li>
</ul>
<p>Kenneth Graunke (1):</p>
<ul>
<li>i965: Fall back to GL 4.2/4.3 on Haswell if the kernel isn't new enough.</li>
</ul>
<p>Marek Olšák (1):</p>
<ul>
<li>radeonsi: don't hang on shader compile failure</li>
</ul>
<p>Matt Turner (1):</p>
<ul>
<li>i965/fs: Don't emit SEL instructions for type-converting MOVs.</li>
</ul>
<p>Nanley Chery (1):</p>
<ul>
<li>intel: Correct the BDW surface state size</li>
</ul>
<p>Nicolai Hähnle (1):</p>
<ul>
<li>mesa/main: fix MultiDrawElements[BaseVertex] validation of primcount</li>
</ul>
<p>Rob Clark (1):</p>
<ul>
<li>freedreno: fix memory leak</li>
</ul>
<p>Tim Rowley (1):</p>
<ul>
<li>swr: [rasterizer jitter] fix llvm &gt;= 5.0 build break</li>
</ul>
<p>Timothy Arceri (2):</p>
<ul>
<li>glsl: fix lower jumps for returns when loop is inside an if</li>
<li>mesa: update lower_jumps tests after bug fix</li>
</ul>
<p>Topi Pohjolainen (1):</p>
<ul>
<li>i965/gen8+: Do full stall when switching pipeline</li>
</ul>
<p>Xu Randy (2):</p>
<ul>
<li>anv/blorp: Fix a crash in CmdClearColorImage</li>
<li>anv/genX: Solve the vkCreateGraphicsPipelines crash</li>
</ul>
</div>
</body>
</html>

View File

@@ -1,149 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 17.0.4 Release Notes / April 17, 2017</h1>
<p>
Mesa 17.0.4 is a bug fix release which fixes bugs found since the 17.0.3 release.
</p>
<p>
Mesa 17.0.4 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>
<h2>SHA256 checksums</h2>
<pre>
c4c34ba05d48f76b45bc05bc4b6e9242077f403d63c4f0c355c7b07786de233e mesa-17.0.4.tar.gz
1269dc8545a193932a0779b2db5bce9be4a5f6813b98c38b93b372be8362a346 mesa-17.0.4.tar.xz
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99515">Bug 99515</a> - SIGSEGV MAPERR on Android nougat-x86 with mesa 17.0.0rc</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100391">Bug 100391</a> - SachaWillems deferredmultisampling asserts</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100452">Bug 100452</a> - push_constants host memory leak when resetting command buffer</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100582">Bug 100582</a> - [GEN8+] piglit.spec.arb_stencil_texturing.glblitframebuffer corrupts state.gl_texture* assertions</li>
</ul>
<h2>Changes</h2>
<p>Alex Deucher (1):</p>
<ul>
<li>radeonsi: add new polaris10 pci id</li>
</ul>
<p>Alex Smith (1):</p>
<ul>
<li>radv: Invalidate L2 for TRANSFER_WRITE barriers</li>
</ul>
<p>Andres Gomez (1):</p>
<ul>
<li>docs: add sha256 checksums for 17.0.3</li>
</ul>
<p>Craig Stout (1):</p>
<ul>
<li>anv/cmd_buffer: fix host memory leak</li>
</ul>
<p>Emil Velikov (3):</p>
<ul>
<li>Revert "cherry-ignore: add the Flush after unmap in gbm/dri fix"</li>
<li>Revert "freedreno: fix memory leak"</li>
<li>Update version to 17.0.4</li>
</ul>
<p>Fabio Estevam (1):</p>
<ul>
<li>loader: Move non-error message to debug level</li>
</ul>
<p>Ilia Mirkin (4):</p>
<ul>
<li>nvc0/ir: fix LSB/BFE/BFI implementations</li>
<li>nvc0/ir: fix overwriting of offset register with interpolateAtOffset</li>
<li>nvc0: increase texture buffer object alignment to 256 for pre-GM107</li>
<li>nouveau: when mapping a persistent buffer, synchronize on former xfers</li>
</ul>
<p>Jason Ekstrand (5):</p>
<ul>
<li>i965/fs: Always provide a default LOD of 0 for TXS and TXL</li>
<li>anv/pipeline: Properly handle unset gl_Layer and gl_ViewportIndex</li>
<li>anv/blorp: Align vertex buffers to 64B</li>
<li>i965/blorp: Align vertex buffers to 64B</li>
<li>i965/blorp: Bump the batch space estimate</li>
</ul>
<p>Jerome Duval (2):</p>
<ul>
<li>haiku: build fixes around debug defines</li>
<li>haiku/winsys: fix dt prototype args</li>
</ul>
<p>Julien Isorce (4):</p>
<ul>
<li>winsys/radeon: check null in radeon_cs_create_fence</li>
<li>winsys/radeon: check null return from radeon_cs_create_fence in cs_flush</li>
<li>radeon: initialize hole variable before calling container_of</li>
<li>radeon_drm_bo: explicitly check return value of drmCommandWriteRead</li>
</ul>
<p>Kenneth Graunke (4):</p>
<ul>
<li>i965: Document the sad story of the kernel command parser.</li>
<li>i965: Set screen-&gt;cmd_parser_version to 0 if we can't write registers.</li>
<li>i965: Skip register write detection when possible.</li>
<li>i965: Set kernel features before computing max GL version.</li>
</ul>
<p>Marek Olšák (1):</p>
<ul>
<li>targets: export radeon winsys_create functions to silence LLVM warning</li>
</ul>
<p>Michal Srb (1):</p>
<ul>
<li>st: Add cubeMapFace parameter to st_finalize_texture.</li>
</ul>
<p>Thomas Hellstrom (1):</p>
<ul>
<li>gbm/dri: Flush after unmap</li>
</ul>
</div>
</body>
</html>

View File

@@ -1,144 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 17.0.5 Release Notes / April 28, 2017</h1>
<p>
Mesa 17.0.5 is a bug fix release which fixes bugs found since the 17.0.4 release.
</p>
<p>
Mesa 17.0.5 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>
<h2>SHA256 checksums</h2>
<pre>
7510eee0d0077860b250d30d73305048c2df4ba09ea8fc04e4f3eec7beece301 mesa-17.0.5.tar.gz
668efa445d2f57a26e5c096b1965a685733a3b57d9c736f9d6460263847f9bfe mesa-17.0.5.tar.xz
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97524">Bug 97524</a> - Samplers referring to the same texture unit with different types should raise GL_INVALID_OPERATION</li>
</ul>
<h2>Changes</h2>
<p>Andres Gomez (16):</p>
<ul>
<li>cherry-ignore: Add the pci_id into the shader cache UUID</li>
<li>cherry-ignore: fix crash if ctx torn down with no rendering</li>
<li>cherry-ignore: Fix typos.</li>
<li>cherry-ignore: Revert "etnaviv: Cannot render to rb-swapped formats"</li>
<li>cherry-ignore: Revert "i965/fs: Don't emit SEL instructions for type-converting MOVs."</li>
<li>cherry-ignore: fix typo in a2b10g10r10 fast clear calculation</li>
<li>cherry-ignore: remove unused anv_dispatch_table dtable</li>
<li>cherry-ignore: remove unused radv_dispatch_table dtable</li>
<li>cherry-ignore: make radv_resolve_entrypoint static</li>
<li>cherry-ignore: vulkan: add support for libmesa_vulkan_util</li>
<li>cherry-ignore: r600: fix libmesa_amd_common dependency</li>
<li>cherry-ignore: remove dead brw_new_shader() declaration</li>
<li>cherry-ignore: remove i965_symbols_test reference from .gitignore</li>
<li>cherry-ignore: automake: ensure that the destination directory is created</li>
<li>cherry-ignore: provide required gem stubs for the tests</li>
<li>Update version to 17.0.5</li>
</ul>
<p>Boyan Ding (2):</p>
<ul>
<li>nvc0/ir: Properly handle a "split form" of predicate destination</li>
<li>nir: Destination component count of shader_clock intrinsic is 2</li>
</ul>
<p>Emil Velikov (5):</p>
<ul>
<li>docs: add sha256 checksums for 17.0.4</li>
<li>winsys/sw/dri: don't use GNU void pointer arithmetic</li>
<li>st/clover: add space between &lt; and ::</li>
<li>configure.ac: check require_basic_egl only if egl enabled</li>
<li>st/mesa: automake: honour the vdpau header install location</li>
</ul>
<p>Francisco Jerez (2):</p>
<ul>
<li>intel/fs: Use regs_written() in spilling cost heuristic for improved accuracy.</li>
<li>intel/fs: Take into account amount of data read in spilling cost heuristic.</li>
</ul>
<p>Grazvydas Ignotas (1):</p>
<ul>
<li>radv: report timestampPeriod correctly</li>
</ul>
<p>Jason Ekstrand (5):</p>
<ul>
<li>anv/blorp: Flush the texture cache in UpdateBuffer</li>
<li>anv/cmd_buffer: Flush the VF cache at the top of all primaries</li>
<li>anv/cmd_buffer: Always set up a null surface state</li>
<li>anv/cmd_buffer: Use the null surface state for ATTACHMENT_UNUSED</li>
<li>anv/blorp: Properly handle VK_ATTACHMENT_UNUSED</li>
</ul>
<p>Kenneth Graunke (1):</p>
<ul>
<li>i965/vec4: Avoid reswizzling MACH instructions in opt_register_coalesce().</li>
</ul>
<p>Marek Olšák (1):</p>
<ul>
<li>st/mesa: invalidate the readpix cache in st_indirect_draw_vbo</li>
</ul>
<p>Nanley Chery (1):</p>
<ul>
<li>anv/cmd_buffer: Disable CCS on BDW input attachments</li>
</ul>
<p>Nicolai Hähnle (4):</p>
<ul>
<li>mesa: fix remaining xfb prims check for GLES with multiple instances</li>
<li>mesa: extract need_xfb_remaining_prims_check</li>
<li>mesa: move glMultiDrawArrays to vbo and fix error handling</li>
<li>vbo: fix gl_DrawID handling in glMultiDrawArrays</li>
</ul>
<p>Rob Clark (1):</p>
<ul>
<li>util/queue: don't hang at exit</li>
</ul>
<p>Timothy Arceri (1):</p>
<ul>
<li>mesa: validate sampler type across the whole program</li>
</ul>
</div>
</body>
</html>

View File

@@ -1,186 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 17.0.6 Release Notes / May 12, 2017</h1>
<p>
Mesa 17.0.6 is a bug fix release which fixes bugs found since the 17.0.5 release.
</p>
<p>
Mesa 17.0.6 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>
<h2>SHA256 checksums</h2>
<pre>
f1b2497d553e9a584f0caa3a2d9d310e27ead15fb0af170da69f6e70fb5031cd mesa-17.0.6.tar.gz
89ecf3bcd0f18dcca5aaa42bf36bb52a2df33be89889f94aaaad91f7a504a69d mesa-17.0.6.tar.xz
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98428">Bug 98428</a> - Undefined non-weak-symbol in dri-drivers</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100854">Bug 100854</a> - YUV to RGB Color Space Conversion result is not precise</li>
</ul>
<h2>Changes</h2>
<p>Adam Jackson (1):</p>
<ul>
<li>egl/platform/drm: Don't take display ownership until gbm is initialized</li>
</ul>
<p>Andres Gomez (7):</p>
<ul>
<li>docs: add sha256 checksums for 17.0.5</li>
<li>travis: replace Trusty-based LLVM toolchain apt-get with apt addon</li>
<li>travis: add the possibility of using the txc-dxtn library</li>
<li>cherry-ignore: 17.1 nominations only</li>
<li>cherry-ignore: fix regression in descriptor set freeing.</li>
<li>cherry-ignore: rejected commits</li>
<li>Update version to 17.0.6</li>
</ul>
<p>Ben Boeckel (1):</p>
<ul>
<li>scons: update for LLVM 4.0</li>
</ul>
<p>Brian Paul (1):</p>
<ul>
<li>st/mesa: move duplicated st_ws_framebuffer() function into header file</li>
</ul>
<p>Chad Versace (3):</p>
<ul>
<li>egl: Emit error when EGLSurface is lost</li>
<li>egl/android: Cancel any outstanding ANativeBuffer in surface destructor</li>
<li>egl/android: Mark surface as lost when dequeueBuffer fails</li>
</ul>
<p>Christian Gmeiner (1):</p>
<ul>
<li>etnaviv: add L8A8_UNORM texture format</li>
</ul>
<p>Dave Airlie (2):</p>
<ul>
<li>radv/wsi: report presentation error per image request</li>
<li>radv: enable POLARIS12 support.</li>
</ul>
<p>Emil Velikov (21):</p>
<ul>
<li>travis: correct libdrm required regex to also track libdrm itself</li>
<li>travis: add nearly all gallium drivers to the list</li>
<li>travis: use both cores for make/make check</li>
<li>travis: bring the scons build on par with AppVeyor</li>
<li>travis: explicitly LD_LIBRARY_PATH the local libraries</li>
<li>travis: enable apt cache</li>
<li>travis: automatically manage ccache caching</li>
<li>travis: remove unused -dev packages</li>
<li>travis: rework "if test" blocks in the script section</li>
<li>travis: split out matrix from env</li>
<li>travis: add separate "scons" and "scons llvm" targets</li>
<li>travis: add "scons swr" to the build matrix</li>
<li>travis: add "make swr" to the build matrix</li>
<li>travis: split the make target to three separate ones</li>
<li>travis: model scons check target like the make one</li>
<li>travis: add Gallium state-tracker targets</li>
<li>travis: enable wayland support</li>
<li>travis: bump MAKEFLAGS to -j4</li>
<li>gallium/dri: always link against shared glapi</li>
<li>mesa/dri: always link against shared glapi</li>
<li>glx: glX_proto_send.py: use correct compile guard GLX_INDIRECT_RENDERING</li>
</ul>
<p>Eric Anholt (1):</p>
<ul>
<li>nir: Pick just the channels we want for bitmap and drawpixels lowering.</li>
</ul>
<p>Ilia Mirkin (1):</p>
<ul>
<li>gallium/targets: fix bool setting on BE architectures</li>
</ul>
<p>Jason Ekstrand (1):</p>
<ul>
<li>anv/cmd_buffer: Use the device allocator for QueueSubmit</li>
</ul>
<p>Johnson Lin (1):</p>
<ul>
<li>nir/lower_tex: Fix minor error in YUV color conversion matrix</li>
</ul>
<p>Marek Olšák (2):</p>
<ul>
<li>radeonsi: adjust ESGS ring buffer size computation on VI</li>
<li>radeonsi: apply the tess+GS hang workaround to Polaris12 as well</li>
</ul>
<p>Nicolai Hähnle (1):</p>
<ul>
<li>radeonsi: fix gl_PrimitiveID in tessellation with instanced draws on SI</li>
</ul>
<p>Philipp Zabel (3):</p>
<ul>
<li>renderonly: close transfer prime_fd</li>
<li>renderonly: drop resources on destroy</li>
<li>renderonly: use drmIoctl</li>
</ul>
<p>Rhys Kidd (3):</p>
<ul>
<li>travis: Support LLVM 3.8+ on Trusty-based Travis-CI via apt-get not apt addon</li>
<li>travis: Add radv vulkan driver to continuous integration</li>
<li>travis: Add radeonsi to continuous integration</li>
</ul>
<p>Rob Clark (1):</p>
<ul>
<li>freedreno/a3xx: fix hang w/ large render targets and small gmem</li>
</ul>
<p>Samuel Iglesias Gonsálvez (5):</p>
<ul>
<li>i965/vec4: fix vertical stride to avoid breaking region parameter rule</li>
<li>i965/vec4: fix register width for DF VGRF and UNIFORM</li>
<li>i965/vec4: don't modify regioning parameters to the sources of DF align1 instructions</li>
<li>anv: anv_gem_mmap() returns MAP_FAILED as mapping error</li>
<li>anv: vkBindImageMemory() should return VK_ERROR_OUT_OF_{HOST,DEVICE}_MEMORY on failure</li>
</ul>
</div>
</body>
</html>

View File

@@ -1,145 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 17.0.7 Release Notes / June 1, 2017</h1>
<p>
Mesa 17.0.7 is a bug fix release which fixes bugs found since the 17.0.6 release.
</p>
<p>
Mesa 17.0.7 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>
<h2>SHA256 checksums</h2>
<pre>
bc68d13c6b1a053b855ac453ebf7e62bd89511adf44bad6c613e09f7fa13390a mesa-17.0.7.tar.gz
f6d75304a229c8d10443e219d6b6c0c342567dbab5a879ebe7cfa3c9139c4492 mesa-17.0.7.tar.xz
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98833">Bug 98833</a> - [REGRESSION, bisected] Wayland revert commit breaks non-Vsync fullscreen frame updates</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100741">Bug 100741</a> - Chromium - Memory leak</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100925">Bug 100925</a> - [HSW/BSW/BDW/SKL] Google Earth is not resolving all the details in the map correctly</li>
</ul>
<h2>Changes</h2>
<p>Andres Gomez (1):</p>
<ul>
<li>docs: add sha256 checksums for 17.0.6</li>
</ul>
<p>Bartosz Tomczyk (1):</p>
<ul>
<li>mesa: Avoid leaking surface in st_renderbuffer_delete</li>
</ul>
<p>Chad Versace (1):</p>
<ul>
<li>egl: Partially revert 23c86c74, fix eglMakeCurrent</li>
</ul>
<p>Daniel Stone (7):</p>
<ul>
<li>vulkan: Fix Wayland uninitialised registry</li>
<li>vulkan/wsi/wayland: Remove roundtrip when creating image</li>
<li>vulkan/wsi/wayland: Use per-display event queue</li>
<li>vulkan/wsi/wayland: Use proxy wrappers for swapchain</li>
<li>egl/wayland: Don't open-code roundtrip</li>
<li>egl/wayland: Use per-surface event queues</li>
<li>egl/wayland: Ensure we get a back buffer</li>
</ul>
<p>Emil Velikov (5):</p>
<ul>
<li>st/va: fix misplaced closing bracket</li>
<li>anv: automake: list shared libraries after the static ones</li>
<li>radv: automake: list shared libraries after the static ones</li>
<li>egl/wayland: select the format based on the interface used</li>
<li>Update version to 17.0.7</li>
</ul>
<p>Eric Anholt (2):</p>
<ul>
<li>renderonly: Initialize fields of struct winsys_handle.</li>
<li>vc4: Don't allocate new BOs to avoid synchronization when they're shared.</li>
</ul>
<p>Hans de Goede (1):</p>
<ul>
<li>glxglvnddispatch: Add missing dispatch for GetDriverConfig</li>
</ul>
<p>Ilia Mirkin (1):</p>
<ul>
<li>nvc0/ir: SHLADD's middle source must be an immediate</li>
</ul>
<p>Jason Ekstrand (2):</p>
<ul>
<li>i965/blorp: Do an end-of-pipe sync on both sides of fast-clear ops</li>
<li>i965: Round copy size to the nearest block in intel_miptree_copy</li>
</ul>
<p>Lucas Stach (1):</p>
<ul>
<li>etnaviv: stop oversizing buffer resources</li>
</ul>
<p>Nanley Chery (2):</p>
<ul>
<li>anv/formats: Update the three-channel BC1 mappings</li>
<li>i965/formats: Update the three-channel DXT1 mappings</li>
</ul>
<p>Pohjolainen, Topi (1):</p>
<ul>
<li>intel/isl/gen7: Use stencil vertical alignment of 8 instead of 4</li>
</ul>
<p>Samuel Iglesias Gonsálvez (3):</p>
<ul>
<li>i965/vec4/gs: restore the uniform values which were overwritten by failed vec4_gs_visitor execution</li>
<li>i965/vec4: fix swizzle and writemask when loading a uniform with constant offset</li>
<li>i965/vec4: load dvec3/4 uniforms first in the push constant buffer</li>
</ul>
<p>Tom Stellard (1):</p>
<ul>
<li>gallivm: Make sure module has the correct data layout when pass manager runs</li>
</ul>
</div>
</body>
</html>

View File

@@ -1164,6 +1164,8 @@ struct __DRIdri2ExtensionRec {
#define __DRI_IMAGE_FORMAT_ARGB2101010 0x100a
#define __DRI_IMAGE_FORMAT_SARGB8 0x100b
#define __DRI_IMAGE_FORMAT_ARGB1555 0x100c
#define __DRI_IMAGE_FORMAT_R16 0x100d
#define __DRI_IMAGE_FORMAT_GR1616 0x100e
#define __DRI_IMAGE_USE_SHARE 0x0001
#define __DRI_IMAGE_USE_SCANOUT 0x0002
@@ -1192,6 +1194,8 @@ struct __DRIdri2ExtensionRec {
#define __DRI_IMAGE_FOURCC_R8 0x20203852
#define __DRI_IMAGE_FOURCC_GR88 0x38385247
#define __DRI_IMAGE_FOURCC_ARGB1555 0x35315241
#define __DRI_IMAGE_FOURCC_R16 0x20363152
#define __DRI_IMAGE_FOURCC_GR1616 0x32335247
#define __DRI_IMAGE_FOURCC_RGB565 0x36314752
#define __DRI_IMAGE_FOURCC_ARGB8888 0x34325241
#define __DRI_IMAGE_FOURCC_XRGB8888 0x34325258

View File

@@ -30,9 +30,6 @@
#define EMULATED_THREADS_H_INCLUDED_
#include <time.h>
#ifdef _MSC_VER
#include <thr/xtimec.h> // for xtime
#endif
#ifndef TIME_UTC
#define TIME_UTC 1
@@ -44,13 +41,11 @@
typedef void (*tss_dtor_t)(void*);
typedef int (*thrd_start_t)(void*);
#ifndef _MSC_VER
struct xtime {
time_t sec;
long nsec;
};
typedef struct xtime xtime;
#endif
/*-------------------- enumeration constants --------------------*/

View File

@@ -202,7 +202,6 @@ CHIPSET(0x67C9, POLARIS10_, POLARIS10)
CHIPSET(0x67CA, POLARIS10_, POLARIS10)
CHIPSET(0x67CC, POLARIS10_, POLARIS10)
CHIPSET(0x67CF, POLARIS10_, POLARIS10)
CHIPSET(0x67D0, POLARIS10_, POLARIS10)
CHIPSET(0x67DF, POLARIS10_, POLARIS10)
CHIPSET(0x98E4, STONEY_, STONEY)
@@ -212,5 +211,4 @@ CHIPSET(0x6981, POLARIS12_, POLARIS12)
CHIPSET(0x6985, POLARIS12_, POLARIS12)
CHIPSET(0x6986, POLARIS12_, POLARIS12)
CHIPSET(0x6987, POLARIS12_, POLARIS12)
CHIPSET(0x6995, POLARIS12_, POLARIS12)
CHIPSET(0x699F, POLARIS12_, POLARIS12)

View File

@@ -6,7 +6,7 @@ extern "C" {
#endif
/*
** Copyright (c) 2015-2016 The Khronos Group Inc.
** Copyright (c) 2015-2017 The Khronos Group Inc.
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
@@ -43,7 +43,7 @@ extern "C" {
#define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff)
#define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff)
// Version of this file
#define VK_HEADER_VERSION 38
#define VK_HEADER_VERSION 39
#define VK_NULL_HANDLE 0
@@ -145,6 +145,7 @@ typedef enum VkResult {
VK_ERROR_INCOMPATIBLE_DISPLAY_KHR = -1000003001,
VK_ERROR_VALIDATION_FAILED_EXT = -1000011001,
VK_ERROR_INVALID_SHADER_NV = -1000012000,
VK_ERROR_OUT_OF_POOL_MEMORY_KHR = -1000069000,
VK_RESULT_BEGIN_RANGE = VK_ERROR_FRAGMENTED_POOL,
VK_RESULT_END_RANGE = VK_INCOMPLETE,
VK_RESULT_RANGE_SIZE = (VK_INCOMPLETE - VK_ERROR_FRAGMENTED_POOL + 1),
@@ -225,13 +226,28 @@ typedef enum VkStructureType {
VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_NV = 1000057000,
VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_NV = 1000057001,
VK_STRUCTURE_TYPE_WIN32_KEYED_MUTEX_ACQUIRE_RELEASE_INFO_NV = 1000058000,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR = 1000059000,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR = 1000059001,
VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2_KHR = 1000059002,
VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR = 1000059003,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR = 1000059004,
VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2_KHR = 1000059005,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2_KHR = 1000059006,
VK_STRUCTURE_TYPE_SPARSE_IMAGE_FORMAT_PROPERTIES_2_KHR = 1000059007,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SPARSE_IMAGE_FORMAT_INFO_2_KHR = 1000059008,
VK_STRUCTURE_TYPE_VALIDATION_FLAGS_EXT = 1000061000,
VK_STRUCTURE_TYPE_VI_SURFACE_CREATE_INFO_NN = 1000062000,
VK_STRUCTURE_TYPE_OBJECT_TABLE_CREATE_INFO_NVX = 1000086000,
VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_NVX = 1000086001,
VK_STRUCTURE_TYPE_CMD_PROCESS_COMMANDS_INFO_NVX = 1000086002,
VK_STRUCTURE_TYPE_CMD_RESERVE_SPACE_FOR_COMMANDS_INFO_NVX = 1000086003,
VK_STRUCTURE_TYPE_DEVICE_GENERATED_COMMANDS_LIMITS_NVX = 1000086004,
VK_STRUCTURE_TYPE_DEVICE_GENERATED_COMMANDS_FEATURES_NVX = 1000086005,
VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES2_EXT = 1000090000,
VK_STRUCTURE_TYPE_DISPLAY_POWER_INFO_EXT = 1000091000,
VK_STRUCTURE_TYPE_DEVICE_EVENT_INFO_EXT = 1000091001,
VK_STRUCTURE_TYPE_DISPLAY_EVENT_INFO_EXT = 1000091002,
VK_STRUCTURE_TYPE_SWAPCHAIN_COUNTER_CREATE_INFO_EXT = 1000091003,
VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO,
VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO,
VK_STRUCTURE_TYPE_RANGE_SIZE = (VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1),
@@ -840,6 +856,8 @@ typedef enum VkFormatFeatureFlagBits {
VK_FORMAT_FEATURE_BLIT_DST_BIT = 0x00000800,
VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT = 0x00001000,
VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_IMG = 0x00002000,
VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR = 0x00004000,
VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR = 0x00008000,
VK_FORMAT_FEATURE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
} VkFormatFeatureFlagBits;
typedef VkFlags VkFormatFeatureFlags;
@@ -863,6 +881,7 @@ typedef enum VkImageCreateFlagBits {
VK_IMAGE_CREATE_SPARSE_ALIASED_BIT = 0x00000004,
VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT = 0x00000008,
VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT = 0x00000010,
VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT_KHR = 0x00000020,
VK_IMAGE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
} VkImageCreateFlagBits;
typedef VkFlags VkImageCreateFlags;
@@ -3206,6 +3225,18 @@ VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSurfaceKHR)
typedef enum VkColorSpaceKHR {
VK_COLOR_SPACE_SRGB_NONLINEAR_KHR = 0,
VK_COLOR_SPACE_DISPLAY_P3_LINEAR_EXT = 1000104001,
VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT = 1000104002,
VK_COLOR_SPACE_SCRGB_LINEAR_EXT = 1000104003,
VK_COLOR_SPACE_SCRGB_NONLINEAR_EXT = 1000104004,
VK_COLOR_SPACE_DCI_P3_LINEAR_EXT = 1000104005,
VK_COLOR_SPACE_DCI_P3_NONLINEAR_EXT = 1000104006,
VK_COLOR_SPACE_BT709_LINEAR_EXT = 1000104007,
VK_COLOR_SPACE_BT709_NONLINEAR_EXT = 1000104008,
VK_COLOR_SPACE_BT2020_LINEAR_EXT = 1000104009,
VK_COLOR_SPACE_BT2020_NONLINEAR_EXT = 1000104010,
VK_COLOR_SPACE_ADOBERGB_LINEAR_EXT = 1000104011,
VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT = 1000104012,
VK_COLOR_SPACE_BEGIN_RANGE_KHR = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
VK_COLOR_SPACE_END_RANGE_KHR = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
VK_COLOR_SPACE_RANGE_SIZE_KHR = (VK_COLOR_SPACE_SRGB_NONLINEAR_KHR - VK_COLOR_SPACE_SRGB_NONLINEAR_KHR + 1),
@@ -3741,6 +3772,136 @@ VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceWin32PresentationSupportKHR(
#define VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME "VK_KHR_sampler_mirror_clamp_to_edge"
#define VK_KHR_get_physical_device_properties2 1
#define VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_SPEC_VERSION 1
#define VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME "VK_KHR_get_physical_device_properties2"
// Wraps VkPhysicalDeviceFeatures behind an sType/pNext header.
// Filled through the pFeatures argument of vkGetPhysicalDeviceFeatures2KHR.
typedef struct VkPhysicalDeviceFeatures2KHR {
VkStructureType sType;
void* pNext;
VkPhysicalDeviceFeatures features;
} VkPhysicalDeviceFeatures2KHR;
// Wraps VkPhysicalDeviceProperties behind an sType/pNext header.
// Filled through the pProperties argument of vkGetPhysicalDeviceProperties2KHR.
typedef struct VkPhysicalDeviceProperties2KHR {
VkStructureType sType;
void* pNext;
VkPhysicalDeviceProperties properties;
} VkPhysicalDeviceProperties2KHR;
// Wraps VkFormatProperties behind an sType/pNext header.
// Filled through the pFormatProperties argument of
// vkGetPhysicalDeviceFormatProperties2KHR.
typedef struct VkFormatProperties2KHR {
VkStructureType sType;
void* pNext;
VkFormatProperties formatProperties;
} VkFormatProperties2KHR;
// Wraps VkImageFormatProperties behind an sType/pNext header.
// Output argument (pImageFormatProperties) of
// vkGetPhysicalDeviceImageFormatProperties2KHR.
typedef struct VkImageFormatProperties2KHR {
VkStructureType sType;
void* pNext;
VkImageFormatProperties imageFormatProperties;
} VkImageFormatProperties2KHR;
// Query description passed (as a const pointer, pImageFormatInfo) to
// vkGetPhysicalDeviceImageFormatProperties2KHR: bundles the format, image
// type, tiling, usage flags and create flags of the image being asked about.
typedef struct VkPhysicalDeviceImageFormatInfo2KHR {
VkStructureType sType;
const void* pNext;
VkFormat format;
VkImageType type;
VkImageTiling tiling;
VkImageUsageFlags usage;
VkImageCreateFlags flags;
} VkPhysicalDeviceImageFormatInfo2KHR;
// Wraps VkQueueFamilyProperties behind an sType/pNext header.
// Element type of the pQueueFamilyProperties array written by
// vkGetPhysicalDeviceQueueFamilyProperties2KHR.
typedef struct VkQueueFamilyProperties2KHR {
VkStructureType sType;
void* pNext;
VkQueueFamilyProperties queueFamilyProperties;
} VkQueueFamilyProperties2KHR;
// Wraps VkPhysicalDeviceMemoryProperties behind an sType/pNext header.
// Filled through the pMemoryProperties argument of
// vkGetPhysicalDeviceMemoryProperties2KHR.
typedef struct VkPhysicalDeviceMemoryProperties2KHR {
VkStructureType sType;
void* pNext;
VkPhysicalDeviceMemoryProperties memoryProperties;
} VkPhysicalDeviceMemoryProperties2KHR;
// Wraps VkSparseImageFormatProperties behind an sType/pNext header.
// Element type of the pProperties array written by
// vkGetPhysicalDeviceSparseImageFormatProperties2KHR.
typedef struct VkSparseImageFormatProperties2KHR {
VkStructureType sType;
void* pNext;
VkSparseImageFormatProperties properties;
} VkSparseImageFormatProperties2KHR;
// Query description passed (as a const pointer, pFormatInfo) to
// vkGetPhysicalDeviceSparseImageFormatProperties2KHR: bundles the format,
// image type, sample count, usage flags and tiling being asked about.
typedef struct VkPhysicalDeviceSparseImageFormatInfo2KHR {
VkStructureType sType;
const void* pNext;
VkFormat format;
VkImageType type;
VkSampleCountFlagBits samples;
VkImageUsageFlags usage;
VkImageTiling tiling;
} VkPhysicalDeviceSparseImageFormatInfo2KHR;
typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceFeatures2KHR)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures2KHR* pFeatures);
typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceProperties2KHR)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties2KHR* pProperties);
typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceFormatProperties2KHR)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties2KHR* pFormatProperties);
typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceImageFormatProperties2KHR)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceImageFormatInfo2KHR* pImageFormatInfo, VkImageFormatProperties2KHR* pImageFormatProperties);
typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceQueueFamilyProperties2KHR)(VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties2KHR* pQueueFamilyProperties);
typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceMemoryProperties2KHR)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties2KHR* pMemoryProperties);
typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceSparseImageFormatProperties2KHR)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSparseImageFormatInfo2KHR* pFormatInfo, uint32_t* pPropertyCount, VkSparseImageFormatProperties2KHR* pProperties);
#ifndef VK_NO_PROTOTYPES
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceFeatures2KHR(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceFeatures2KHR* pFeatures);
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceProperties2KHR(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceProperties2KHR* pProperties);
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceFormatProperties2KHR(
VkPhysicalDevice physicalDevice,
VkFormat format,
VkFormatProperties2KHR* pFormatProperties);
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceImageFormatProperties2KHR(
VkPhysicalDevice physicalDevice,
const VkPhysicalDeviceImageFormatInfo2KHR* pImageFormatInfo,
VkImageFormatProperties2KHR* pImageFormatProperties);
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceQueueFamilyProperties2KHR(
VkPhysicalDevice physicalDevice,
uint32_t* pQueueFamilyPropertyCount,
VkQueueFamilyProperties2KHR* pQueueFamilyProperties);
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceMemoryProperties2KHR(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceMemoryProperties2KHR* pMemoryProperties);
VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceSparseImageFormatProperties2KHR(
VkPhysicalDevice physicalDevice,
const VkPhysicalDeviceSparseImageFormatInfo2KHR* pFormatInfo,
uint32_t* pPropertyCount,
VkSparseImageFormatProperties2KHR* pProperties);
#endif
#define VK_KHR_shader_draw_parameters 1
#define VK_KHR_SHADER_DRAW_PARAMETERS_SPEC_VERSION 1
#define VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME "VK_KHR_shader_draw_parameters"
#define VK_KHR_maintenance1 1
#define VK_KHR_MAINTENANCE1_SPEC_VERSION 1
#define VK_KHR_MAINTENANCE1_EXTENSION_NAME "VK_KHR_maintenance1"
typedef VkFlags VkCommandPoolTrimFlagsKHR;
typedef void (VKAPI_PTR *PFN_vkTrimCommandPoolKHR)(VkDevice device, VkCommandPool commandPool, VkCommandPoolTrimFlagsKHR flags);
#ifndef VK_NO_PROTOTYPES
VKAPI_ATTR void VKAPI_CALL vkTrimCommandPoolKHR(
VkDevice device,
VkCommandPool commandPool,
VkCommandPoolTrimFlagsKHR flags);
#endif
#define VK_EXT_debug_report 1
VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDebugReportCallbackEXT)
@@ -4167,6 +4328,42 @@ typedef struct VkValidationFlagsEXT {
#ifdef VK_USE_PLATFORM_VI_NN
#define VK_NN_vi_surface 1
#define VK_NN_VI_SURFACE_SPEC_VERSION 1
#define VK_NN_VI_SURFACE_EXTENSION_NAME "VK_NN_vi_surface"
typedef VkFlags VkViSurfaceCreateFlagsNN;
// Creation parameters passed (as a const pointer, pCreateInfo) to
// vkCreateViSurfaceNN, which returns a VkSurfaceKHR.
typedef struct VkViSurfaceCreateInfoNN {
VkStructureType sType;
const void* pNext;
VkViSurfaceCreateFlagsNN flags;
// NOTE(review): opaque pointer; presumably the native VI window handle — confirm against the VK_NN_vi_surface spec.
void* window;
} VkViSurfaceCreateInfoNN;
typedef VkResult (VKAPI_PTR *PFN_vkCreateViSurfaceNN)(VkInstance instance, const VkViSurfaceCreateInfoNN* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface);
#ifndef VK_NO_PROTOTYPES
VKAPI_ATTR VkResult VKAPI_CALL vkCreateViSurfaceNN(
VkInstance instance,
const VkViSurfaceCreateInfoNN* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSurfaceKHR* pSurface);
#endif
#endif /* VK_USE_PLATFORM_VI_NN */
#define VK_EXT_shader_subgroup_ballot 1
#define VK_EXT_SHADER_SUBGROUP_BALLOT_SPEC_VERSION 1
#define VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME "VK_EXT_shader_subgroup_ballot"
#define VK_EXT_shader_subgroup_vote 1
#define VK_EXT_SHADER_SUBGROUP_VOTE_SPEC_VERSION 1
#define VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME "VK_EXT_shader_subgroup_vote"
#define VK_NVX_device_generated_commands 1
VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkObjectTableNVX)
VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkIndirectCommandsLayoutNVX)
@@ -4322,6 +4519,7 @@ typedef struct VkObjectTableIndexBufferEntryNVX {
VkObjectEntryTypeNVX type;
VkObjectEntryUsageFlagsNVX flags;
VkBuffer buffer;
VkIndexType indexType;
} VkObjectTableIndexBufferEntryNVX;
typedef struct VkObjectTablePushConstantEntryNVX {
@@ -4393,6 +4591,171 @@ VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX(
VkDeviceGeneratedCommandsLimitsNVX* pLimits);
#endif
#define VK_EXT_direct_mode_display 1
#define VK_EXT_DIRECT_MODE_DISPLAY_SPEC_VERSION 1
#define VK_EXT_DIRECT_MODE_DISPLAY_EXTENSION_NAME "VK_EXT_direct_mode_display"
typedef VkResult (VKAPI_PTR *PFN_vkReleaseDisplayEXT)(VkPhysicalDevice physicalDevice, VkDisplayKHR display);
#ifndef VK_NO_PROTOTYPES
VKAPI_ATTR VkResult VKAPI_CALL vkReleaseDisplayEXT(
VkPhysicalDevice physicalDevice,
VkDisplayKHR display);
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
#define VK_EXT_acquire_xlib_display 1
#include <X11/extensions/Xrandr.h>
#define VK_EXT_ACQUIRE_XLIB_DISPLAY_SPEC_VERSION 1
#define VK_EXT_ACQUIRE_XLIB_DISPLAY_EXTENSION_NAME "VK_EXT_acquire_xlib_display"
typedef VkResult (VKAPI_PTR *PFN_vkAcquireXlibDisplayEXT)(VkPhysicalDevice physicalDevice, Display* dpy, VkDisplayKHR display);
typedef VkResult (VKAPI_PTR *PFN_vkGetRandROutputDisplayEXT)(VkPhysicalDevice physicalDevice, Display* dpy, RROutput rrOutput, VkDisplayKHR* pDisplay);
#ifndef VK_NO_PROTOTYPES
VKAPI_ATTR VkResult VKAPI_CALL vkAcquireXlibDisplayEXT(
VkPhysicalDevice physicalDevice,
Display* dpy,
VkDisplayKHR display);
VKAPI_ATTR VkResult VKAPI_CALL vkGetRandROutputDisplayEXT(
VkPhysicalDevice physicalDevice,
Display* dpy,
RROutput rrOutput,
VkDisplayKHR* pDisplay);
#endif
#endif /* VK_USE_PLATFORM_XLIB_XRANDR_EXT */
#define VK_EXT_display_surface_counter 1
#define VK_EXT_DISPLAY_SURFACE_COUNTER_SPEC_VERSION 1
#define VK_EXT_DISPLAY_SURFACE_COUNTER_EXTENSION_NAME "VK_EXT_display_surface_counter"
// Individual surface-counter bits; combined into the VkSurfaceCounterFlagsEXT
// mask and also passed singly as the `counter` argument of
// vkGetSwapchainCounterEXT. Only one counter bit (VBLANK) is defined here.
typedef enum VkSurfaceCounterFlagBitsEXT {
VK_SURFACE_COUNTER_VBLANK_EXT = 0x00000001,
// Sentinel, not a real counter. NOTE(review): presumably present to force a 32-bit enum representation — confirm.
VK_SURFACE_COUNTER_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF
} VkSurfaceCounterFlagBitsEXT;
typedef VkFlags VkSurfaceCounterFlagsEXT;
// Surface capability report written through the pSurfaceCapabilities argument
// of vkGetPhysicalDeviceSurfaceCapabilities2EXT.
// NOTE(review): the fields before supportedSurfaceCounters appear to mirror
// VkSurfaceCapabilitiesKHR — confirm against the VK_EXT_display_surface_counter spec.
typedef struct VkSurfaceCapabilities2EXT {
VkStructureType sType;
void* pNext;
uint32_t minImageCount;
uint32_t maxImageCount;
VkExtent2D currentExtent;
VkExtent2D minImageExtent;
VkExtent2D maxImageExtent;
uint32_t maxImageArrayLayers;
VkSurfaceTransformFlagsKHR supportedTransforms;
VkSurfaceTransformFlagBitsKHR currentTransform;
VkCompositeAlphaFlagsKHR supportedCompositeAlpha;
VkImageUsageFlags supportedUsageFlags;
// Mask of VkSurfaceCounterFlagBitsEXT values.
VkSurfaceCounterFlagsEXT supportedSurfaceCounters;
} VkSurfaceCapabilities2EXT;
typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceCapabilities2EXT)(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, VkSurfaceCapabilities2EXT* pSurfaceCapabilities);
#ifndef VK_NO_PROTOTYPES
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceCapabilities2EXT(
VkPhysicalDevice physicalDevice,
VkSurfaceKHR surface,
VkSurfaceCapabilities2EXT* pSurfaceCapabilities);
#endif
#define VK_EXT_display_control 1
#define VK_EXT_DISPLAY_CONTROL_SPEC_VERSION 1
#define VK_EXT_DISPLAY_CONTROL_EXTENSION_NAME "VK_EXT_display_control"
// Display power states selectable via VkDisplayPowerInfoEXT::powerState.
typedef enum VkDisplayPowerStateEXT {
VK_DISPLAY_POWER_STATE_OFF_EXT = 0,
VK_DISPLAY_POWER_STATE_SUSPEND_EXT = 1,
VK_DISPLAY_POWER_STATE_ON_EXT = 2,
// Bookkeeping aliases: first value, last value, and number of real values (3).
VK_DISPLAY_POWER_STATE_BEGIN_RANGE_EXT = VK_DISPLAY_POWER_STATE_OFF_EXT,
VK_DISPLAY_POWER_STATE_END_RANGE_EXT = VK_DISPLAY_POWER_STATE_ON_EXT,
VK_DISPLAY_POWER_STATE_RANGE_SIZE_EXT = (VK_DISPLAY_POWER_STATE_ON_EXT - VK_DISPLAY_POWER_STATE_OFF_EXT + 1),
// Sentinel, not a real state. NOTE(review): presumably present to force a 32-bit enum representation — confirm.
VK_DISPLAY_POWER_STATE_MAX_ENUM_EXT = 0x7FFFFFFF
} VkDisplayPowerStateEXT;
// Device event kinds selectable via VkDeviceEventInfoEXT::deviceEvent.
// Only one event (display hotplug) is defined here.
typedef enum VkDeviceEventTypeEXT {
VK_DEVICE_EVENT_TYPE_DISPLAY_HOTPLUG_EXT = 0,
// Bookkeeping aliases: first value, last value, and number of real values (1).
VK_DEVICE_EVENT_TYPE_BEGIN_RANGE_EXT = VK_DEVICE_EVENT_TYPE_DISPLAY_HOTPLUG_EXT,
VK_DEVICE_EVENT_TYPE_END_RANGE_EXT = VK_DEVICE_EVENT_TYPE_DISPLAY_HOTPLUG_EXT,
VK_DEVICE_EVENT_TYPE_RANGE_SIZE_EXT = (VK_DEVICE_EVENT_TYPE_DISPLAY_HOTPLUG_EXT - VK_DEVICE_EVENT_TYPE_DISPLAY_HOTPLUG_EXT + 1),
// Sentinel, not a real event. NOTE(review): presumably present to force a 32-bit enum representation — confirm.
VK_DEVICE_EVENT_TYPE_MAX_ENUM_EXT = 0x7FFFFFFF
} VkDeviceEventTypeEXT;
// Display event kinds selectable via VkDisplayEventInfoEXT::displayEvent.
// Only one event (first pixel out) is defined here.
typedef enum VkDisplayEventTypeEXT {
VK_DISPLAY_EVENT_TYPE_FIRST_PIXEL_OUT_EXT = 0,
// Bookkeeping aliases: first value, last value, and number of real values (1).
VK_DISPLAY_EVENT_TYPE_BEGIN_RANGE_EXT = VK_DISPLAY_EVENT_TYPE_FIRST_PIXEL_OUT_EXT,
VK_DISPLAY_EVENT_TYPE_END_RANGE_EXT = VK_DISPLAY_EVENT_TYPE_FIRST_PIXEL_OUT_EXT,
VK_DISPLAY_EVENT_TYPE_RANGE_SIZE_EXT = (VK_DISPLAY_EVENT_TYPE_FIRST_PIXEL_OUT_EXT - VK_DISPLAY_EVENT_TYPE_FIRST_PIXEL_OUT_EXT + 1),
// Sentinel, not a real event. NOTE(review): presumably present to force a 32-bit enum representation — confirm.
VK_DISPLAY_EVENT_TYPE_MAX_ENUM_EXT = 0x7FFFFFFF
} VkDisplayEventTypeEXT;
// Input to vkDisplayPowerControlEXT (pDisplayPowerInfo): carries the
// VkDisplayPowerStateEXT requested for the display.
typedef struct VkDisplayPowerInfoEXT {
VkStructureType sType;
const void* pNext;
VkDisplayPowerStateEXT powerState;
} VkDisplayPowerInfoEXT;
// Input to vkRegisterDeviceEventEXT (pDeviceEventInfo): names the
// VkDeviceEventTypeEXT being registered; that call returns a VkFence.
typedef struct VkDeviceEventInfoEXT {
VkStructureType sType;
const void* pNext;
VkDeviceEventTypeEXT deviceEvent;
} VkDeviceEventInfoEXT;
// Input to vkRegisterDisplayEventEXT (pDisplayEventInfo): names the
// VkDisplayEventTypeEXT being registered; that call returns a VkFence.
typedef struct VkDisplayEventInfoEXT {
VkStructureType sType;
const void* pNext;
VkDisplayEventTypeEXT displayEvent;
} VkDisplayEventInfoEXT;
// Carries a mask of surface counters (VkSurfaceCounterFlagsEXT).
// NOTE(review): no consumer is visible in this header excerpt; the name
// suggests it is supplied at swapchain creation — confirm against the
// VK_EXT_display_control spec.
typedef struct VkSwapchainCounterCreateInfoEXT {
VkStructureType sType;
const void* pNext;
VkSurfaceCounterFlagsEXT surfaceCounters;
} VkSwapchainCounterCreateInfoEXT;
typedef VkResult (VKAPI_PTR *PFN_vkDisplayPowerControlEXT)(VkDevice device, VkDisplayKHR display, const VkDisplayPowerInfoEXT* pDisplayPowerInfo);
typedef VkResult (VKAPI_PTR *PFN_vkRegisterDeviceEventEXT)(VkDevice device, const VkDeviceEventInfoEXT* pDeviceEventInfo, const VkAllocationCallbacks* pAllocator, VkFence* pFence);
typedef VkResult (VKAPI_PTR *PFN_vkRegisterDisplayEventEXT)(VkDevice device, VkDisplayKHR display, const VkDisplayEventInfoEXT* pDisplayEventInfo, const VkAllocationCallbacks* pAllocator, VkFence* pFence);
typedef VkResult (VKAPI_PTR *PFN_vkGetSwapchainCounterEXT)(VkDevice device, VkSwapchainKHR swapchain, VkSurfaceCounterFlagBitsEXT counter, uint64_t* pCounterValue);
#ifndef VK_NO_PROTOTYPES
VKAPI_ATTR VkResult VKAPI_CALL vkDisplayPowerControlEXT(
VkDevice device,
VkDisplayKHR display,
const VkDisplayPowerInfoEXT* pDisplayPowerInfo);
VKAPI_ATTR VkResult VKAPI_CALL vkRegisterDeviceEventEXT(
VkDevice device,
const VkDeviceEventInfoEXT* pDeviceEventInfo,
const VkAllocationCallbacks* pAllocator,
VkFence* pFence);
VKAPI_ATTR VkResult VKAPI_CALL vkRegisterDisplayEventEXT(
VkDevice device,
VkDisplayKHR display,
const VkDisplayEventInfoEXT* pDisplayEventInfo,
const VkAllocationCallbacks* pAllocator,
VkFence* pFence);
VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainCounterEXT(
VkDevice device,
VkSwapchainKHR swapchain,
VkSurfaceCounterFlagBitsEXT counter,
uint64_t* pCounterValue);
#endif
#define VK_EXT_swapchain_colorspace 1
#define VK_SWAPCHAIN_COLOR_SPACE_SPEC_VERSION 1
#define VK_SWAPCHAIN_COLOR_SPACE_EXTENSION_NAME "VK_EXT_swapchain_colorspace"
#ifdef __cplusplus
}
#endif

View File

@@ -105,26 +105,8 @@ def generate(env):
'HAVE_STDINT_H',
])
env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')])
# LIBS should match the output of `llvm-config --libs engine mcjit bitwriter x86asmprinter irreader`
if llvm_version >= distutils.version.LooseVersion('4.0'):
env.Prepend(LIBS = [
'LLVMX86Disassembler', 'LLVMX86AsmParser',
'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',
'LLVMDebugInfoCodeView', 'LLVMCodeGen',
'LLVMScalarOpts', 'LLVMInstCombine',
'LLVMTransformUtils',
'LLVMBitWriter', 'LLVMX86Desc',
'LLVMMCDisassembler', 'LLVMX86Info',
'LLVMX86AsmPrinter', 'LLVMX86Utils',
'LLVMMCJIT', 'LLVMExecutionEngine', 'LLVMTarget',
'LLVMAnalysis', 'LLVMProfileData',
'LLVMRuntimeDyld', 'LLVMObject', 'LLVMMCParser',
'LLVMBitReader', 'LLVMMC', 'LLVMCore',
'LLVMSupport',
'LLVMIRReader', 'LLVMAsmParser',
'LLVMDemangle', 'LLVMGlobalISel', 'LLVMDebugInfoMSF',
])
elif llvm_version >= distutils.version.LooseVersion('3.9'):
# LIBS should match the output of `llvm-config --libs engine mcjit bitwriter x86asmprinter`
if llvm_version >= distutils.version.LooseVersion('3.9'):
env.Prepend(LIBS = [
'LLVMX86Disassembler', 'LLVMX86AsmParser',
'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',

View File

@@ -55,7 +55,7 @@ LOCAL_C_INCLUDES := \
external/llvm/include \
external/llvm/device/include \
external/libcxx/include \
$(ELF_INCLUDES)
external/elfutils/$(if $(filter 5,$(MESA_ANDROID_MAJOR_VERSION)),0.153/,$(if $(filter 6,$(MESA_ANDROID_MAJOR_VERSION)),src/))libelf
LOCAL_STATIC_LIBRARIES := libLLVMCore

View File

@@ -119,7 +119,6 @@ static const char *ac_get_llvm_processor_name(enum radeon_family family)
case CHIP_POLARIS10:
return "polaris10";
case CHIP_POLARIS11:
case CHIP_POLARIS12:
return "polaris11";
#endif
default:

View File

@@ -264,7 +264,7 @@ static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,
LLVMValueRef ptr;
int addr_space;
offset = LLVMConstInt(ctx->i32, idx * 16, false);
offset = LLVMConstInt(ctx->i32, idx, false);
ptr = ctx->shared_memory;
ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, "");
@@ -1267,9 +1267,6 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
src[1] = to_float(ctx, src[1]);
result = LLVMBuildFRem(ctx->builder, src[0], src[1], "");
break;
case nir_op_irem:
result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
break;
case nir_op_idiv:
result = LLVMBuildSDiv(ctx->builder, src[0], src[1], "");
break;
@@ -1721,17 +1718,15 @@ static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
unsigned desc_set = nir_intrinsic_desc_set(instr);
unsigned binding = nir_intrinsic_binding(instr);
LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
struct radv_pipeline_layout *pipeline_layout = ctx->options->layout;
struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
unsigned base_offset = layout->binding[binding].offset;
LLVMValueRef offset, stride;
if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start +
layout->binding[binding].dynamic_offset_offset;
desc_ptr = ctx->push_constants;
base_offset = pipeline_layout->push_constant_size + 16 * idx;
base_offset = ctx->options->layout->push_constant_size;
base_offset += 16 * layout->binding[binding].dynamic_offset_offset;
stride = LLVMConstInt(ctx->i32, 16, false);
} else
stride = LLVMConstInt(ctx->i32, layout->binding[binding].size, false);
@@ -1750,12 +1745,9 @@ static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx,
nir_intrinsic_instr *instr)
{
LLVMValueRef ptr, addr;
LLVMValueRef ptr;
addr = LLVMConstInt(ctx->i32, nir_intrinsic_base(instr), 0);
addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx, instr->src[0]), "");
ptr = build_gep0(ctx, ctx->push_constants, addr);
ptr = build_gep0(ctx, ctx->push_constants, get_src(ctx, instr->src[0]));
ptr = cast_ptr(ctx, ptr, get_def_type(ctx, &instr->dest.ssa));
return LLVMBuildLoad(ctx->builder, ptr, "");
@@ -2088,9 +2080,6 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
LLVMValueRef derived_ptr;
if (indir_index)
indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), "");
for (unsigned chan = 0; chan < ve; chan++) {
LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
if (indir_index)
@@ -2195,9 +2184,6 @@ visit_store_var(struct nir_to_llvm_context *ctx,
ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
LLVMValueRef derived_ptr;
if (indir_index)
indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), "");
for (unsigned chan = 0; chan < 4; chan++) {
if (!(writemask & (1 << chan)))
continue;
@@ -2252,7 +2238,7 @@ static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
}
static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
nir_intrinsic_instr *instr)
nir_intrinsic_instr *instr, bool add_frag_pos)
{
const struct glsl_type *type = instr->variables[0]->var->type;
if(instr->variables[0]->deref.child)
@@ -2267,8 +2253,6 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
LLVMValueRef res;
int count;
enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
@@ -2394,11 +2378,12 @@ static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx,
} else {
bool is_da = glsl_sampler_type_is_array(type) ||
glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
bool add_frag_pos = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS;
LLVMValueRef da = is_da ? ctx->i32one : ctx->i32zero;
LLVMValueRef glc = LLVMConstInt(ctx->i1, 0, false);
LLVMValueRef slc = LLVMConstInt(ctx->i1, 0, false);
params[0] = get_image_coords(ctx, instr);
params[0] = get_image_coords(ctx, instr, add_frag_pos);
params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
if (HAVE_LLVM <= 0x0309) {
@@ -2457,7 +2442,7 @@ static void visit_image_store(struct nir_to_llvm_context *ctx,
LLVMValueRef slc = i1false;
params[0] = to_float(ctx, get_src(ctx, instr->src[2]));
params[1] = get_image_coords(ctx, instr); /* coords */
params[1] = get_image_coords(ctx, instr, false); /* coords */
params[2] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
if (HAVE_LLVM <= 0x0309) {
@@ -2517,7 +2502,7 @@ static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx,
bool da = glsl_sampler_type_is_array(type) ||
glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
coords = params[param_count++] = get_image_coords(ctx, instr);
coords = params[param_count++] = get_image_coords(ctx, instr, false);
params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
params[param_count++] = i1false; /* r128 */
params[param_count++] = da ? i1true : i1false; /* da */
@@ -3169,15 +3154,6 @@ static void tex_fetch_ptrs(struct nir_to_llvm_context *ctx,
*fmask_ptr = get_sampler_desc(ctx, instr->texture, DESC_FMASK);
}
static LLVMValueRef apply_round_slice(struct nir_to_llvm_context *ctx,
LLVMValueRef coord)
{
coord = to_float(ctx, coord);
coord = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coord, 1, 0);
coord = to_integer(ctx, coord);
return coord;
}
static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
{
LLVMValueRef result = NULL;
@@ -3235,11 +3211,6 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
}
}
if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
result = get_buffer_size(ctx, res_ptr, true);
goto write_result;
}
if (instr->op == nir_texop_texture_samples) {
LLVMValueRef res, samples, is_msaa;
res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, "");
@@ -3339,16 +3310,15 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
/* Pack texture coordinates */
if (coord) {
address[count++] = coords[0];
if (instr->coord_components > 1) {
if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) {
coords[1] = apply_round_slice(ctx, coords[1]);
}
if (instr->coord_components > 1)
address[count++] = coords[1];
}
if (instr->coord_components > 2) {
/* This seems like a bit of a hack - but it passes Vulkan CTS with it */
if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D && instr->op != nir_texop_txf) {
coords[2] = apply_round_slice(ctx, coords[2]);
coords[2] = to_float(ctx, coords[2]);
coords[2] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coords[2],
1, 0);
coords[2] = to_integer(ctx, coords[2]);
}
address[count++] = coords[2];
}
@@ -3407,8 +3377,7 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
* The sample index should be adjusted as follows:
* sample_index = (fmask >> (sample_index * 4)) & 0xF;
*/
if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS &&
instr->op != nir_texop_txs) {
if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS) {
LLVMValueRef txf_address[4];
struct ac_tex_info txf_info = { 0 };
unsigned txf_count = count;
@@ -4461,13 +4430,6 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
memset(shader_info, 0, sizeof(*shader_info));
LLVMSetTarget(ctx.module, "amdgcn--");
LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
LLVMSetDataLayout(ctx.module, data_layout_str);
LLVMDisposeTargetData(data_layout);
LLVMDisposeMessage(data_layout_str);
setup_types(&ctx);
ctx.builder = LLVMCreateBuilderInContext(ctx.context);
@@ -4495,7 +4457,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
idx++;
}
shared_size *= 16;
shared_size *= 4;
var = LLVMAddGlobalInAddressSpace(ctx.module,
LLVMArrayType(ctx.i8, shared_size),
"compute_lds",

View File

@@ -27,7 +27,6 @@
#include "llvm-c/Core.h"
#include "llvm-c/TargetMachine.h"
#include "amd_family.h"
#include "../vulkan/radv_descriptor_set.h"
struct ac_shader_binary;
struct ac_shader_config;
@@ -77,8 +76,7 @@ enum ac_ud_index {
AC_UD_MAX_UD = AC_UD_VS_MAX_UD,
};
// Match MAX_SETS from radv_descriptor_set.h
#define AC_UD_MAX_SETS MAX_SETS
#define AC_UD_MAX_SETS 4
struct ac_userdata_locations {
struct ac_userdata_info descriptor_sets[AC_UD_MAX_SETS];

View File

@@ -157,7 +157,7 @@ enum {
#define ASICREV_IS_POLARIS10_P(eChipRev)\
((eChipRev >= VI_POLARIS10_P_A0) && (eChipRev < VI_POLARIS11_M_A0))
#define ASICREV_IS_POLARIS11_M(eChipRev) \
(eChipRev >= VI_POLARIS11_M_A0 && eChipRev < VI_POLARIS12_V_A0)
(eChipRev >= VI_POLARIS11_M_A0)
#define ASICREV_IS_POLARIS12_V(eChipRev)\
(eChipRev >= VI_POLARIS12_V_A0)

View File

@@ -21,7 +21,9 @@
include Makefile.sources
noinst_HEADERS = \
vulkan_includedir = $(includedir)/vulkan
vulkan_include_HEADERS = \
$(top_srcdir)/include/vulkan/vk_platform.h \
$(top_srcdir)/include/vulkan/vulkan.h
@@ -57,21 +59,8 @@ VULKAN_SOURCES = \
$(VULKAN_GENERATED_FILES) \
$(VULKAN_FILES)
VULKAN_LIB_DEPS = \
libvulkan_common.la \
$(top_builddir)/src/vulkan/wsi/libvulkan_wsi.la \
$(top_builddir)/src/amd/common/libamd_common.la \
$(top_builddir)/src/amd/addrlib/libamdgpu_addrlib.la \
$(top_builddir)/src/compiler/nir/libnir.la \
$(top_builddir)/src/util/libmesautil.la \
$(LLVM_LIBS) \
$(LIBELF_LIBS) \
$(PTHREAD_LIBS) \
$(AMDGPU_LIBS) \
$(LIBDRM_LIBS) \
$(PTHREAD_LIBS) \
$(DLOPEN_LIBS) \
-lm
VULKAN_LIB_DEPS =
if HAVE_PLATFORM_X11
AM_CPPFLAGS += \
@@ -103,6 +92,22 @@ endif
noinst_LTLIBRARIES = libvulkan_common.la
libvulkan_common_la_SOURCES = $(VULKAN_SOURCES)
VULKAN_LIB_DEPS += \
libvulkan_common.la \
$(top_builddir)/src/vulkan/wsi/libvulkan_wsi.la \
$(top_builddir)/src/amd/common/libamd_common.la \
$(top_builddir)/src/amd/addrlib/libamdgpu_addrlib.la \
$(top_builddir)/src/compiler/nir/libnir.la \
$(top_builddir)/src/util/libmesautil.la \
$(LLVM_LIBS) \
$(LIBELF_LIBS) \
$(PTHREAD_LIBS) \
$(AMDGPU_LIBS) \
$(LIBDRM_LIBS) \
$(PTHREAD_LIBS) \
$(DLOPEN_LIBS) \
-lm
nodist_EXTRA_libvulkan_radeon_la_SOURCES = dummy.cpp
libvulkan_radeon_la_SOURCES = $(VULKAN_GEM_FILES)

View File

@@ -1552,7 +1552,7 @@ void radv_CmdBindDescriptorSets(
radv_bind_descriptor_set(cmd_buffer, set, idx);
for(unsigned j = 0; j < set->layout->dynamic_offset_count; ++j, ++dyn_idx) {
unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start;
unsigned idx = j + layout->set[i].dynamic_offset_start;
uint32_t *dst = cmd_buffer->dynamic_buffers + idx * 4;
assert(dyn_idx < dynamicOffsetCount);
@@ -1818,9 +1818,6 @@ void radv_CmdExecuteCommands(
{
RADV_FROM_HANDLE(radv_cmd_buffer, primary, commandBuffer);
/* Emit pending flushes on primary prior to executing secondary */
si_emit_cache_flush(primary);
for (uint32_t i = 0; i < commandBufferCount; i++) {
RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]);
@@ -1830,7 +1827,6 @@ void radv_CmdExecuteCommands(
/* if we execute secondary we need to re-emit our pipelines */
if (commandBufferCount) {
primary->state.emitted_pipeline = NULL;
primary->state.emitted_compute_pipeline = NULL;
primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE;
primary->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_ALL;
}
@@ -1974,7 +1970,7 @@ void radv_CmdDraw(
static void radv_emit_primitive_reset_index(struct radv_cmd_buffer *cmd_buffer)
{
uint32_t primitive_reset_index = cmd_buffer->state.index_type ? 0xffffffffu : 0xffffu;
uint32_t primitive_reset_index = cmd_buffer->state.last_primitive_reset_index ? 0xffffffffu : 0xffffu;
if (cmd_buffer->state.pipeline->graphics.prim_restart_enable &&
primitive_reset_index != cmd_buffer->state.last_primitive_reset_index) {
@@ -2411,13 +2407,7 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffe
range.baseMipLevel = 0;
range.levelCount = 1;
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
radv_decompress_depth_image_inplace(cmd_buffer, image, &range);
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
}
}
@@ -2580,8 +2570,7 @@ void radv_CmdPipelineBarrier(
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
break;
case VK_ACCESS_TRANSFER_WRITE_BIT:
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_INV_GLOBAL_L2;
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
break;
default:
break;
@@ -2617,7 +2606,6 @@ void radv_CmdPipelineBarrier(
break;
case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
case VK_ACCESS_TRANSFER_READ_BIT:
case VK_ACCESS_TRANSFER_WRITE_BIT:
case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER | RADV_CMD_FLAG_INV_GLOBAL_L2;
default:

View File

@@ -275,13 +275,12 @@ radv_descriptor_set_create(struct radv_device *device,
uint32_t layout_size = align_u32(layout->size, 32);
set->size = layout->size;
if (!cmd_buffer) {
if (pool->current_offset + layout_size <= pool->size &&
pool->allocated_sets < pool->max_sets) {
if (pool->current_offset + layout_size <= pool->size) {
set->bo = pool->bo;
set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + pool->current_offset);
set->va = device->ws->buffer_get_va(set->bo) + pool->current_offset;
pool->current_offset += layout_size;
++pool->allocated_sets;
} else {
int entry = pool->free_list, prev_entry = -1;
uint32_t offset;
@@ -418,7 +417,6 @@ VkResult radv_CreateDescriptorPool(
pool->full_list = 0;
pool->free_nodes[max_sets - 1].next = -1;
pool->max_sets = max_sets;
pool->allocated_sets = 0;
for (int i = 0; i + 1 < max_sets; ++i)
pool->free_nodes[i].next = i + 1;
@@ -496,7 +494,6 @@ VkResult radv_ResetDescriptorPool(
radv_descriptor_set_destroy(device, pool, set, false);
}
pool->allocated_sets = 0;
pool->current_offset = 0;
pool->free_list = -1;
pool->full_list = 0;

View File

@@ -570,7 +570,7 @@ void radv_GetPhysicalDeviceProperties(
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
.maxSampleMaskWords = 1,
.timestampComputeAndGraphics = false,
.timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
.timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq,
.maxClipDistances = 8,
.maxCullDistances = 8,
.maxCombinedClipAndCullDistances = 8,
@@ -1121,7 +1121,7 @@ VkResult radv_AllocateMemory(
if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
flags |= RADEON_FLAG_GTT_WC;
mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
mem->bo = device->ws->buffer_create(device->ws, alloc_size, 32768,
domain, flags);
if (!mem->bo) {
@@ -2024,7 +2024,7 @@ radv_init_sampler(struct radv_device *device,
S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
S_008F38_MIP_POINT_PRECLAMP(0) |
S_008F38_MIP_POINT_PRECLAMP(1) |
S_008F38_DISABLE_LSB_CEIL(1) |
S_008F38_FILTER_PREC_FIX(1) |
S_008F38_ANISO_OVERRIDE(is_vi));

View File

@@ -730,6 +730,9 @@ uint32_t radv_translate_dbformat(VkFormat format)
case VK_FORMAT_D16_UNORM:
case VK_FORMAT_D16_UNORM_S8_UINT:
return V_028040_Z_16;
case VK_FORMAT_X8_D24_UNORM_PACK32:
case VK_FORMAT_D24_UNORM_S8_UINT:
return V_028040_Z_24; /* deprecated on SI */
case VK_FORMAT_D32_SFLOAT:
case VK_FORMAT_D32_SFLOAT_S8_UINT:
return V_028040_Z_32_FLOAT;

View File

@@ -510,7 +510,6 @@ radv_image_alloc_fmask(struct radv_device *device,
image->fmask.offset = align64(image->size, image->fmask.alignment);
image->size = image->fmask.offset + image->fmask.size;
image->alignment = MAX2(image->alignment, image->fmask.alignment);
}
static void
@@ -572,7 +571,6 @@ radv_image_alloc_cmask(struct radv_device *device,
/* + 8 for storing the clear values */
image->clear_value_offset = image->cmask.offset + image->cmask.size;
image->size = image->cmask.offset + image->cmask.size + 8;
image->alignment = MAX2(image->alignment, image->cmask.alignment);
}
static void
@@ -583,7 +581,6 @@ radv_image_alloc_dcc(struct radv_device *device,
/* + 8 for storing the clear values */
image->clear_value_offset = image->dcc_offset + image->surface.dcc_size;
image->size = image->dcc_offset + image->surface.dcc_size + 8;
image->alignment = MAX2(image->alignment, image->surface.dcc_alignment);
}
static unsigned
@@ -655,9 +652,6 @@ radv_image_alloc_htile(struct radv_device *device,
if (device->debug_flags & RADV_DEBUG_NO_HIZ)
return;
if (image->array_size > 1 || image->levels > 1)
return;
image->htile.size = radv_image_get_htile_size(device, image);
if (!image->htile.size)

View File

@@ -26,7 +26,6 @@
#include "radv_meta.h"
#include "nir/nir_builder.h"
#include "vk_format.h"
enum blit2d_dst_type {
/* We can bind this destination as a "normal" render target and render
@@ -284,10 +283,8 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
for (unsigned r = 0; r < num_rects; ++r) {
VkFormat depth_format = 0;
if (dst->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
depth_format = vk_format_stencil_only(dst->image->vk_format);
else if (dst->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
depth_format = vk_format_depth_only(dst->image->vk_format);
if (dst->aspect_mask != VK_IMAGE_ASPECT_COLOR_BIT)
depth_format = dst->image->vk_format;
struct blit2d_src_temps src_temps;
blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format);

View File

@@ -524,8 +524,6 @@ void radv_CmdUpdateBuffer(
assert(!(va & 3));
if (dataSize < 4096) {
si_emit_cache_flush(cmd_buffer);
cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8);
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);

View File

@@ -543,7 +543,6 @@ struct radv_descriptor_pool {
int free_list;
int full_list;
uint32_t max_sets;
uint32_t allocated_sets;
struct radv_descriptor_pool_free_node free_nodes[];
};

View File

@@ -131,7 +131,6 @@ VkResult radv_GetQueryPoolResults(
VkDeviceSize stride,
VkQueryResultFlags flags)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
char *data = pData;
VkResult result = VK_SUCCESS;
@@ -142,20 +141,23 @@ VkResult radv_GetQueryPoolResults(
char *src = pool->ptr + query * pool->stride;
uint32_t available;
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
while(!*(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query))
;
}
if (!*(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query) &&
!(flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
*(uint32_t*)dest = 0;
result = VK_NOT_READY;
continue;
}
available = *(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query);
switch (pool->type) {
case VK_QUERY_TYPE_TIMESTAMP: {
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
while(!*(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query))
;
}
available = *(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query);
if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
result = VK_NOT_READY;
break;
}
case VK_QUERY_TYPE_TIMESTAMP:
if (flags & VK_QUERY_RESULT_64_BIT) {
*(uint64_t*)dest = *(uint64_t*)src;
dest += 8;
@@ -164,32 +166,8 @@ VkResult radv_GetQueryPoolResults(
dest += 4;
}
break;
}
case VK_QUERY_TYPE_OCCLUSION: {
volatile uint64_t const *src64 = (volatile uint64_t const *)src;
uint64_t result = 0;
int db_count = get_max_db(device);
available = 1;
for (int i = 0; i < db_count; ++i) {
uint64_t start, end;
do {
start = src64[2 * i];
end = src64[2 * i + 1];
} while ((!(start & (1ull << 63)) || !(end & (1ull << 63))) && (flags & VK_QUERY_RESULT_WAIT_BIT));
if (!(start & (1ull << 63)) || !(end & (1ull << 63)))
available = 0;
else {
result += end - start;
}
}
if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
result = VK_NOT_READY;
break;
}
uint64_t result = *(uint64_t*)(src + pool->stride - 16);
if (flags & VK_QUERY_RESULT_64_BIT) {
*(uint64_t*)dest = result;
@@ -205,11 +183,8 @@ VkResult radv_GetQueryPoolResults(
}
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
if (flags & VK_QUERY_RESULT_64_BIT) {
*(uint64_t*)dest = available;
} else {
*(uint32_t*)dest = available;
}
*(uint32_t*)dest = available;
dest += 4;
}
}
@@ -382,14 +357,11 @@ void radv_CmdEndQuery(
radeon_emit(cs, va + 8);
radeon_emit(cs, (va + 8) >> 32);
/* hangs for VK_COMMAND_BUFFER_LEVEL_SECONDARY. */
if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
radeon_emit(cs, PKT3(PKT3_OCCLUSION_QUERY, 3, 0));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, va + pool->stride - 16);
radeon_emit(cs, (va + pool->stride - 16) >> 32);
}
radeon_emit(cs, PKT3(PKT3_OCCLUSION_QUERY, 3, 0));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, va + pool->stride - 16);
radeon_emit(cs, (va + pool->stride - 16) >> 32);
break;
default:

View File

@@ -94,7 +94,7 @@ struct radeon_info {
uint32_t compute_rings;
uint32_t vce_fw_version;
uint32_t vce_harvest_config;
uint32_t clock_crystal_freq; /* in kHz */
uint32_t clock_crystal_freq;
/* Kernel info. */
uint32_t drm_major; /* version */

View File

@@ -347,20 +347,16 @@ VkResult radv_QueuePresentKHR(
for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
RADV_FROM_HANDLE(wsi_swapchain, swapchain, pPresentInfo->pSwapchains[i]);
VkResult item_result;
assert(radv_device_from_handle(swapchain->device) == queue->device);
if (swapchain->fences[0] == VK_NULL_HANDLE) {
item_result = radv_CreateFence(radv_device_to_handle(queue->device),
result = radv_CreateFence(radv_device_to_handle(queue->device),
&(VkFenceCreateInfo) {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.flags = 0,
}, &swapchain->alloc, &swapchain->fences[0]);
if (pPresentInfo->pResults != NULL)
pPresentInfo->pResults[i] = item_result;
result = result == VK_SUCCESS ? item_result : result;
if (item_result != VK_SUCCESS)
continue;
if (result != VK_SUCCESS)
return result;
} else {
radv_ResetFences(radv_device_to_handle(queue->device),
1, &swapchain->fences[0]);
@@ -376,14 +372,11 @@ VkResult radv_QueuePresentKHR(
pPresentInfo->waitSemaphoreCount, NULL, 0, false, base_fence);
fence->submitted = true;
item_result = swapchain->queue_present(swapchain,
pPresentInfo->pImageIndices[i]);
result = swapchain->queue_present(swapchain,
pPresentInfo->pImageIndices[i]);
/* TODO: What if one of them returns OUT_OF_DATE? */
if (pPresentInfo->pResults != NULL)
pPresentInfo->pResults[i] = item_result;
result = result == VK_SUCCESS ? item_result : result;
if (item_result != VK_SUCCESS)
continue;
if (result != VK_SUCCESS)
return result;
VkFence last = swapchain->fences[2];
swapchain->fences[2] = swapchain->fences[1];

View File

@@ -291,7 +291,6 @@ void si_init_config(struct radv_physical_device *physical_device,
raster_config_1 = 0x0000002a;
break;
case CHIP_POLARIS11:
case CHIP_POLARIS12:
raster_config = 0x16000012;
raster_config_1 = 0x00000000;
break;
@@ -373,15 +372,6 @@ void si_init_config(struct radv_physical_device *physical_device,
radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
if (physical_device->rad_info.chip_class >= CIK) {
/* If this is 0, Bonaire can hang even if GS isn't being used.
* Other chips are unaffected. These are suboptimal values,
* but we don't use on-chip GS.
*/
radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL,
S_028A44_ES_VERTS_PER_SUBGRP(64) |
S_028A44_GS_PRIMS_PER_SUBGRP(4));
radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
@@ -394,6 +384,7 @@ void si_init_config(struct radv_physical_device *physical_device,
*
* LATE_ALLOC_VS = 2 is the highest safe number.
*/
radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
} else {
@@ -402,6 +393,7 @@ void si_init_config(struct radv_physical_device *physical_device,
* - VS can't execute on CU0.
* - If HS writes outputs to LDS, LS can't execute on CU0.
*/
radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffe));
radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
}

View File

@@ -274,19 +274,6 @@ static void radv_set_micro_tile_mode(struct radeon_surf *surf,
surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
}
/*
 * Derive the macro tile mode index for a surface.
 *
 * The tile size in bytes is taken as 8 * 8 * bpe, clamped to the surface's
 * tile_split.  The returned index is the number of times that size has to
 * be halved until it is no larger than 64.
 */
static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
{
	unsigned tile_bytes = 8 * 8 * surf->bpe;
	unsigned halvings = 0;

	tile_bytes = MIN2(surf->tile_split, tile_bytes);

	while (tile_bytes > 64) {
		tile_bytes >>= 1;
		halvings++;
	}

	assert(halvings < 16);
	return halvings;
}
static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
struct radeon_surf *surf)
@@ -448,7 +435,6 @@ static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
else
AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */
AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf);
}
}

View File

@@ -106,7 +106,6 @@ get_chip_name(enum radeon_family family)
case CHIP_FIJI: return "AMD RADV FIJI";
case CHIP_POLARIS10: return "AMD RADV POLARIS10";
case CHIP_POLARIS11: return "AMD RADV POLARIS11";
case CHIP_POLARIS12: return "AMD RADV POLARIS12";
case CHIP_STONEY: return "AMD RADV STONEY";
default: return "AMD RADV unknown";
}
@@ -271,10 +270,6 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
ws->family = FAMILY_VI;
ws->rev_id = VI_POLARIS11_M_A0;
break;
case CHIP_POLARIS12:
ws->family = FAMILY_VI;
ws->rev_id = VI_POLARIS12_V_A0;
break;
default:
fprintf(stderr, "amdgpu: Unknown family.\n");
goto fail;

View File

@@ -36,7 +36,6 @@ include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
$(LIBGLCPP_FILES) \
$(LIBGLSL_FILES) \
$(LIBGLSL_SHADER_CACHE_FILES)
LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/mapi \

View File

@@ -73,6 +73,7 @@ glsl_tests_general_ir_test_SOURCES = \
glsl/tests/builtin_variable_test.cpp \
glsl/tests/invalidate_locations_test.cpp \
glsl/tests/general_ir_test.cpp \
glsl/tests/lower_int64_test.cpp \
glsl/tests/opt_add_neg_to_sub_test.cpp \
glsl/tests/varyings_test.cpp
glsl_tests_general_ir_test_CFLAGS = \

View File

@@ -20,8 +20,11 @@ LIBGLSL_FILES = \
glsl/blob.c \
glsl/blob.h \
glsl/builtin_functions.cpp \
glsl/builtin_functions.h \
glsl/builtin_int64.h \
glsl/builtin_types.cpp \
glsl/builtin_variables.cpp \
glsl/generate_ir.cpp \
glsl/glsl_parser_extras.cpp \
glsl/glsl_parser_extras.h \
glsl/glsl_symbol_table.cpp \
@@ -88,6 +91,7 @@ LIBGLSL_FILES = \
glsl/lower_distance.cpp \
glsl/lower_if_to_cond_assign.cpp \
glsl/lower_instructions.cpp \
glsl/lower_int64.cpp \
glsl/lower_jumps.cpp \
glsl/lower_mat_op_to_vec.cpp \
glsl/lower_noise.cpp \

View File

@@ -78,6 +78,16 @@ DECL_TYPE(dmat3x4, GL_DOUBLE_MAT3x4, GLSL_TYPE_DOUBLE, 4, 3)
DECL_TYPE(dmat4x2, GL_DOUBLE_MAT4x2, GLSL_TYPE_DOUBLE, 2, 4)
DECL_TYPE(dmat4x3, GL_DOUBLE_MAT4x3, GLSL_TYPE_DOUBLE, 3, 4)
DECL_TYPE(int64_t, GL_INT64_ARB, GLSL_TYPE_INT64, 1, 1)
DECL_TYPE(i64vec2, GL_INT64_VEC2_ARB, GLSL_TYPE_INT64, 2, 1)
DECL_TYPE(i64vec3, GL_INT64_VEC3_ARB, GLSL_TYPE_INT64, 3, 1)
DECL_TYPE(i64vec4, GL_INT64_VEC4_ARB, GLSL_TYPE_INT64, 4, 1)
DECL_TYPE(uint64_t, GL_UNSIGNED_INT64_ARB, GLSL_TYPE_UINT64, 1, 1)
DECL_TYPE(u64vec2, GL_UNSIGNED_INT64_VEC2_ARB, GLSL_TYPE_UINT64, 2, 1)
DECL_TYPE(u64vec3, GL_UNSIGNED_INT64_VEC3_ARB, GLSL_TYPE_UINT64, 3, 1)
DECL_TYPE(u64vec4, GL_UNSIGNED_INT64_VEC4_ARB, GLSL_TYPE_UINT64, 4, 1)
DECL_TYPE(sampler, GL_SAMPLER_1D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_VOID)
DECL_TYPE(sampler1D, GL_SAMPLER_1D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_FLOAT)
DECL_TYPE(sampler2D, GL_SAMPLER_2D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_2D, 0, 0, GLSL_TYPE_FLOAT)

View File

@@ -195,6 +195,8 @@ enum ast_operators {
ast_float_constant,
ast_bool_constant,
ast_double_constant,
ast_int64_constant,
ast_uint64_constant,
ast_sequence,
ast_aggregate
@@ -255,6 +257,8 @@ public:
unsigned uint_constant;
int bool_constant;
double double_constant;
uint64_t uint64_constant;
int64_t int64_constant;
} primary_expression;

View File

@@ -27,6 +27,7 @@
#include "ir.h"
#include "main/core.h" /* for MIN2 */
#include "main/shaderobj.h"
#include "builtin_functions.h"
static ir_rvalue *
convert_component(ir_rvalue *src, const glsl_type *desired_type);
@@ -760,6 +761,12 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type)
case GLSL_TYPE_DOUBLE:
result = new(ctx) ir_expression(ir_unop_d2u, src);
break;
case GLSL_TYPE_UINT64:
result = new(ctx) ir_expression(ir_unop_u642u, src);
break;
case GLSL_TYPE_INT64:
result = new(ctx) ir_expression(ir_unop_i642u, src);
break;
}
break;
case GLSL_TYPE_INT:
@@ -776,6 +783,12 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type)
case GLSL_TYPE_DOUBLE:
result = new(ctx) ir_expression(ir_unop_d2i, src);
break;
case GLSL_TYPE_UINT64:
result = new(ctx) ir_expression(ir_unop_u642i, src);
break;
case GLSL_TYPE_INT64:
result = new(ctx) ir_expression(ir_unop_i642i, src);
break;
}
break;
case GLSL_TYPE_FLOAT:
@@ -792,6 +805,12 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type)
case GLSL_TYPE_DOUBLE:
result = new(ctx) ir_expression(ir_unop_d2f, desired_type, src, NULL);
break;
case GLSL_TYPE_UINT64:
result = new(ctx) ir_expression(ir_unop_u642f, desired_type, src, NULL);
break;
case GLSL_TYPE_INT64:
result = new(ctx) ir_expression(ir_unop_i642f, desired_type, src, NULL);
break;
}
break;
case GLSL_TYPE_BOOL:
@@ -810,6 +829,14 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type)
case GLSL_TYPE_DOUBLE:
result = new(ctx) ir_expression(ir_unop_d2b, desired_type, src, NULL);
break;
case GLSL_TYPE_UINT64:
result = new(ctx) ir_expression(ir_unop_i642b,
new(ctx) ir_expression(ir_unop_u642i64,
src));
break;
case GLSL_TYPE_INT64:
result = new(ctx) ir_expression(ir_unop_i642b, desired_type, src, NULL);
break;
}
break;
case GLSL_TYPE_DOUBLE:
@@ -828,7 +855,60 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type)
case GLSL_TYPE_FLOAT:
result = new(ctx) ir_expression(ir_unop_f2d, desired_type, src, NULL);
break;
case GLSL_TYPE_UINT64:
result = new(ctx) ir_expression(ir_unop_u642d, desired_type, src, NULL);
break;
case GLSL_TYPE_INT64:
result = new(ctx) ir_expression(ir_unop_i642d, desired_type, src, NULL);
break;
}
break;
case GLSL_TYPE_UINT64:
switch (b) {
case GLSL_TYPE_INT:
result = new(ctx) ir_expression(ir_unop_i2u64, src);
break;
case GLSL_TYPE_UINT:
result = new(ctx) ir_expression(ir_unop_u2u64, src);
break;
case GLSL_TYPE_BOOL:
result = new(ctx) ir_expression(ir_unop_i642u64,
new(ctx) ir_expression(ir_unop_b2i64,
src));
break;
case GLSL_TYPE_FLOAT:
result = new(ctx) ir_expression(ir_unop_f2u64, src);
break;
case GLSL_TYPE_DOUBLE:
result = new(ctx) ir_expression(ir_unop_d2u64, src);
break;
case GLSL_TYPE_INT64:
result = new(ctx) ir_expression(ir_unop_i642u64, src);
break;
}
break;
case GLSL_TYPE_INT64:
switch (b) {
case GLSL_TYPE_INT:
result = new(ctx) ir_expression(ir_unop_i2i64, src);
break;
case GLSL_TYPE_UINT:
result = new(ctx) ir_expression(ir_unop_u2i64, src);
break;
case GLSL_TYPE_BOOL:
result = new(ctx) ir_expression(ir_unop_b2i64, src);
break;
case GLSL_TYPE_FLOAT:
result = new(ctx) ir_expression(ir_unop_f2i64, src);
break;
case GLSL_TYPE_DOUBLE:
result = new(ctx) ir_expression(ir_unop_d2i64, src);
break;
case GLSL_TYPE_UINT64:
result = new(ctx) ir_expression(ir_unop_u642i64, src);
break;
}
break;
}
assert(result != NULL);
@@ -1260,6 +1340,12 @@ emit_inline_vector_constructor(const glsl_type *type,
case GLSL_TYPE_BOOL:
data.b[i + base_component] = c->get_bool_component(i);
break;
case GLSL_TYPE_UINT64:
data.u64[i + base_component] = c->get_uint64_component(i);
break;
case GLSL_TYPE_INT64:
data.i64[i + base_component] = c->get_int64_component(i);
break;
default:
assert(!"Should not get here.");
break;
@@ -1267,8 +1353,7 @@ emit_inline_vector_constructor(const glsl_type *type,
}
/* Mask of fields to be written in the assignment. */
constant_mask |=
((1U << rhs_components) - 1) << base_lhs_component;
constant_mask |= ((1U << rhs_components) - 1) << base_lhs_component;
constant_components += rhs_components;
base_component += rhs_components;

View File

@@ -58,6 +58,7 @@
#include "main/shaderobj.h"
#include "ir.h"
#include "ir_builder.h"
#include "builtin_functions.h"
using namespace ir_builder;
@@ -259,6 +260,26 @@ get_implicit_conversion_operation(const glsl_type *to, const glsl_type *from,
case GLSL_TYPE_INT: return ir_unop_i2d;
case GLSL_TYPE_UINT: return ir_unop_u2d;
case GLSL_TYPE_FLOAT: return ir_unop_f2d;
case GLSL_TYPE_INT64: return ir_unop_i642d;
case GLSL_TYPE_UINT64: return ir_unop_u642d;
default: return (ir_expression_operation)0;
}
case GLSL_TYPE_UINT64:
if (!state->has_int64())
return (ir_expression_operation)0;
switch (from->base_type) {
case GLSL_TYPE_INT: return ir_unop_i2u64;
case GLSL_TYPE_UINT: return ir_unop_u2u64;
case GLSL_TYPE_INT64: return ir_unop_i642u64;
default: return (ir_expression_operation)0;
}
case GLSL_TYPE_INT64:
if (!state->has_int64())
return (ir_expression_operation)0;
switch (from->base_type) {
case GLSL_TYPE_INT: return ir_unop_i2i64;
default: return (ir_expression_operation)0;
}
@@ -513,12 +534,12 @@ bit_logic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b,
* (|). The operands must be of type signed or unsigned integers or
* integer vectors."
*/
if (!type_a->is_integer()) {
if (!type_a->is_integer_32_64()) {
_mesa_glsl_error(loc, state, "LHS of `%s' must be an integer",
ast_expression::operator_string(op));
return glsl_type::error_type;
}
if (!type_b->is_integer()) {
if (!type_b->is_integer_32_64()) {
_mesa_glsl_error(loc, state, "RHS of `%s' must be an integer",
ast_expression::operator_string(op));
return glsl_type::error_type;
@@ -599,11 +620,11 @@ modulus_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b,
* "The operator modulus (%) operates on signed or unsigned integers or
* integer vectors."
*/
if (!type_a->is_integer()) {
if (!type_a->is_integer_32_64()) {
_mesa_glsl_error(loc, state, "LHS of operator %% must be an integer");
return glsl_type::error_type;
}
if (!type_b->is_integer()) {
if (!type_b->is_integer_32_64()) {
_mesa_glsl_error(loc, state, "RHS of operator %% must be an integer");
return glsl_type::error_type;
}
@@ -721,7 +742,7 @@ shift_result_type(const struct glsl_type *type_a,
* must be signed or unsigned integers or integer vectors. One operand
* can be signed while the other is unsigned."
*/
if (!type_a->is_integer()) {
if (!type_a->is_integer_32_64()) {
_mesa_glsl_error(loc, state, "LHS of operator %s must be an integer or "
"integer vector", ast_expression::operator_string(op));
return glsl_type::error_type;
@@ -1093,6 +1114,8 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1)
case GLSL_TYPE_INT:
case GLSL_TYPE_BOOL:
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
return new(mem_ctx) ir_expression(operation, op0, op1);
case GLSL_TYPE_ARRAY: {
@@ -1256,6 +1279,10 @@ constant_one_for_inc_dec(void *ctx, const glsl_type *type)
return new(ctx) ir_constant((unsigned) 1);
case GLSL_TYPE_INT:
return new(ctx) ir_constant(1);
case GLSL_TYPE_UINT64:
return new(ctx) ir_constant((uint64_t) 1);
case GLSL_TYPE_INT64:
return new(ctx) ir_constant((int64_t) 1);
default:
case GLSL_TYPE_FLOAT:
return new(ctx) ir_constant(1.0f);
@@ -1535,7 +1562,7 @@ ast_expression::do_hir(exec_list *instructions,
error_emitted = true;
}
if (!op[0]->type->is_integer()) {
if (!op[0]->type->is_integer_32_64()) {
_mesa_glsl_error(&loc, state, "operand of `~' must be an integer");
error_emitted = true;
}
@@ -2006,6 +2033,14 @@ ast_expression::do_hir(exec_list *instructions,
result = new(ctx) ir_constant(this->primary_expression.double_constant);
break;
case ast_uint64_constant:
result = new(ctx) ir_constant(this->primary_expression.uint64_constant);
break;
case ast_int64_constant:
result = new(ctx) ir_constant(this->primary_expression.int64_constant);
break;
case ast_sequence: {
/* It should not be possible to generate a sequence in the AST without
* any expressions in it.
@@ -2132,6 +2167,8 @@ ast_expression::has_sequence_subexpression() const
case ast_float_constant:
case ast_bool_constant:
case ast_double_constant:
case ast_int64_constant:
case ast_uint64_constant:
return false;
case ast_aggregate:
@@ -3828,6 +3865,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
"varying variables may not be of type struct");
break;
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
break;
default:
_mesa_glsl_error(loc, state, "illegal type for a varying variable");
@@ -4886,6 +4925,9 @@ ast_declarator_list::hir(exec_list *instructions,
switch (check_type->base_type) {
case GLSL_TYPE_FLOAT:
break;
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
break;
case GLSL_TYPE_UINT:
case GLSL_TYPE_INT:
if (state->is_version(120, 300))
@@ -5220,13 +5262,11 @@ ast_declarator_list::hir(exec_list *instructions,
* sized by an earlier input primitive layout qualifier, when
* present, as per the following table."
*/
const enum ir_variable_mode mode = (const enum ir_variable_mode)
(earlier == NULL ? var->data.mode : earlier->data.mode);
const bool implicitly_sized =
(mode == ir_var_shader_in &&
(var->data.mode == ir_var_shader_in &&
state->stage >= MESA_SHADER_TESS_CTRL &&
state->stage <= MESA_SHADER_GEOMETRY) ||
(mode == ir_var_shader_out &&
(var->data.mode == ir_var_shader_out &&
state->stage == MESA_SHADER_TESS_CTRL);
if (t->is_unsized_array() && !implicitly_sized)
@@ -7863,9 +7903,10 @@ ast_interface_block::hir(exec_list *instructions,
}
if (var->type->is_unsized_array()) {
if (var->is_in_shader_storage_block() &&
is_unsized_array_last_element(var)) {
var->data.from_ssbo_unsized_array = true;
if (var->is_in_shader_storage_block()) {
if (is_unsized_array_last_element(var)) {
var->data.from_ssbo_unsized_array = true;
}
} else {
/* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
*
@@ -7873,10 +7914,6 @@ ast_interface_block::hir(exec_list *instructions,
* block and the size is not specified at compile-time, it is
* sized at run-time. In all other cases, arrays are sized only
* at compile-time."
*
* In desktop GLSL it is allowed to have unsized-arrays that are
* not last, as long as we can determine that they are implicitly
* sized.
*/
if (state->es_shader) {
_mesa_glsl_error(&loc, state, "unsized array `%s' "

View File

@@ -61,6 +61,7 @@
#include "glsl_parser_extras.h"
#include "program/prog_instruction.h"
#include <math.h>
#include "builtin_functions.h"
#define M_PIf ((float) M_PI)
#define M_PI_2f ((float) M_PI_2)
@@ -470,6 +471,13 @@ shader_clock(const _mesa_glsl_parse_state *state)
return state->ARB_shader_clock_enable;
}
/**
 * Availability predicate that holds only when both the ARB_shader_clock and
 * the ARB_gpu_shader_int64 enable flags are set in the parse state.
 */
static bool
shader_clock_int64(const _mesa_glsl_parse_state *state)
{
   /* Without the clock extension the int64 flag is irrelevant. */
   if (!state->ARB_shader_clock_enable)
      return false;

   return state->ARB_gpu_shader_int64_enable;
}
static bool
shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
{
@@ -532,15 +540,21 @@ fp64(const _mesa_glsl_parse_state *state)
}
static bool
compute_shader(const _mesa_glsl_parse_state *state)
int64(const _mesa_glsl_parse_state *state)
{
return state->stage == MESA_SHADER_COMPUTE;
return state->has_int64();
}
static bool
compute_shader_supported(const _mesa_glsl_parse_state *state)
int64_fp64(const _mesa_glsl_parse_state *state)
{
return state->has_compute_shader();
return state->has_int64() && state->has_double();
}
static bool
compute_shader(const _mesa_glsl_parse_state *state)
{
return state->stage == MESA_SHADER_COMPUTE;
}
static bool
@@ -562,6 +576,11 @@ vote(const _mesa_glsl_parse_state *state)
return state->ARB_shader_group_vote_enable;
}
/**
 * Availability predicate: reports the MESA_shader_integer_functions flag
 * found through state->extensions.
 *
 * Unlike the neighbouring predicates, which test a per-shader `*_enable`
 * member of the parse state, this one reads the extensions table directly.
 */
static bool
integer_functions_supported(const _mesa_glsl_parse_state *state)
{
return state->extensions->MESA_shader_integer_functions;
}
/** @} */
/******************************************************************************/
@@ -729,6 +748,12 @@ private:
B1(floatBitsToUint)
B1(intBitsToFloat)
B1(uintBitsToFloat)
BA1(doubleBitsToInt64)
BA1(doubleBitsToUint64)
BA1(int64BitsToDouble)
BA1(uint64BitsToDouble)
ir_function_signature *_packUnorm2x16(builtin_available_predicate avail);
ir_function_signature *_packSnorm2x16(builtin_available_predicate avail);
ir_function_signature *_packUnorm4x8(builtin_available_predicate avail);
@@ -741,6 +766,10 @@ private:
ir_function_signature *_unpackHalf2x16(builtin_available_predicate avail);
ir_function_signature *_packDouble2x32(builtin_available_predicate avail);
ir_function_signature *_unpackDouble2x32(builtin_available_predicate avail);
ir_function_signature *_packInt2x32(builtin_available_predicate avail);
ir_function_signature *_unpackInt2x32(builtin_available_predicate avail);
ir_function_signature *_packUint2x32(builtin_available_predicate avail);
ir_function_signature *_unpackUint2x32(builtin_available_predicate avail);
BA1(length)
BA1(distance);
@@ -1104,15 +1133,15 @@ builtin_builder::create_intrinsics()
ir_intrinsic_group_memory_barrier),
NULL);
add_function("__intrinsic_memory_barrier_atomic_counter",
_memory_barrier_intrinsic(compute_shader_supported,
_memory_barrier_intrinsic(compute_shader,
ir_intrinsic_memory_barrier_atomic_counter),
NULL);
add_function("__intrinsic_memory_barrier_buffer",
_memory_barrier_intrinsic(compute_shader_supported,
_memory_barrier_intrinsic(compute_shader,
ir_intrinsic_memory_barrier_buffer),
NULL);
add_function("__intrinsic_memory_barrier_image",
_memory_barrier_intrinsic(compute_shader_supported,
_memory_barrier_intrinsic(compute_shader,
ir_intrinsic_memory_barrier_image),
NULL);
add_function("__intrinsic_memory_barrier_shared",
@@ -1190,7 +1219,7 @@ builtin_builder::create_builtins()
_##NAME(glsl_type::ivec4_type), \
NULL);
#define FID(NAME) \
#define FI64(NAME) \
add_function(#NAME, \
_##NAME(always_available, glsl_type::float_type), \
_##NAME(always_available, glsl_type::vec2_type), \
@@ -1204,6 +1233,10 @@ builtin_builder::create_builtins()
_##NAME(fp64, glsl_type::dvec2_type), \
_##NAME(fp64, glsl_type::dvec3_type), \
_##NAME(fp64, glsl_type::dvec4_type), \
_##NAME(int64, glsl_type::int64_t_type), \
_##NAME(int64, glsl_type::i64vec2_type), \
_##NAME(int64, glsl_type::i64vec3_type), \
_##NAME(int64, glsl_type::i64vec4_type), \
NULL);
#define FIUD_VEC(NAME) \
@@ -1222,6 +1255,14 @@ builtin_builder::create_builtins()
_##NAME(fp64, glsl_type::dvec2_type), \
_##NAME(fp64, glsl_type::dvec3_type), \
_##NAME(fp64, glsl_type::dvec4_type), \
_##NAME(int64, glsl_type::int64_t_type), \
_##NAME(int64, glsl_type::i64vec2_type), \
_##NAME(int64, glsl_type::i64vec3_type), \
_##NAME(int64, glsl_type::i64vec4_type), \
_##NAME(int64, glsl_type::uint64_t_type), \
_##NAME(int64, glsl_type::u64vec2_type), \
_##NAME(int64, glsl_type::u64vec3_type), \
_##NAME(int64, glsl_type::u64vec4_type), \
NULL);
#define IU(NAME) \
@@ -1258,6 +1299,14 @@ builtin_builder::create_builtins()
_##NAME(fp64, glsl_type::dvec2_type), \
_##NAME(fp64, glsl_type::dvec3_type), \
_##NAME(fp64, glsl_type::dvec4_type), \
_##NAME(int64, glsl_type::int64_t_type), \
_##NAME(int64, glsl_type::i64vec2_type), \
_##NAME(int64, glsl_type::i64vec3_type), \
_##NAME(int64, glsl_type::i64vec4_type), \
_##NAME(int64, glsl_type::uint64_t_type), \
_##NAME(int64, glsl_type::u64vec2_type), \
_##NAME(int64, glsl_type::u64vec3_type), \
_##NAME(int64, glsl_type::u64vec4_type), \
NULL);
#define FIUD2_MIXED(NAME) \
@@ -1296,6 +1345,21 @@ builtin_builder::create_builtins()
_##NAME(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), \
_##NAME(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), \
_##NAME(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), \
\
_##NAME(int64, glsl_type::int64_t_type, glsl_type::int64_t_type), \
_##NAME(int64, glsl_type::i64vec2_type, glsl_type::int64_t_type), \
_##NAME(int64, glsl_type::i64vec3_type, glsl_type::int64_t_type), \
_##NAME(int64, glsl_type::i64vec4_type, glsl_type::int64_t_type), \
_##NAME(int64, glsl_type::i64vec2_type, glsl_type::i64vec2_type), \
_##NAME(int64, glsl_type::i64vec3_type, glsl_type::i64vec3_type), \
_##NAME(int64, glsl_type::i64vec4_type, glsl_type::i64vec4_type), \
_##NAME(int64, glsl_type::uint64_t_type, glsl_type::uint64_t_type), \
_##NAME(int64, glsl_type::u64vec2_type, glsl_type::uint64_t_type), \
_##NAME(int64, glsl_type::u64vec3_type, glsl_type::uint64_t_type), \
_##NAME(int64, glsl_type::u64vec4_type, glsl_type::uint64_t_type), \
_##NAME(int64, glsl_type::u64vec2_type, glsl_type::u64vec2_type), \
_##NAME(int64, glsl_type::u64vec3_type, glsl_type::u64vec3_type), \
_##NAME(int64, glsl_type::u64vec4_type, glsl_type::u64vec4_type), \
NULL);
F(radians)
@@ -1330,8 +1394,8 @@ builtin_builder::create_builtins()
F(log2)
FD(sqrt)
FD(inversesqrt)
FID(abs)
FID(sign)
FI64(abs)
FI64(sign)
FD(floor)
FD(trunc)
FD(round)
@@ -1408,6 +1472,16 @@ builtin_builder::create_builtins()
_mix_sel(shader_integer_mix, glsl_type::bvec2_type, glsl_type::bvec2_type),
_mix_sel(shader_integer_mix, glsl_type::bvec3_type, glsl_type::bvec3_type),
_mix_sel(shader_integer_mix, glsl_type::bvec4_type, glsl_type::bvec4_type),
_mix_sel(int64, glsl_type::int64_t_type, glsl_type::bool_type),
_mix_sel(int64, glsl_type::i64vec2_type, glsl_type::bvec2_type),
_mix_sel(int64, glsl_type::i64vec3_type, glsl_type::bvec3_type),
_mix_sel(int64, glsl_type::i64vec4_type, glsl_type::bvec4_type),
_mix_sel(int64, glsl_type::uint64_t_type, glsl_type::bool_type),
_mix_sel(int64, glsl_type::u64vec2_type, glsl_type::bvec2_type),
_mix_sel(int64, glsl_type::u64vec3_type, glsl_type::bvec3_type),
_mix_sel(int64, glsl_type::u64vec4_type, glsl_type::bvec4_type),
NULL);
add_function("step",
@@ -1466,6 +1540,34 @@ builtin_builder::create_builtins()
_uintBitsToFloat(glsl_type::uvec4_type),
NULL);
add_function("doubleBitsToInt64",
_doubleBitsToInt64(int64_fp64, glsl_type::double_type),
_doubleBitsToInt64(int64_fp64, glsl_type::dvec2_type),
_doubleBitsToInt64(int64_fp64, glsl_type::dvec3_type),
_doubleBitsToInt64(int64_fp64, glsl_type::dvec4_type),
NULL);
add_function("doubleBitsToUint64",
_doubleBitsToUint64(int64_fp64, glsl_type::double_type),
_doubleBitsToUint64(int64_fp64, glsl_type::dvec2_type),
_doubleBitsToUint64(int64_fp64, glsl_type::dvec3_type),
_doubleBitsToUint64(int64_fp64, glsl_type::dvec4_type),
NULL);
add_function("int64BitsToDouble",
_int64BitsToDouble(int64_fp64, glsl_type::int64_t_type),
_int64BitsToDouble(int64_fp64, glsl_type::i64vec2_type),
_int64BitsToDouble(int64_fp64, glsl_type::i64vec3_type),
_int64BitsToDouble(int64_fp64, glsl_type::i64vec4_type),
NULL);
add_function("uint64BitsToDouble",
_uint64BitsToDouble(int64_fp64, glsl_type::uint64_t_type),
_uint64BitsToDouble(int64_fp64, glsl_type::u64vec2_type),
_uint64BitsToDouble(int64_fp64, glsl_type::u64vec3_type),
_uint64BitsToDouble(int64_fp64, glsl_type::u64vec4_type),
NULL);
add_function("packUnorm2x16", _packUnorm2x16(shader_packing_or_es3_or_gpu_shader5), NULL);
add_function("packSnorm2x16", _packSnorm2x16(shader_packing_or_es3), NULL);
add_function("packUnorm4x8", _packUnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL);
@@ -1479,6 +1581,10 @@ builtin_builder::create_builtins()
add_function("packDouble2x32", _packDouble2x32(fp64), NULL);
add_function("unpackDouble2x32", _unpackDouble2x32(fp64), NULL);
add_function("packInt2x32", _packInt2x32(int64), NULL);
add_function("unpackInt2x32", _unpackInt2x32(int64), NULL);
add_function("packUint2x32", _packUint2x32(int64), NULL);
add_function("unpackUint2x32", _unpackUint2x32(int64), NULL);
FD(length)
FD(distance)
@@ -2964,15 +3070,15 @@ builtin_builder::create_builtins()
NULL);
add_function("memoryBarrierAtomicCounter",
_memory_barrier("__intrinsic_memory_barrier_atomic_counter",
compute_shader_supported),
compute_shader),
NULL);
add_function("memoryBarrierBuffer",
_memory_barrier("__intrinsic_memory_barrier_buffer",
compute_shader_supported),
compute_shader),
NULL);
add_function("memoryBarrierImage",
_memory_barrier("__intrinsic_memory_barrier_image",
compute_shader_supported),
compute_shader),
NULL);
add_function("memoryBarrierShared",
_memory_barrier("__intrinsic_memory_barrier_shared",
@@ -2984,10 +3090,39 @@ builtin_builder::create_builtins()
glsl_type::uvec2_type),
NULL);
add_function("clockARB",
_shader_clock(shader_clock_int64,
glsl_type::uint64_t_type),
NULL);
add_function("anyInvocationARB", _vote(ir_unop_vote_any), NULL);
add_function("allInvocationsARB", _vote(ir_unop_vote_all), NULL);
add_function("allInvocationsEqualARB", _vote(ir_unop_vote_eq), NULL);
add_function("__builtin_idiv64",
generate_ir::idiv64(mem_ctx, integer_functions_supported),
NULL);
add_function("__builtin_imod64",
generate_ir::imod64(mem_ctx, integer_functions_supported),
NULL);
add_function("__builtin_sign64",
generate_ir::sign64(mem_ctx, integer_functions_supported),
NULL);
add_function("__builtin_udiv64",
generate_ir::udiv64(mem_ctx, integer_functions_supported),
NULL);
add_function("__builtin_umod64",
generate_ir::umod64(mem_ctx, integer_functions_supported),
NULL);
add_function("__builtin_umul64",
generate_ir::umul64(mem_ctx, integer_functions_supported),
NULL);
#undef F
#undef FI
#undef FIUD_VEC
@@ -3858,6 +3993,42 @@ builtin_builder::_uintBitsToFloat(const glsl_type *type)
return sig;
}
/**
 * Build one signature of doubleBitsToInt64().
 *
 * \param avail  availability predicate attached to the signature
 * \param type   type of the single input parameter "x"
 *
 * The return type is the i64vec with as many components as \p type, and the
 * body returns bitcast_d2i64() of the parameter.
 */
ir_function_signature *
builtin_builder::_doubleBitsToInt64(builtin_available_predicate avail, const glsl_type *type)
{
   const unsigned components = type->vector_elements;
   ir_variable *src = in_var(type, "x");

   MAKE_SIG(glsl_type::i64vec(components), avail, 1, src);
   body.emit(ret(bitcast_d2i64(src)));

   return sig;
}
/**
 * Build one signature of doubleBitsToUint64().
 *
 * \param avail  availability predicate attached to the signature
 * \param type   type of the single input parameter "x"
 *
 * The return type is the u64vec with as many components as \p type, and the
 * body returns bitcast_d2u64() of the parameter.
 */
ir_function_signature *
builtin_builder::_doubleBitsToUint64(builtin_available_predicate avail, const glsl_type *type)
{
   const unsigned components = type->vector_elements;
   ir_variable *src = in_var(type, "x");

   MAKE_SIG(glsl_type::u64vec(components), avail, 1, src);
   body.emit(ret(bitcast_d2u64(src)));

   return sig;
}
/**
 * Build one signature of int64BitsToDouble().
 *
 * \param avail  availability predicate attached to the signature
 * \param type   type of the single input parameter "x"
 *
 * The return type is the dvec with as many components as \p type, and the
 * body returns bitcast_i642d() of the parameter.
 */
ir_function_signature *
builtin_builder::_int64BitsToDouble(builtin_available_predicate avail, const glsl_type *type)
{
   const unsigned components = type->vector_elements;
   ir_variable *src = in_var(type, "x");

   MAKE_SIG(glsl_type::dvec(components), avail, 1, src);
   body.emit(ret(bitcast_i642d(src)));

   return sig;
}
/**
 * Build one signature of uint64BitsToDouble().
 *
 * \param avail  availability predicate attached to the signature
 * \param type   type of the single input parameter "x"
 *
 * The return type is the dvec with as many components as \p type, and the
 * body returns bitcast_u642d() of the parameter.
 */
ir_function_signature *
builtin_builder::_uint64BitsToDouble(builtin_available_predicate avail, const glsl_type *type)
{
   const unsigned components = type->vector_elements;
   ir_variable *src = in_var(type, "x");

   MAKE_SIG(glsl_type::dvec(components), avail, 1, src);
   body.emit(ret(bitcast_u642d(src)));

   return sig;
}
ir_function_signature *
builtin_builder::_packUnorm2x16(builtin_available_predicate avail)
{
@@ -3967,6 +4138,42 @@ builtin_builder::_unpackDouble2x32(builtin_available_predicate avail)
return sig;
}
/**
 * Build the signature of packInt2x32(): one ivec2 parameter "v", int64_t
 * result, implemented as a single ir_unop_pack_int_2x32 expression.
 *
 * \param avail  availability predicate attached to the signature
 */
ir_function_signature *
builtin_builder::_packInt2x32(builtin_available_predicate avail)
{
   ir_variable *halves = in_var(glsl_type::ivec2_type, "v");

   MAKE_SIG(glsl_type::int64_t_type, avail, 1, halves);
   body.emit(ret(expr(ir_unop_pack_int_2x32, halves)));

   return sig;
}
/**
 * Build the signature of unpackInt2x32(): one int64_t parameter "p", ivec2
 * result, implemented as a single ir_unop_unpack_int_2x32 expression.
 *
 * \param avail  availability predicate attached to the signature
 */
ir_function_signature *
builtin_builder::_unpackInt2x32(builtin_available_predicate avail)
{
   ir_variable *packed = in_var(glsl_type::int64_t_type, "p");

   MAKE_SIG(glsl_type::ivec2_type, avail, 1, packed);
   body.emit(ret(expr(ir_unop_unpack_int_2x32, packed)));

   return sig;
}
/**
 * Build the signature of packUint2x32(): one uvec2 parameter "v", uint64_t
 * result, implemented as a single ir_unop_pack_uint_2x32 expression.
 *
 * \param avail  availability predicate attached to the signature
 */
ir_function_signature *
builtin_builder::_packUint2x32(builtin_available_predicate avail)
{
   ir_variable *halves = in_var(glsl_type::uvec2_type, "v");

   MAKE_SIG(glsl_type::uint64_t_type, avail, 1, halves);
   body.emit(ret(expr(ir_unop_pack_uint_2x32, halves)));

   return sig;
}
/**
 * Build the signature of unpackUint2x32(): one uint64_t parameter "p", uvec2
 * result, implemented as a single ir_unop_unpack_uint_2x32 expression.
 *
 * \param avail  availability predicate attached to the signature
 */
ir_function_signature *
builtin_builder::_unpackUint2x32(builtin_available_predicate avail)
{
   ir_variable *packed = in_var(glsl_type::uint64_t_type, "p");

   MAKE_SIG(glsl_type::uvec2_type, avail, 1, packed);
   body.emit(ret(expr(ir_unop_unpack_uint_2x32, packed)));

   return sig;
}
ir_function_signature *
builtin_builder::_length(builtin_available_predicate avail, const glsl_type *type)
{
@@ -5693,7 +5900,13 @@ builtin_builder::_shader_clock(builtin_available_predicate avail,
body.emit(call(shader->symbols->get_function("__intrinsic_shader_clock"),
retval, sig->parameters));
body.emit(ret(retval));
if (type == glsl_type::uint64_t_type) {
body.emit(ret(expr(ir_unop_pack_uint_2x32, retval)));
} else {
body.emit(ret(retval));
}
return sig;
}

View File

@@ -0,0 +1,68 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef BUILTIN_FUNCTIONS_H
#define BUILTIN_FUNCTIONS_H

/*
 * Declarations for the GLSL built-in function machinery.  Everything here is
 * declaration-only; the definitions live in other translation units.
 *
 * NOTE(review): this header does not declare the types it mentions
 * (ir_function_signature, ir_function, gl_shader, exec_list,
 * glsl_symbol_table, _mesa_glsl_parse_state, builtin_available_predicate),
 * so it presumably must be included after the headers that do -- confirm
 * against the including files.
 */

extern void
_mesa_glsl_initialize_builtin_functions();

extern ir_function_signature *
_mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state,
                                 const char *name, exec_list *actual_parameters);

extern ir_function *
_mesa_glsl_find_builtin_function_by_name(const char *name);

extern gl_shader *
_mesa_glsl_get_builtin_function_shader(void);

extern ir_function_signature *
_mesa_get_main_function_signature(glsl_symbol_table *symbols);

extern void
_mesa_glsl_release_builtin_functions(void);

/*
 * Signature generators for the 64-bit integer helper functions.  Each takes
 * the memory context to allocate from and the availability predicate to
 * attach, and returns the resulting function signature.
 */
namespace generate_ir {

ir_function_signature *
udiv64(void *mem_ctx, builtin_available_predicate avail);

ir_function_signature *
idiv64(void *mem_ctx, builtin_available_predicate avail);

ir_function_signature *
umod64(void *mem_ctx, builtin_available_predicate avail);

ir_function_signature *
imod64(void *mem_ctx, builtin_available_predicate avail);

ir_function_signature *
umul64(void *mem_ctx, builtin_available_predicate avail);

ir_function_signature *
sign64(void *mem_ctx, builtin_available_predicate avail);

}

#endif /* BUILTIN_FUNCTIONS_H */

File diff suppressed because it is too large Load Diff

View File

@@ -409,5 +409,17 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state)
add_type(symbols, glsl_type::dmat4x2_type);
add_type(symbols, glsl_type::dmat4x3_type);
}
if (state->ARB_gpu_shader_int64_enable) {
add_type(symbols, glsl_type::int64_t_type);
add_type(symbols, glsl_type::i64vec2_type);
add_type(symbols, glsl_type::i64vec3_type);
add_type(symbols, glsl_type::i64vec4_type);
add_type(symbols, glsl_type::uint64_t_type);
add_type(symbols, glsl_type::u64vec2_type);
add_type(symbols, glsl_type::u64vec3_type);
add_type(symbols, glsl_type::u64vec4_type);
}
}
/** @} */

View File

@@ -30,6 +30,7 @@
#include "main/uniforms.h"
#include "program/prog_statevars.h"
#include "program/prog_instruction.h"
#include "builtin_functions.h"
using namespace ir_builder;

View File

@@ -0,0 +1,33 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "ir_builder.h"
#include "builtin_functions.h"
#include "program/prog_instruction.h" /* for SWIZZLE_X, &c. */
using namespace ir_builder;
/* The contents of builtin_int64.h are compiled inside the generate_ir
 * namespace, so every definition in that header becomes a member of
 * generate_ir.  The include must therefore stay inside the braces.
 */
namespace generate_ir {
#include "builtin_int64.h"
}

View File

@@ -1334,7 +1334,8 @@ add_builtin_define(glcpp_parser_t *parser, const char *name, int value)
}
glcpp_parser_t *
glcpp_parser_create(glcpp_extension_iterator extensions, void *state, gl_api api)
glcpp_parser_create(const struct gl_extensions *extension_list,
glcpp_extension_iterator extensions, void *state, gl_api api)
{
glcpp_parser_t *parser;
@@ -1369,6 +1370,7 @@ glcpp_parser_create(glcpp_extension_iterator extensions, void *state, gl_api api
parser->error = 0;
parser->extensions = extensions;
parser->extension_list = extension_list;
parser->state = state;
parser->api = api;
parser->version = 0;
@@ -2335,6 +2337,21 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
parser->extensions(parser->state, add_builtin_define, parser,
version, parser->is_gles);
if (parser->extension_list) {
/* If MESA_shader_integer_functions is supported, then the building
* blocks required for the 64x64 => 64 multiply exist. Add defines for
* those functions so that they can be tested.
*/
if (parser->extension_list->MESA_shader_integer_functions) {
add_builtin_define(parser, "__have_builtin_builtin_sign64", 1);
add_builtin_define(parser, "__have_builtin_builtin_umul64", 1);
add_builtin_define(parser, "__have_builtin_builtin_udiv64", 1);
add_builtin_define(parser, "__have_builtin_builtin_umod64", 1);
add_builtin_define(parser, "__have_builtin_builtin_idiv64", 1);
add_builtin_define(parser, "__have_builtin_builtin_imod64", 1);
}
}
if (explicitly_set) {
ralloc_asprintf_rewrite_tail(&parser->output, &parser->output_length,
"#version %" PRIiMAX "%s%s", version,

View File

@@ -205,6 +205,7 @@ struct glcpp_parser {
size_t info_log_length;
int error;
glcpp_extension_iterator extensions;
const struct gl_extensions *extension_list;
void *state;
gl_api api;
unsigned version;
@@ -225,7 +226,8 @@ struct glcpp_parser {
};
glcpp_parser_t *
glcpp_parser_create (glcpp_extension_iterator extensions, void *state, gl_api api);
glcpp_parser_create(const struct gl_extensions *extension_list,
glcpp_extension_iterator extensions, void *state, gl_api api);
int
glcpp_parser_parse (glcpp_parser_t *parser);

View File

@@ -218,7 +218,7 @@ glcpp_preprocess(void *ralloc_ctx, const char **shader, char **info_log,
{
int errors;
glcpp_parser_t *parser =
glcpp_parser_create(extensions, state, gl_ctx->API);
glcpp_parser_create(&gl_ctx->Extensions, extensions, state, gl_ctx->API);
if (! gl_ctx->Const.DisableGLSLLineContinuations)
*shader = remove_line_continuations(parser, *shader);

View File

@@ -107,17 +107,29 @@ literal_integer(char *text, int len, struct _mesa_glsl_parse_state *state,
{
bool is_uint = (text[len - 1] == 'u' ||
text[len - 1] == 'U');
bool is_long = (text[len - 1] == 'l' || text[len - 1] == 'L');
const char *digits = text;
if (is_long)
is_uint = (text[len - 2] == 'u' && text[len - 1] == 'l') ||
(text[len - 2] == 'U' && text[len - 1] == 'L');
/* Skip "0x" */
if (base == 16)
digits += 2;
unsigned long long value = strtoull(digits, NULL, base);
lval->n = (int)value;
if (is_long)
lval->n64 = (int64_t)value;
else
lval->n = (int)value;
if (value > UINT_MAX) {
if (is_long && !is_uint && base == 10 && value > (uint64_t)LLONG_MAX + 1) {
/* Tries to catch unintentionally providing a negative value. */
_mesa_glsl_warning(lloc, state,
"signed literal value `%s' is interpreted as %lld",
text, lval->n64);
} else if (!is_long && value > UINT_MAX) {
/* Note that signed 0xffffffff is valid, not out of range! */
if (state->is_version(130, 300)) {
_mesa_glsl_error(lloc, state,
@@ -135,7 +147,10 @@ literal_integer(char *text, int len, struct _mesa_glsl_parse_state *state,
"signed literal value `%s' is interpreted as %d",
text, lval->n);
}
return is_uint ? UINTCONSTANT : INTCONSTANT;
if (is_long)
return is_uint ? UINT64CONSTANT : INT64CONSTANT;
else
return is_uint ? UINTCONSTANT : INTCONSTANT;
}
#define LITERAL_INTEGER(base) \
@@ -462,13 +477,13 @@ layout {
\|= return OR_ASSIGN;
-= return SUB_ASSIGN;
[1-9][0-9]*[uU]? {
[1-9][0-9]*([uU]|[lL]|ul|UL)? {
return LITERAL_INTEGER(10);
}
0[xX][0-9a-fA-F]+[uU]? {
0[xX][0-9a-fA-F]+([uU]|[lL]|ul|UL)? {
return LITERAL_INTEGER(16);
}
0[0-7]*[uU]? {
0[0-7]*([uU]|[lL]|ul|UL)? {
return LITERAL_INTEGER(8);
}
@@ -591,6 +606,16 @@ resource KEYWORD(420, 300, 0, 0, RESOURCE);
sample KEYWORD_WITH_ALT(400, 300, 400, 320, yyextra->ARB_gpu_shader5_enable || yyextra->OES_shader_multisample_interpolation_enable, SAMPLE);
subroutine KEYWORD_WITH_ALT(400, 300, 400, 0, yyextra->ARB_shader_subroutine_enable, SUBROUTINE);
/* Additional words for ARB_gpu_shader_int64 */
int64_t KEYWORD_WITH_ALT(0, 0, 0, 0, yyextra->ARB_gpu_shader_int64_enable, INT64_TOK);
i64vec2 KEYWORD_WITH_ALT(0, 0, 0, 0, yyextra->ARB_gpu_shader_int64_enable, I64VEC2);
i64vec3 KEYWORD_WITH_ALT(0, 0, 0, 0, yyextra->ARB_gpu_shader_int64_enable, I64VEC3);
i64vec4 KEYWORD_WITH_ALT(0, 0, 0, 0, yyextra->ARB_gpu_shader_int64_enable, I64VEC4);
uint64_t KEYWORD_WITH_ALT(0, 0, 0, 0, yyextra->ARB_gpu_shader_int64_enable, UINT64_TOK);
u64vec2 KEYWORD_WITH_ALT(0, 0, 0, 0, yyextra->ARB_gpu_shader_int64_enable, U64VEC2);
u64vec3 KEYWORD_WITH_ALT(0, 0, 0, 0, yyextra->ARB_gpu_shader_int64_enable, U64VEC3);
u64vec4 KEYWORD_WITH_ALT(0, 0, 0, 0, yyextra->ARB_gpu_shader_int64_enable, U64VEC4);
[_a-zA-Z][_a-zA-Z0-9]* {
struct _mesa_glsl_parse_state *state = yyextra;

View File

@@ -97,6 +97,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
%union {
int n;
int64_t n64;
float real;
double dreal;
const char *identifier;
@@ -136,6 +137,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
%token ATTRIBUTE CONST_TOK BOOL_TOK FLOAT_TOK INT_TOK UINT_TOK DOUBLE_TOK
%token BREAK BUFFER CONTINUE DO ELSE FOR IF DISCARD RETURN SWITCH CASE DEFAULT
%token BVEC2 BVEC3 BVEC4 IVEC2 IVEC3 IVEC4 UVEC2 UVEC3 UVEC4 VEC2 VEC3 VEC4 DVEC2 DVEC3 DVEC4
%token INT64_TOK UINT64_TOK I64VEC2 I64VEC3 I64VEC4 U64VEC2 U64VEC3 U64VEC4
%token CENTROID IN_TOK OUT_TOK INOUT_TOK UNIFORM VARYING SAMPLE
%token NOPERSPECTIVE FLAT SMOOTH
%token MAT2X2 MAT2X3 MAT2X4
@@ -173,6 +175,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
%token <real> FLOATCONSTANT
%token <dreal> DOUBLECONSTANT
%token <n> INTCONSTANT UINTCONSTANT BOOLCONSTANT
%token <n64> INT64CONSTANT UINT64CONSTANT
%token <identifier> FIELD_SELECTION
%token LEFT_OP RIGHT_OP
%token INC_OP DEC_OP LE_OP GE_OP EQ_OP NE_OP
@@ -451,6 +454,20 @@ primary_expression:
$$->set_location(@1);
$$->primary_expression.uint_constant = $1;
}
| INT64CONSTANT
{
void *ctx = state->linalloc;
$$ = new(ctx) ast_expression(ast_int64_constant, NULL, NULL, NULL);
$$->set_location(@1);
$$->primary_expression.int64_constant = $1;
}
| UINT64CONSTANT
{
void *ctx = state->linalloc;
$$ = new(ctx) ast_expression(ast_uint64_constant, NULL, NULL, NULL);
$$->set_location(@1);
$$->primary_expression.uint64_constant = $1;
}
| FLOATCONSTANT
{
void *ctx = state->linalloc;
@@ -2304,6 +2321,14 @@ basic_type_specifier_nonarray:
| UIMAGE2DMS { $$ = "uimage2DMS"; }
| UIMAGE2DMSARRAY { $$ = "uimage2DMSArray"; }
| ATOMIC_UINT { $$ = "atomic_uint"; }
| INT64_TOK { $$ = "int64_t"; }
| I64VEC2 { $$ = "i64vec2"; }
| I64VEC3 { $$ = "i64vec3"; }
| I64VEC4 { $$ = "i64vec4"; }
| UINT64_TOK { $$ = "uint64_t"; }
| U64VEC2 { $$ = "u64vec2"; }
| U64VEC3 { $$ = "u64vec3"; }
| U64VEC4 { $$ = "u64vec4"; }
;
precision_qualifier:

View File

@@ -20,6 +20,8 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#define __STDC_FORMAT_MACROS 1
#include <inttypes.h> /* for PRIx64 macro */
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
@@ -37,6 +39,7 @@
#include "glsl_parser.h"
#include "ir_optimization.h"
#include "loop_analysis.h"
#include "builtin_functions.h"
/**
* Format a short human-readable description of the given GLSL version.
@@ -608,6 +611,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
EXT(ARB_fragment_layer_viewport),
EXT(ARB_gpu_shader5),
EXT(ARB_gpu_shader_fp64),
EXT(ARB_gpu_shader_int64),
EXT(ARB_post_depth_coverage),
EXT(ARB_sample_shading),
EXT(ARB_separate_shader_objects),
@@ -1247,6 +1251,14 @@ ast_expression::print(void) const
printf("%f ", primary_expression.double_constant);
break;
case ast_int64_constant:
printf("%" PRId64 " ", primary_expression.int64_constant);
break;
case ast_uint64_constant:
printf("%" PRIu64 " ", primary_expression.uint64_constant);
break;
case ast_bool_constant:
printf("%s ",
primary_expression.bool_constant
@@ -1704,7 +1716,7 @@ set_shader_inout_layout(struct gl_shader *shader,
if (state->out_qualifier->out_xfb_stride[i]->
process_qualifier_constant(state, "xfb_stride", &xfb_stride,
true)) {
shader->info.TransformFeedback.BufferStride[i] = xfb_stride;
shader->TransformFeedbackBufferStride[i] = xfb_stride;
}
}
}
@@ -1808,16 +1820,15 @@ set_shader_inout_layout(struct gl_shader *shader,
break;
case MESA_SHADER_FRAGMENT:
shader->info.redeclares_gl_fragcoord =
state->fs_redeclares_gl_fragcoord;
shader->info.uses_gl_fragcoord = state->fs_uses_gl_fragcoord;
shader->info.pixel_center_integer = state->fs_pixel_center_integer;
shader->info.origin_upper_left = state->fs_origin_upper_left;
shader->info.ARB_fragment_coord_conventions_enable =
shader->redeclares_gl_fragcoord = state->fs_redeclares_gl_fragcoord;
shader->uses_gl_fragcoord = state->fs_uses_gl_fragcoord;
shader->pixel_center_integer = state->fs_pixel_center_integer;
shader->origin_upper_left = state->fs_origin_upper_left;
shader->ARB_fragment_coord_conventions_enable =
state->ARB_fragment_coord_conventions_enable;
shader->info.EarlyFragmentTests = state->fs_early_fragment_tests;
shader->info.InnerCoverage = state->fs_inner_coverage;
shader->info.PostDepthCoverage = state->fs_post_depth_coverage;
shader->EarlyFragmentTests = state->fs_early_fragment_tests;
shader->InnerCoverage = state->fs_inner_coverage;
shader->PostDepthCoverage = state->fs_post_depth_coverage;
shader->BlendSupport = state->fs_blend_support;
break;

View File

@@ -250,6 +250,11 @@ struct _mesa_glsl_parse_state {
return ARB_gpu_shader_fp64_enable || is_version(400, 0);
}
/**
 * Report whether the ARB_gpu_shader_int64 enable flag is set.
 *
 * The flag is the only condition; there is no GLSL-version fallback here.
 */
bool has_int64() const
{
return ARB_gpu_shader_int64_enable;
}
bool has_420pack() const
{
return ARB_shading_language_420pack_enable || is_version(420, 0);
@@ -610,6 +615,8 @@ struct _mesa_glsl_parse_state {
bool ARB_gpu_shader5_warn;
bool ARB_gpu_shader_fp64_enable;
bool ARB_gpu_shader_fp64_warn;
bool ARB_gpu_shader_int64_enable;
bool ARB_gpu_shader_int64_warn;
bool ARB_post_depth_coverage_enable;
bool ARB_post_depth_coverage_warn;
bool ARB_sample_shading_enable;

View File

@@ -169,8 +169,6 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
shader->info->name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name);
if (shader_prog->Label)
shader->info->label = ralloc_strdup(shader, shader_prog->Label);
shader->info->clip_distance_array_size = sh->Program->ClipDistanceArraySize;
shader->info->cull_distance_array_size = sh->Program->CullDistanceArraySize;
shader->info->has_transform_feedback_varyings =
shader_prog->TransformFeedback.NumVarying > 0;
@@ -254,6 +252,22 @@ constant_copy(ir_constant *ir, void *mem_ctx)
}
break;
case GLSL_TYPE_UINT64:
/* Only float base types can be matrices. */
assert(cols == 1);
for (unsigned r = 0; r < rows; r++)
ret->values[0].u64[r] = ir->value.u64[r];
break;
case GLSL_TYPE_INT64:
/* Only float base types can be matrices. */
assert(cols == 1);
for (unsigned r = 0; r < rows; r++)
ret->values[0].i64[r] = ir->value.i64[r];
break;
case GLSL_TYPE_BOOL:
/* Only float base types can be matrices. */
assert(cols == 1);
@@ -927,8 +941,7 @@ nir_visitor::visit(ir_call *ir)
nir_builder_instr_insert(&b, &instr->instr);
break;
case nir_intrinsic_shader_clock:
nir_ssa_dest_init(&instr->instr, &instr->dest, 2, 32, NULL);
instr->num_components = 2;
nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
nir_builder_instr_insert(&b, &instr->instr);
break;
case nir_intrinsic_store_ssbo: {
@@ -1310,6 +1323,12 @@ type_is_float(glsl_base_type type)
return type == GLSL_TYPE_FLOAT || type == GLSL_TYPE_DOUBLE;
}
/**
 * Tell whether \p type is one of the signed integer base types
 * (GLSL_TYPE_INT or GLSL_TYPE_INT64).  Every other base type yields false.
 */
static bool
type_is_signed(glsl_base_type type)
{
   switch (type) {
   case GLSL_TYPE_INT:
   case GLSL_TYPE_INT64:
      return true;
   default:
      return false;
   }
}
void
nir_visitor::visit(ir_expression *ir)
{
@@ -1465,6 +1484,7 @@ nir_visitor::visit(ir_expression *ir)
case ir_unop_f2b: result = nir_f2b(&b, srcs[0]); break;
case ir_unop_i2b: result = nir_i2b(&b, srcs[0]); break;
case ir_unop_b2i: result = nir_b2i(&b, srcs[0]); break;
case ir_unop_b2i64:result = nir_b2i64(&b, srcs[0]); break;
case ir_unop_d2f: result = nir_d2f(&b, srcs[0]); break;
case ir_unop_f2d: result = nir_f2d(&b, srcs[0]); break;
case ir_unop_d2i: result = nir_d2i(&b, srcs[0]); break;
@@ -1478,12 +1498,40 @@ nir_visitor::visit(ir_expression *ir)
assert(supports_ints);
result = nir_u2d(&b, srcs[0]);
break;
case ir_unop_i642i: result = nir_i2i32(&b, srcs[0]); break;
case ir_unop_i642u: result = nir_i2u32(&b, srcs[0]); break;
case ir_unop_i642f: result = nir_i642f(&b, srcs[0]); break;
case ir_unop_i642d: result = nir_i642d(&b, srcs[0]); break;
case ir_unop_u642i: result = nir_u2i32(&b, srcs[0]); break;
case ir_unop_u642u: result = nir_u2u32(&b, srcs[0]); break;
case ir_unop_u642f: result = nir_u642f(&b, srcs[0]); break;
case ir_unop_u642d: result = nir_u642d(&b, srcs[0]); break;
case ir_unop_i2i64: result = nir_i2i64(&b, srcs[0]); break;
case ir_unop_u2i64: result = nir_u2i64(&b, srcs[0]); break;
case ir_unop_f2i64:
case ir_unop_d2i64:
result = nir_f2i64(&b, srcs[0]);
break;
case ir_unop_i2u64: result = nir_i2u64(&b, srcs[0]); break;
case ir_unop_u2u64: result = nir_u2u64(&b, srcs[0]); break;
case ir_unop_f2u64:
case ir_unop_d2u64:
result = nir_f2u64(&b, srcs[0]);
break;
case ir_unop_i2u:
case ir_unop_u2i:
case ir_unop_i642u64:
case ir_unop_u642i64:
case ir_unop_bitcast_i2f:
case ir_unop_bitcast_f2i:
case ir_unop_bitcast_u2f:
case ir_unop_bitcast_f2u:
case ir_unop_bitcast_i642d:
case ir_unop_bitcast_d2i64:
case ir_unop_bitcast_u642d:
case ir_unop_bitcast_d2u64:
case ir_unop_subroutine_to_int:
/* no-op */
result = nir_imov(&b, srcs[0]);
@@ -1537,6 +1585,14 @@ nir_visitor::visit(ir_expression *ir)
case ir_unop_unpack_double_2x32:
result = nir_unpack_double_2x32(&b, srcs[0]);
break;
case ir_unop_pack_int_2x32:
case ir_unop_pack_uint_2x32:
result = nir_pack_int_2x32(&b, srcs[0]);
break;
case ir_unop_unpack_int_2x32:
case ir_unop_unpack_uint_2x32:
result = nir_unpack_int_2x32(&b, srcs[0]);
break;
case ir_unop_bitfield_reverse:
result = nir_bitfield_reverse(&b, srcs[0]);
break;
@@ -1627,7 +1683,7 @@ nir_visitor::visit(ir_expression *ir)
case ir_binop_div:
if (type_is_float(out_type))
result = nir_fdiv(&b, srcs[0], srcs[1]);
else if (out_type == GLSL_TYPE_INT)
else if (type_is_signed(out_type))
result = nir_idiv(&b, srcs[0], srcs[1]);
else
result = nir_udiv(&b, srcs[0], srcs[1]);
@@ -1639,7 +1695,7 @@ nir_visitor::visit(ir_expression *ir)
case ir_binop_min:
if (type_is_float(out_type))
result = nir_fmin(&b, srcs[0], srcs[1]);
else if (out_type == GLSL_TYPE_INT)
else if (type_is_signed(out_type))
result = nir_imin(&b, srcs[0], srcs[1]);
else
result = nir_umin(&b, srcs[0], srcs[1]);
@@ -1647,7 +1703,7 @@ nir_visitor::visit(ir_expression *ir)
case ir_binop_max:
if (type_is_float(out_type))
result = nir_fmax(&b, srcs[0], srcs[1]);
else if (out_type == GLSL_TYPE_INT)
else if (type_is_signed(out_type))
result = nir_imax(&b, srcs[0], srcs[1]);
else
result = nir_umax(&b, srcs[0], srcs[1]);
@@ -1670,8 +1726,8 @@ nir_visitor::visit(ir_expression *ir)
break;
case ir_binop_lshift: result = nir_ishl(&b, srcs[0], srcs[1]); break;
case ir_binop_rshift:
result = (out_type == GLSL_TYPE_INT) ? nir_ishr(&b, srcs[0], srcs[1])
: nir_ushr(&b, srcs[0], srcs[1]);
result = (type_is_signed(out_type)) ? nir_ishr(&b, srcs[0], srcs[1])
: nir_ushr(&b, srcs[0], srcs[1]);
break;
case ir_binop_imul_high:
result = (out_type == GLSL_TYPE_INT) ? nir_imul_high(&b, srcs[0], srcs[1])
@@ -1683,7 +1739,7 @@ nir_visitor::visit(ir_expression *ir)
if (supports_ints) {
if (type_is_float(types[0]))
result = nir_flt(&b, srcs[0], srcs[1]);
else if (types[0] == GLSL_TYPE_INT)
else if (type_is_signed(types[0]))
result = nir_ilt(&b, srcs[0], srcs[1]);
else
result = nir_ult(&b, srcs[0], srcs[1]);
@@ -1695,7 +1751,7 @@ nir_visitor::visit(ir_expression *ir)
if (supports_ints) {
if (type_is_float(types[0]))
result = nir_flt(&b, srcs[1], srcs[0]);
else if (types[0] == GLSL_TYPE_INT)
else if (type_is_signed(types[0]))
result = nir_ilt(&b, srcs[1], srcs[0]);
else
result = nir_ult(&b, srcs[1], srcs[0]);
@@ -1707,7 +1763,7 @@ nir_visitor::visit(ir_expression *ir)
if (supports_ints) {
if (type_is_float(types[0]))
result = nir_fge(&b, srcs[1], srcs[0]);
else if (types[0] == GLSL_TYPE_INT)
else if (type_is_signed(types[0]))
result = nir_ige(&b, srcs[1], srcs[0]);
else
result = nir_uge(&b, srcs[1], srcs[0]);
@@ -1719,7 +1775,7 @@ nir_visitor::visit(ir_expression *ir)
if (supports_ints) {
if (type_is_float(types[0]))
result = nir_fge(&b, srcs[0], srcs[1]);
else if (types[0] == GLSL_TYPE_INT)
else if (type_is_signed(types[0]))
result = nir_ige(&b, srcs[0], srcs[1]);
else
result = nir_uge(&b, srcs[0], srcs[1]);

View File

@@ -0,0 +1,121 @@
/* Compile with:
*
* glsl_compiler --version 140 --dump-builder int64.glsl > builtin_int64.h
*
* Using version 1.40+ prevents built-in variables from being included.
*/
#version 400
#extension GL_ARB_gpu_shader_int64: require
#extension GL_ARB_shading_language_420pack: require
/* Low 64 bits of the product of two 64-bit values, each passed as a uvec2
 * with the low word in .x and the high word in .y.
 */
uvec2
umul64(uvec2 a, uvec2 b)
{
   uint hi;
   uint lo;

   /* Full 32x32 -> 64 product of the two low words. */
   umulExtended(a.x, b.x, hi, lo);

   /* The cross terms only contribute to the high word of the result. */
   hi += a.x * b.y + a.y * b.x;

   return uvec2(lo, hi);
}
/* Sign of a 64-bit integer passed as an ivec2 (low word in .x, high word in
 * .y).  The result uses the same two-word layout.
 */
ivec2
sign64(ivec2 a)
{
   /* Arithmetic shift replicates the sign bit across the whole high word. */
   int hi = a.y >> 31;

   /* Set the lowest bit when any bit of the input is set. */
   int lo = hi | int((a.x | a.y) != 0);

   return ivec2(lo, hi);
}
/* Unsigned 64-bit shift-and-subtract division.  Operands are uvec2 values
 * with the low word in .x and the high word in .y.  The returned uvec4 holds
 * the quotient in .xy and the remainder in .zw.
 */
uvec4
udivmod64(uvec2 n, uvec2 d)
{
uvec2 quot = uvec2(0U, 0U);
int log2_denom = findMSB(d.y) + 32;
/* If the upper 32 bits of denom are non-zero, it is impossible for shifts
* greater than 32 bits to occur. If the upper 32 bits of the numerator
* are zero, it is impossible for (denom << [63, 32]) <= numer unless
* denom == 0.
*/
if (d.y == 0 && n.y >= d.x) {
/* This branch produces the high word of the quotient by dividing the
* high word of the numerator by the (32-bit) denominator.
*/
log2_denom = findMSB(d.x);
/* Since the upper 32 bits of denom are zero, log2_denom <= 31. The
* log2_denom <= 31 - i test skips shift amounts that would move the
* highest set bit of d.x past bit 31.
*/
for (int i = 31; i >= 1; i--) {
if (log2_denom <= 31 - i && (d.x << i) <= n.y) {
n.y -= d.x << i;
quot.y |= 1U << i;
}
}
/* log2_denom is always <= 31, so manually peel the last loop
* iteration.
*/
if (d.x <= n.y) {
n.y -= d.x;
quot.y |= 1U;
}
}
/* Low word of the quotient: compare and subtract on the packed 64-bit
* values, using whatever is left of the numerator after the step above.
*/
uint64_t d64 = packUint2x32(d);
uint64_t n64 = packUint2x32(n);
for (int i = 31; i >= 1; i--) {
if (log2_denom <= 63 - i && (d64 << i) <= n64) {
n64 -= d64 << i;
quot.x |= 1U << i;
}
}
/* log2_denom is always <= 63, so manually peel the last loop
* iteration.
*/
if (d64 <= n64) {
n64 -= d64;
quot.x |= 1U;
}
/* What remains of the numerator is the remainder. */
return uvec4(quot, unpackUint2x32(n64));
}
/* Unsigned 64-bit division: the quotient, which udivmod64 returns in .xy. */
uvec2
udiv64(uvec2 n, uvec2 d)
{
return udivmod64(n, d).xy;
}
/* Signed 64-bit division built on the unsigned divider: divide the absolute
 * values, then negate the quotient when the operand signs differ.
 */
ivec2
idiv64(ivec2 _n, ivec2 _d)
{
   uvec2 abs_n = unpackUint2x32(uint64_t(abs(packInt2x32(_n))));
   uvec2 abs_d = unpackUint2x32(uint64_t(abs(packInt2x32(_d))));

   uvec2 q = udivmod64(abs_n, abs_d).xy;

   /* The sign bit of each operand lives in its high word (.y). */
   if ((_n.y < 0) != (_d.y < 0))
      return unpackInt2x32(-int64_t(packUint2x32(q)));

   return ivec2(q);
}
/* Unsigned 64-bit modulo: the remainder, which udivmod64 returns in .zw. */
uvec2
umod64(uvec2 n, uvec2 d)
{
return udivmod64(n, d).zw;
}
/* Signed 64-bit modulo built on the unsigned divider.  Operands and result
 * are ivec2 values with the low word in .x and the high word in .y.  The
 * remainder of the absolute values is computed and then negated when the
 * operand signs differ.
 */
ivec2
imod64(ivec2 _n, ivec2 _d)
{
   /* The sign bit of each operand lives in its high word (.y). */
   const bool negate = (_n.y < 0) != (_d.y < 0);
   uvec2 n = unpackUint2x32(uint64_t(abs(packInt2x32(_n))));
   uvec2 d = unpackUint2x32(uint64_t(abs(packInt2x32(_d))));

   /* udivmod64 returns the quotient in .xy and the remainder in .zw; both
    * remainder words must be taken from .zw.
    */
   uvec2 rem = udivmod64(n, d).zw;

   return negate ? unpackInt2x32(-int64_t(packUint2x32(rem))) : ivec2(rem);
}

View File

@@ -261,6 +261,8 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
case ir_unop_find_msb:
case ir_unop_find_lsb:
case ir_unop_subroutine_to_int:
case ir_unop_i642i:
case ir_unop_u642i:
this->type = glsl_type::get_instance(GLSL_TYPE_INT,
op0->type->vector_elements, 1);
break;
@@ -271,6 +273,8 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
case ir_unop_d2f:
case ir_unop_bitcast_i2f:
case ir_unop_bitcast_u2f:
case ir_unop_i642f:
case ir_unop_u642f:
this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
op0->type->vector_elements, 1);
break;
@@ -278,6 +282,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
case ir_unop_f2b:
case ir_unop_i2b:
case ir_unop_d2b:
case ir_unop_i642b:
this->type = glsl_type::get_instance(GLSL_TYPE_BOOL,
op0->type->vector_elements, 1);
break;
@@ -285,6 +290,8 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
case ir_unop_f2d:
case ir_unop_i2d:
case ir_unop_u2d:
case ir_unop_i642d:
case ir_unop_u642d:
this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE,
op0->type->vector_elements, 1);
break;
@@ -293,18 +300,43 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
case ir_unop_f2u:
case ir_unop_d2u:
case ir_unop_bitcast_f2u:
case ir_unop_i642u:
case ir_unop_u642u:
this->type = glsl_type::get_instance(GLSL_TYPE_UINT,
op0->type->vector_elements, 1);
break;
case ir_unop_i2i64:
case ir_unop_u2i64:
case ir_unop_b2i64:
case ir_unop_f2i64:
case ir_unop_d2i64:
case ir_unop_u642i64:
this->type = glsl_type::get_instance(GLSL_TYPE_INT64,
op0->type->vector_elements, 1);
break;
case ir_unop_i2u64:
case ir_unop_u2u64:
case ir_unop_f2u64:
case ir_unop_d2u64:
case ir_unop_i642u64:
this->type = glsl_type::get_instance(GLSL_TYPE_UINT64,
op0->type->vector_elements, 1);
break;
case ir_unop_noise:
this->type = glsl_type::float_type;
break;
case ir_unop_unpack_double_2x32:
case ir_unop_unpack_uint_2x32:
this->type = glsl_type::uvec2_type;
break;
case ir_unop_unpack_int_2x32:
this->type = glsl_type::ivec2_type;
break;
case ir_unop_pack_snorm_2x16:
case ir_unop_pack_snorm_4x8:
case ir_unop_pack_unorm_2x16:
@@ -317,6 +349,14 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
this->type = glsl_type::double_type;
break;
case ir_unop_pack_int_2x32:
this->type = glsl_type::int64_t_type;
break;
case ir_unop_pack_uint_2x32:
this->type = glsl_type::uint64_t_type;
break;
case ir_unop_unpack_snorm_2x16:
case ir_unop_unpack_unorm_2x16:
case ir_unop_unpack_half_2x16:
@@ -347,6 +387,21 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
this->type = glsl_type::bool_type;
break;
case ir_unop_bitcast_i642d:
case ir_unop_bitcast_u642d:
this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE,
op0->type->vector_elements, 1);
break;
case ir_unop_bitcast_d2i64:
this->type = glsl_type::get_instance(GLSL_TYPE_INT64,
op0->type->vector_elements, 1);
break;
case ir_unop_bitcast_d2u64:
this->type = glsl_type::get_instance(GLSL_TYPE_UINT64,
op0->type->vector_elements, 1);
break;
default:
assert(!"not reached: missing automatic type setup for ir_expression");
this->type = op0->type;
@@ -613,6 +668,32 @@ ir_constant::ir_constant(int integer, unsigned vector_elements)
}
}
/**
 * Construct a GLSL_TYPE_UINT64 constant with \c vector_elements components,
 * each set to \c u64.  Unused slots of the value storage are zeroed.
 */
ir_constant::ir_constant(uint64_t u64, unsigned vector_elements)
   : ir_rvalue(ir_type_constant)
{
   assert(vector_elements <= 4);
   this->type = glsl_type::get_instance(GLSL_TYPE_UINT64, vector_elements, 1);

   unsigned slot = 0;

   /* Splat the value across the live components... */
   for (; slot < vector_elements; slot++)
      this->value.u64[slot] = u64;

   /* ...and clear everything after them. */
   for (; slot < 16; slot++)
      this->value.u64[slot] = 0;
}
/**
 * Construct a GLSL_TYPE_INT64 constant with \c vector_elements components,
 * each set to \c int64.  Unused slots of the value storage are zeroed.
 */
ir_constant::ir_constant(int64_t int64, unsigned vector_elements)
   : ir_rvalue(ir_type_constant)
{
   assert(vector_elements <= 4);
   this->type = glsl_type::get_instance(GLSL_TYPE_INT64, vector_elements, 1);

   unsigned slot = 0;

   /* Splat the value across the live components... */
   for (; slot < vector_elements; slot++)
      this->value.i64[slot] = int64;

   /* ...and clear everything after them. */
   for (; slot < 16; slot++)
      this->value.i64[slot] = 0;
}
ir_constant::ir_constant(bool b, unsigned vector_elements)
: ir_rvalue(ir_type_constant)
{
@@ -716,6 +797,11 @@ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list)
for (unsigned i = 0; i < type->components(); i++)
this->value.d[i] = value->value.d[0];
break;
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
for (unsigned i = 0; i < type->components(); i++)
this->value.u64[i] = value->value.u64[0];
break;
case GLSL_TYPE_BOOL:
for (unsigned i = 0; i < type->components(); i++)
this->value.b[i] = value->value.b[0];
@@ -778,6 +864,12 @@ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list)
case GLSL_TYPE_DOUBLE:
this->value.d[i] = value->get_double_component(j);
break;
case GLSL_TYPE_UINT64:
this->value.u64[i] = value->get_uint64_component(j);
break;
case GLSL_TYPE_INT64:
this->value.i64[i] = value->get_int64_component(j);
break;
default:
/* FINISHME: What to do? Exceptions are not the answer.
*/
@@ -831,6 +923,8 @@ ir_constant::get_bool_component(unsigned i) const
case GLSL_TYPE_FLOAT: return ((int)this->value.f[i]) != 0;
case GLSL_TYPE_BOOL: return this->value.b[i];
case GLSL_TYPE_DOUBLE: return this->value.d[i] != 0.0;
case GLSL_TYPE_UINT64: return this->value.u64[i] != 0;
case GLSL_TYPE_INT64: return this->value.i64[i] != 0;
default: assert(!"Should not get here."); break;
}
@@ -849,6 +943,8 @@ ir_constant::get_float_component(unsigned i) const
case GLSL_TYPE_FLOAT: return this->value.f[i];
case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0f : 0.0f;
case GLSL_TYPE_DOUBLE: return (float) this->value.d[i];
case GLSL_TYPE_UINT64: return (float) this->value.u64[i];
case GLSL_TYPE_INT64: return (float) this->value.i64[i];
default: assert(!"Should not get here."); break;
}
@@ -867,6 +963,8 @@ ir_constant::get_double_component(unsigned i) const
case GLSL_TYPE_FLOAT: return (double) this->value.f[i];
case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0 : 0.0;
case GLSL_TYPE_DOUBLE: return this->value.d[i];
case GLSL_TYPE_UINT64: return (double) this->value.u64[i];
case GLSL_TYPE_INT64: return (double) this->value.i64[i];
default: assert(!"Should not get here."); break;
}
@@ -885,6 +983,8 @@ ir_constant::get_int_component(unsigned i) const
case GLSL_TYPE_FLOAT: return (int) this->value.f[i];
case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0;
case GLSL_TYPE_DOUBLE: return (int) this->value.d[i];
case GLSL_TYPE_UINT64: return (int) this->value.u64[i];
case GLSL_TYPE_INT64: return (int) this->value.i64[i];
default: assert(!"Should not get here."); break;
}
@@ -903,6 +1003,48 @@ ir_constant::get_uint_component(unsigned i) const
case GLSL_TYPE_FLOAT: return (unsigned) this->value.f[i];
case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0;
case GLSL_TYPE_DOUBLE: return (unsigned) this->value.d[i];
case GLSL_TYPE_UINT64: return (unsigned) this->value.u64[i];
case GLSL_TYPE_INT64: return (unsigned) this->value.i64[i];
default: assert(!"Should not get here."); break;
}
/* Must return something to make the compiler happy. This is clearly an
* error case.
*/
return 0;
}
/**
 * Fetch component \c i of this constant converted to a signed 64-bit
 * integer, whatever scalar base type the constant holds.
 */
int64_t
ir_constant::get_int64_component(unsigned i) const
{
   /* Stays zero for unsupported base types: something must be returned to
    * make the compiler happy, and that is clearly an error case.
    */
   int64_t component = 0;

   switch (this->type->base_type) {
   case GLSL_TYPE_UINT:
      component = this->value.u[i];
      break;
   case GLSL_TYPE_INT:
      component = this->value.i[i];
      break;
   case GLSL_TYPE_FLOAT:
      component = (int64_t) this->value.f[i];
      break;
   case GLSL_TYPE_BOOL:
      component = this->value.b[i] ? 1 : 0;
      break;
   case GLSL_TYPE_DOUBLE:
      component = (int64_t) this->value.d[i];
      break;
   case GLSL_TYPE_UINT64:
      component = (int64_t) this->value.u64[i];
      break;
   case GLSL_TYPE_INT64:
      component = this->value.i64[i];
      break;
   default:
      assert(!"Should not get here.");
      break;
   }

   return component;
}
uint64_t
ir_constant::get_uint64_component(unsigned i) const
{
switch (this->type->base_type) {
case GLSL_TYPE_UINT: return this->value.u[i];
case GLSL_TYPE_INT: return this->value.i[i];
case GLSL_TYPE_FLOAT: return (uint64_t) this->value.f[i];
case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0;
case GLSL_TYPE_DOUBLE: return (uint64_t) this->value.d[i];
case GLSL_TYPE_UINT64: return this->value.u64[i];
case GLSL_TYPE_INT64: return (uint64_t) this->value.i64[i];
default: assert(!"Should not get here."); break;
}
@@ -968,6 +1110,8 @@ ir_constant::copy_offset(ir_constant *src, int offset)
case GLSL_TYPE_INT:
case GLSL_TYPE_FLOAT:
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
case GLSL_TYPE_BOOL: {
unsigned int size = src->type->components();
assert (size <= this->type->components() - offset);
@@ -988,6 +1132,12 @@ ir_constant::copy_offset(ir_constant *src, int offset)
case GLSL_TYPE_DOUBLE:
value.d[i+offset] = src->get_double_component(i);
break;
case GLSL_TYPE_UINT64:
value.u64[i+offset] = src->get_uint64_component(i);
break;
case GLSL_TYPE_INT64:
value.i64[i+offset] = src->get_int64_component(i);
break;
default: // Shut up the compiler
break;
}
@@ -1047,6 +1197,12 @@ ir_constant::copy_masked_offset(ir_constant *src, int offset, unsigned int mask)
case GLSL_TYPE_DOUBLE:
value.d[i+offset] = src->get_double_component(id++);
break;
case GLSL_TYPE_UINT64:
value.u64[i+offset] = src->get_uint64_component(id++);
break;
case GLSL_TYPE_INT64:
value.i64[i+offset] = src->get_int64_component(id++);
break;
default:
assert(!"Should not get here.");
return;
@@ -1111,6 +1267,14 @@ ir_constant::has_value(const ir_constant *c) const
if (this->value.d[i] != c->value.d[i])
return false;
break;
case GLSL_TYPE_UINT64:
if (this->value.u64[i] != c->value.u64[i])
return false;
break;
case GLSL_TYPE_INT64:
if (this->value.i64[i] != c->value.i64[i])
return false;
break;
default:
assert(!"Should not get here.");
return false;
@@ -1152,6 +1316,14 @@ ir_constant::is_value(float f, int i) const
if (this->value.d[c] != double(f))
return false;
break;
case GLSL_TYPE_UINT64:
if (this->value.u64[c] != uint64_t(i))
return false;
break;
case GLSL_TYPE_INT64:
if (this->value.i64[c] != i)
return false;
break;
default:
/* The only other base types are structures, arrays, and samplers.
* Samplers cannot be constants, and the others should have been

View File

@@ -2093,6 +2093,8 @@ union ir_constant_data {
float f[16];
bool b[16];
double d[16];
uint64_t u64[16];
int64_t i64[16];
};
@@ -2104,6 +2106,8 @@ public:
ir_constant(int i, unsigned vector_elements=1);
ir_constant(float f, unsigned vector_elements=1);
ir_constant(double d, unsigned vector_elements=1);
ir_constant(uint64_t u64, unsigned vector_elements=1);
ir_constant(int64_t i64, unsigned vector_elements=1);
/**
* Construct an ir_constant from a list of ir_constant values
@@ -2154,6 +2158,8 @@ public:
double get_double_component(unsigned i) const;
int get_int_component(unsigned i) const;
unsigned get_uint_component(unsigned i) const;
int64_t get_int64_component(unsigned i) const;
uint64_t get_uint64_component(unsigned i) const;
/*@}*/
ir_constant *get_array_element(unsigned i) const;
@@ -2377,25 +2383,6 @@ extern void
_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
gl_shader *shader);
extern void
_mesa_glsl_initialize_builtin_functions();
extern ir_function_signature *
_mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state,
const char *name, exec_list *actual_parameters);
extern ir_function *
_mesa_glsl_find_builtin_function_by_name(const char *name);
extern gl_shader *
_mesa_glsl_get_builtin_function_shader(void);
extern ir_function_signature *
_mesa_get_main_function_signature(glsl_symbol_table *symbols);
extern void
_mesa_glsl_release_builtin_functions(void);
extern void
reparent_ir(exec_list *list, void *mem_ctx);

View File

@@ -518,6 +518,30 @@ b2f(operand a)
return expr(ir_unop_b2f, a);
}
/** Wrap operand \c a in an ir_unop_bitcast_d2i64 expression via expr(). */
ir_expression*
bitcast_d2i64(operand a)
{
return expr(ir_unop_bitcast_d2i64, a);
}
/** Wrap operand \c a in an ir_unop_bitcast_d2u64 expression via expr(). */
ir_expression*
bitcast_d2u64(operand a)
{
return expr(ir_unop_bitcast_d2u64, a);
}
/** Wrap operand \c a in an ir_unop_bitcast_i642d expression via expr(). */
ir_expression*
bitcast_i642d(operand a)
{
return expr(ir_unop_bitcast_i642d, a);
}
/** Wrap operand \c a in an ir_unop_bitcast_u642d expression via expr(). */
ir_expression*
bitcast_u642d(operand a)
{
return expr(ir_unop_bitcast_u642d, a);
}
ir_expression *
interpolate_at_centroid(operand a)
{

View File

@@ -191,6 +191,12 @@ ir_expression *f2d(operand a);
ir_expression *i2d(operand a);
ir_expression *u2d(operand a);
ir_expression *bitcast_d2i64(operand a);
ir_expression *bitcast_d2u64(operand a);
ir_expression *bitcast_i642d(operand a);
ir_expression *bitcast_u642d(operand a);
ir_expression *min2(operand a, operand b);
ir_expression *max2(operand a, operand b);

View File

@@ -396,13 +396,24 @@ ir_builder_print_visitor::visit(ir_constant *ir)
memcpy(&v, &ir->value.d[i], sizeof(v));
if (v != 0)
/* FIXME: This won't actually work until ARB_gpu_shader_int64
* support lands.
*/
print_without_indent("r%04X_data.u64[%u] = 0x%016" PRIx64 "; /* %g */\n",
my_index, i, v, ir->value.d[i]);
break;
}
case GLSL_TYPE_UINT64:
if (ir->value.u64[i] != 0)
print_without_indent("r%04X_data.u64[%u] = %" PRIu64 ";\n",
my_index,
i,
ir->value.u64[i]);
break;
case GLSL_TYPE_INT64:
if (ir->value.i64[i] != 0)
print_without_indent("r%04X_data.i64[%u] = %" PRId64 ";\n",
my_index,
i,
ir->value.i64[i]);
break;
case GLSL_TYPE_BOOL:
if (ir->value.u[i] != 0)
print_without_indent("r%04X_data.u[%u] = 1;\n", my_index, i);

View File

@@ -337,6 +337,8 @@ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const
case GLSL_TYPE_FLOAT:
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_BOOL:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
return new(mem_ctx) ir_constant(this->type, &this->value);
case GLSL_TYPE_STRUCT: {

View File

@@ -88,6 +88,42 @@ bitcast_f2u(float f)
return u;
}
/**
 * Reinterpret the bits of a 64-bit unsigned integer as a double.
 */
static double
bitcast_u642d(uint64_t u)
{
   double result;

   assert(sizeof(double) == sizeof(uint64_t));

   /* Copy the raw bytes; no numeric conversion takes place. */
   memcpy(&result, &u, sizeof(result));
   return result;
}
/**
 * Reinterpret the bits of a 64-bit signed integer as a double.
 */
static double
bitcast_i642d(int64_t i)
{
   double result;

   assert(sizeof(double) == sizeof(int64_t));

   /* Copy the raw bytes; no numeric conversion takes place. */
   memcpy(&result, &i, sizeof(result));
   return result;
}
/**
 * Reinterpret the bits of a double as a 64-bit unsigned integer.
 *
 * The return type must be the integer type: returning \c double would
 * convert the bit pattern to a floating-point value and round it to 53
 * significant bits before the caller stores it.
 */
static uint64_t
bitcast_d2u64(double d)
{
   assert(sizeof(double) == sizeof(uint64_t));
   uint64_t u;
   /* Copy the raw bytes; no numeric conversion takes place. */
   memcpy(&u, &d, sizeof(d));
   return u;
}
/**
 * Reinterpret the bits of a double as a 64-bit signed integer.
 *
 * The return type must be the integer type: returning \c double would
 * convert the bit pattern to a floating-point value and round it to 53
 * significant bits before the caller stores it.
 */
static int64_t
bitcast_d2i64(double d)
{
   assert(sizeof(double) == sizeof(int64_t));
   int64_t i;
   /* Copy the raw bytes; no numeric conversion takes place. */
   memcpy(&i, &d, sizeof(d));
   return i;
}
/**
* Evaluate one component of a floating-point 4x8 unpacking function.
*/

View File

@@ -80,14 +80,16 @@ class type_signature_iter(object):
uint_type = type("unsigned", "u", "GLSL_TYPE_UINT")
int_type = type("int", "i", "GLSL_TYPE_INT")
uint64_type = type("uint64_t", "u64", "GLSL_TYPE_UINT64")
int64_type = type("int64_t", "i64", "GLSL_TYPE_INT64")
float_type = type("float", "f", "GLSL_TYPE_FLOAT")
double_type = type("double", "d", "GLSL_TYPE_DOUBLE")
bool_type = type("bool", "b", "GLSL_TYPE_BOOL")
all_types = (uint_type, int_type, float_type, double_type, bool_type)
numeric_types = (uint_type, int_type, float_type, double_type)
signed_numeric_types = (int_type, float_type, double_type)
integer_types = (uint_type, int_type)
all_types = (uint_type, int_type, float_type, double_type, uint64_type, int64_type, bool_type)
numeric_types = (uint_type, int_type, float_type, double_type, uint64_type, int64_type)
signed_numeric_types = (int_type, float_type, double_type, int64_type)
integer_types = (uint_type, int_type, uint64_type, int64_type)
real_types = (float_type, double_type)
# This template is for operations that can have operands of a several
@@ -418,8 +420,8 @@ ir_expression_operation = [
operation("bit_not", 1, printable_name="~", source_types=integer_types, c_expression="~ {src0}"),
operation("logic_not", 1, printable_name="!", source_types=(bool_type,), c_expression="!{src0}"),
operation("neg", 1, source_types=numeric_types, c_expression={'u': "-((int) {src0})", 'default': "-{src0}"}),
operation("abs", 1, source_types=signed_numeric_types, c_expression={'i': "{src0} < 0 ? -{src0} : {src0}", 'f': "fabsf({src0})", 'd': "fabs({src0})"}),
operation("sign", 1, source_types=signed_numeric_types, c_expression={'i': "({src0} > 0) - ({src0} < 0)", 'f': "float(({src0} > 0.0F) - ({src0} < 0.0F))", 'd': "double(({src0} > 0.0) - ({src0} < 0.0))"}),
operation("abs", 1, source_types=signed_numeric_types, c_expression={'i': "{src0} < 0 ? -{src0} : {src0}", 'f': "fabsf({src0})", 'd': "fabs({src0})", 'i64': "{src0} < 0 ? -{src0} : {src0}"}),
operation("sign", 1, source_types=signed_numeric_types, c_expression={'i': "({src0} > 0) - ({src0} < 0)", 'f': "float(({src0} > 0.0F) - ({src0} < 0.0F))", 'd': "double(({src0} > 0.0) - ({src0} < 0.0))", 'i64': "({src0} > 0) - ({src0} < 0)"}),
operation("rcp", 1, source_types=real_types, c_expression={'f': "{src0} != 0.0F ? 1.0F / {src0} : 0.0F", 'd': "{src0} != 0.0 ? 1.0 / {src0} : 0.0"}),
operation("rsq", 1, source_types=real_types, c_expression={'f': "1.0F / sqrtf({src0})", 'd': "1.0 / sqrt({src0})"}),
operation("sqrt", 1, source_types=real_types, c_expression={'f': "sqrtf({src0})", 'd': "sqrt({src0})"}),
@@ -439,7 +441,7 @@ ir_expression_operation = [
# Boolean-to-float conversion
operation("b2f", 1, source_types=(bool_type,), dest_type=float_type, c_expression="{src0} ? 1.0F : 0.0F"),
# int-to-boolean conversion
operation("i2b", 1, source_types=integer_types, dest_type=bool_type, c_expression="{src0} ? true : false"),
operation("i2b", 1, source_types=(uint_type, int_type), dest_type=bool_type, c_expression="{src0} ? true : false"),
# Boolean-to-int conversion
operation("b2i", 1, source_types=(bool_type,), dest_type=int_type, c_expression="{src0} ? 1 : 0"),
# Unsigned-to-float conversion.
@@ -470,6 +472,37 @@ ir_expression_operation = [
operation("bitcast_u2f", 1, source_types=(uint_type,), dest_type=float_type, c_expression="bitcast_u2f({src0})"),
# 'Bit-identical float-to-uint "conversion"
operation("bitcast_f2u", 1, source_types=(float_type,), dest_type=uint_type, c_expression="bitcast_f2u({src0})"),
# Bit-identical u64-to-double "conversion"
operation("bitcast_u642d", 1, source_types=(uint64_type,), dest_type=double_type, c_expression="bitcast_u642d({src0})"),
# Bit-identical i64-to-double "conversion"
operation("bitcast_i642d", 1, source_types=(int64_type,), dest_type=double_type, c_expression="bitcast_i642d({src0})"),
# Bit-identical double-to-u64 "conversion"
operation("bitcast_d2u64", 1, source_types=(double_type,), dest_type=uint64_type, c_expression="bitcast_d2u64({src0})"),
# Bit-identical double-to-i64 "conversion"
operation("bitcast_d2i64", 1, source_types=(double_type,), dest_type=int64_type, c_expression="bitcast_d2i64({src0})"),
# i64-to-i32 conversion
operation("i642i", 1, source_types=(int64_type,), dest_type=int_type, c_expression="{src0}"),
# u64-to-i32 conversion
operation("u642i", 1, source_types=(uint64_type,), dest_type=int_type, c_expression="{src0}"),
operation("i642u", 1, source_types=(int64_type,), dest_type=uint_type, c_expression="{src0}"),
operation("u642u", 1, source_types=(uint64_type,), dest_type=uint_type, c_expression="{src0}"),
operation("i642b", 1, source_types=(int64_type,), dest_type=bool_type, c_expression="{src0} != 0"),
operation("i642f", 1, source_types=(int64_type,), dest_type=float_type, c_expression="{src0}"),
operation("u642f", 1, source_types=(uint64_type,), dest_type=float_type, c_expression="{src0}"),
operation("i642d", 1, source_types=(int64_type,), dest_type=double_type, c_expression="{src0}"),
operation("u642d", 1, source_types=(uint64_type,), dest_type=double_type, c_expression="{src0}"),
operation("i2i64", 1, source_types=(int_type,), dest_type=int64_type, c_expression="{src0}"),
operation("u2i64", 1, source_types=(uint_type,), dest_type=int64_type, c_expression="{src0}"),
operation("b2i64", 1, source_types=(bool_type,), dest_type=int64_type, c_expression="{src0}"),
operation("f2i64", 1, source_types=(float_type,), dest_type=int64_type, c_expression="{src0}"),
operation("d2i64", 1, source_types=(double_type,), dest_type=int64_type, c_expression="{src0}"),
operation("i2u64", 1, source_types=(int_type,), dest_type=uint64_type, c_expression="{src0}"),
operation("u2u64", 1, source_types=(uint_type,), dest_type=uint64_type, c_expression="{src0}"),
operation("f2u64", 1, source_types=(float_type,), dest_type=uint64_type, c_expression="{src0}"),
operation("d2u64", 1, source_types=(double_type,), dest_type=uint64_type, c_expression="{src0}"),
operation("u642i64", 1, source_types=(uint64_type,), dest_type=int64_type, c_expression="{src0}"),
operation("i642u64", 1, source_types=(int64_type,), dest_type=uint64_type, c_expression="{src0}"),
# Unary floating-point rounding operations.
operation("trunc", 1, source_types=real_types, c_expression={'f': "truncf({src0})", 'd': "trunc({src0})"}),
@@ -503,10 +536,10 @@ ir_expression_operation = [
operation("unpack_half_2x16", 1, printable_name="unpackHalf2x16", source_types=(uint_type,), dest_type=float_type, c_expression="unpack_2x16(unpack_half_1x16, op[0]->value.u[0], &data.f[0], &data.f[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
# Bit operations, part of ARB_gpu_shader5.
operation("bitfield_reverse", 1, source_types=integer_types, c_expression="bitfield_reverse({src0})"),
operation("bit_count", 1, source_types=integer_types, dest_type=int_type, c_expression="_mesa_bitcount({src0})"),
operation("find_msb", 1, source_types=integer_types, dest_type=int_type, c_expression={'u': "find_msb_uint({src0})", 'i': "find_msb_int({src0})"}),
operation("find_lsb", 1, source_types=integer_types, dest_type=int_type, c_expression="find_msb_uint({src0} & -{src0})"),
operation("bitfield_reverse", 1, source_types=(uint_type, int_type), c_expression="bitfield_reverse({src0})"),
operation("bit_count", 1, source_types=(uint_type, int_type), dest_type=int_type, c_expression="_mesa_bitcount({src0})"),
operation("find_msb", 1, source_types=(uint_type, int_type), dest_type=int_type, c_expression={'u': "find_msb_uint({src0})", 'i': "find_msb_int({src0})"}),
operation("find_lsb", 1, source_types=(uint_type, int_type), dest_type=int_type, c_expression="find_msb_uint({src0} & -{src0})"),
operation("saturate", 1, printable_name="sat", source_types=(float_type,), c_expression="CLAMP({src0}, 0.0f, 1.0f)"),
@@ -543,12 +576,18 @@ ir_expression_operation = [
operation("vote_all", 1),
operation("vote_eq", 1),
# 64-bit integer packing ops.
operation("pack_int_2x32", 1, printable_name="packInt2x32", source_types=(int_type,), dest_type=int64_type, c_expression="memcpy(&data.i64[0], &op[0]->value.i[0], sizeof(int64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("pack_uint_2x32", 1, printable_name="packUint2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("unpack_int_2x32", 1, printable_name="unpackInt2x32", source_types=(int64_type,), dest_type=int_type, c_expression="memcpy(&data.i[0], &op[0]->value.i64[0], sizeof(int64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("unpack_uint_2x32", 1, printable_name="unpackUint2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
operation("add", 2, printable_name="+", source_types=numeric_types, c_expression="{src0} + {src1}", flags=vector_scalar_operation),
operation("sub", 2, printable_name="-", source_types=numeric_types, c_expression="{src0} - {src1}", flags=vector_scalar_operation),
# "Floating-point or low 32-bit integer multiply."
operation("mul", 2, printable_name="*", source_types=numeric_types, c_expression="{src0} * {src1}"),
operation("imul_high", 2), # Calculates the high 32-bits of a 64-bit multiply.
operation("div", 2, printable_name="/", source_types=numeric_types, c_expression={'u': "{src1} == 0 ? 0 : {src0} / {src1}", 'i': "{src1} == 0 ? 0 : {src0} / {src1}", 'default': "{src0} / {src1}"}, flags=vector_scalar_operation),
operation("div", 2, printable_name="/", source_types=numeric_types, c_expression={'u': "{src1} == 0 ? 0 : {src0} / {src1}", 'i': "{src1} == 0 ? 0 : {src0} / {src1}", 'u64': "{src1} == 0 ? 0 : {src0} / {src1}", 'i64': "{src1} == 0 ? 0 : {src0} / {src1}", 'default': "{src0} / {src1}"}, flags=vector_scalar_operation),
# Returns the carry resulting from the addition of the two arguments.
operation("carry", 2),
@@ -561,7 +600,7 @@ ir_expression_operation = [
#
# We don't use fmod because it rounds toward zero; GLSL specifies the use
# of floor.
operation("mod", 2, printable_name="%", source_types=numeric_types, c_expression={'u': "{src1} == 0 ? 0 : {src0} % {src1}", 'i': "{src1} == 0 ? 0 : {src0} % {src1}", 'f': "{src0} - {src1} * floorf({src0} / {src1})", 'd': "{src0} - {src1} * floor({src0} / {src1})"}, flags=vector_scalar_operation),
operation("mod", 2, printable_name="%", source_types=numeric_types, c_expression={'u': "{src1} == 0 ? 0 : {src0} % {src1}", 'i': "{src1} == 0 ? 0 : {src0} % {src1}", 'f': "{src0} - {src1} * floorf({src0} / {src1})", 'd': "{src0} - {src1} * floor({src0} / {src1})", 'u64': "{src1} == 0 ? 0 : {src0} % {src1}", 'i64': "{src1} == 0 ? 0 : {src0} % {src1}"}, flags=vector_scalar_operation),
# Binary comparison operators which return a boolean vector.
# The type of both operands must be equal.

View File

@@ -52,6 +52,12 @@
#define DDIV_TO_MUL_RCP 0x100000
#define DIV_TO_MUL_RCP (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)
/* Operations for lower_64bit_integer_instructions() */
#define MUL64 (1U << 0)
#define SIGN64 (1U << 1)
#define DIV64 (1U << 2)
#define MOD64 (1U << 3)
/**
* \see class lower_packing_builtins_visitor
*/
@@ -164,3 +170,6 @@ void propagate_invariance(exec_list *instructions);
ir_rvalue *
compare_index_block(exec_list *instructions, ir_variable *index,
unsigned base, unsigned components, void *mem_ctx);
bool lower_64bit_integer_instructions(exec_list *instructions,
unsigned what_to_lower);

View File

@@ -21,6 +21,8 @@
* DEALINGS IN THE SOFTWARE.
*/
#define __STDC_FORMAT_MACROS 1
#include <inttypes.h> /* for PRIx64 macro */
#include "ir_print_visitor.h"
#include "compiler/glsl_types.h"
#include "glsl_parser_extras.h"
@@ -476,6 +478,8 @@ void ir_print_visitor::visit(ir_constant *ir)
else
fprintf(f, "%f", ir->value.f[i]);
break;
case GLSL_TYPE_UINT64:fprintf(f, "%" PRIu64, ir->value.u64[i]); break;
case GLSL_TYPE_INT64: fprintf(f, "%" PRIi64, ir->value.i64[i]); break;
case GLSL_TYPE_BOOL: fprintf(f, "%d", ir->value.b[i]); break;
case GLSL_TYPE_DOUBLE:
if (ir->value.d[i] == 0.0)

View File

@@ -253,7 +253,8 @@ ir_validate::visit_leave(ir_expression *ir)
case ir_unop_sign:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT ||
ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE ||
ir->operands[0]->type->base_type == GLSL_TYPE_INT64);
assert(ir->type == ir->operands[0]->type);
break;
@@ -331,6 +332,102 @@ ir_validate::visit_leave(ir_expression *ir)
assert(ir->type->base_type == GLSL_TYPE_UINT);
break;
case ir_unop_bitcast_u642d:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT64);
assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
break;
case ir_unop_bitcast_i642d:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT64);
assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
break;
case ir_unop_bitcast_d2u64:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
assert(ir->type->base_type == GLSL_TYPE_UINT64);
break;
case ir_unop_bitcast_d2i64:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
assert(ir->type->base_type == GLSL_TYPE_INT64);
break;
case ir_unop_i642i:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT64);
assert(ir->type->base_type == GLSL_TYPE_INT);
break;
case ir_unop_u642i:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT64);
assert(ir->type->base_type == GLSL_TYPE_INT);
break;
case ir_unop_i642u:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT64);
assert(ir->type->base_type == GLSL_TYPE_UINT);
break;
case ir_unop_u642u:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT64);
assert(ir->type->base_type == GLSL_TYPE_UINT);
break;
case ir_unop_i642b:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT64);
assert(ir->type->base_type == GLSL_TYPE_BOOL);
break;
case ir_unop_i642f:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT64);
assert(ir->type->base_type == GLSL_TYPE_FLOAT);
break;
case ir_unop_u642f:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT64);
assert(ir->type->base_type == GLSL_TYPE_FLOAT);
break;
case ir_unop_i642d:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT64);
assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
break;
case ir_unop_u642d:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT64);
assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
break;
case ir_unop_i2i64:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
assert(ir->type->base_type == GLSL_TYPE_INT64);
break;
case ir_unop_u2i64:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
assert(ir->type->base_type == GLSL_TYPE_INT64);
break;
case ir_unop_b2i64:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
assert(ir->type->base_type == GLSL_TYPE_INT64);
break;
case ir_unop_f2i64:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
assert(ir->type->base_type == GLSL_TYPE_INT64);
break;
case ir_unop_d2i64:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
assert(ir->type->base_type == GLSL_TYPE_INT64);
break;
case ir_unop_i2u64:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
assert(ir->type->base_type == GLSL_TYPE_UINT64);
break;
case ir_unop_u2u64:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
assert(ir->type->base_type == GLSL_TYPE_UINT64);
break;
case ir_unop_f2u64:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
assert(ir->type->base_type == GLSL_TYPE_UINT64);
break;
case ir_unop_d2u64:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
assert(ir->type->base_type == GLSL_TYPE_UINT64);
break;
case ir_unop_u642i64:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT64);
assert(ir->type->base_type == GLSL_TYPE_INT64);
break;
case ir_unop_i642u64:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT64);
assert(ir->type->base_type == GLSL_TYPE_UINT64);
break;
case ir_unop_trunc:
case ir_unop_round_even:
case ir_unop_ceil:
@@ -370,6 +467,16 @@ ir_validate::visit_leave(ir_expression *ir)
assert(ir->operands[0]->type == glsl_type::uvec2_type);
break;
case ir_unop_pack_int_2x32:
assert(ir->type == glsl_type::int64_t_type);
assert(ir->operands[0]->type == glsl_type::ivec2_type);
break;
case ir_unop_pack_uint_2x32:
assert(ir->type == glsl_type::uint64_t_type);
assert(ir->operands[0]->type == glsl_type::uvec2_type);
break;
case ir_unop_unpack_snorm_2x16:
case ir_unop_unpack_unorm_2x16:
case ir_unop_unpack_half_2x16:
@@ -388,6 +495,16 @@ ir_validate::visit_leave(ir_expression *ir)
assert(ir->operands[0]->type == glsl_type::double_type);
break;
case ir_unop_unpack_int_2x32:
assert(ir->type == glsl_type::ivec2_type);
assert(ir->operands[0]->type == glsl_type::int64_t_type);
break;
case ir_unop_unpack_uint_2x32:
assert(ir->type == glsl_type::uvec2_type);
assert(ir->operands[0]->type == glsl_type::uint64_t_type);
break;
case ir_unop_bitfield_reverse:
assert(ir->operands[0]->type == ir->type);
assert(ir->type->is_integer());
@@ -537,7 +654,7 @@ ir_validate::visit_leave(ir_expression *ir)
case ir_binop_lshift:
case ir_binop_rshift:
assert(ir->operands[0]->type->is_integer() &&
assert(ir->operands[0]->type->is_integer_32_64() &&
ir->operands[1]->type->is_integer());
if (ir->operands[0]->type->is_scalar()) {
assert(ir->operands[1]->type->is_scalar());
@@ -555,7 +672,7 @@ ir_validate::visit_leave(ir_expression *ir)
case ir_binop_bit_or:
assert(ir->operands[0]->type->base_type ==
ir->operands[1]->type->base_type);
assert(ir->type->is_integer());
assert(ir->type->is_integer_32_64());
if (ir->operands[0]->type->is_vector() &&
ir->operands[1]->type->is_vector()) {
assert(ir->operands[0]->type->vector_elements ==

View File

@@ -64,6 +64,8 @@ copy_constant_to_storage(union gl_constant_value *storage,
storage[i].f = val->value.f[i];
break;
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
/* XXX need to check on big-endian */
memcpy(&storage[i * 2].u, &val->value.d[i], sizeof(double));
break;

View File

@@ -535,7 +535,7 @@ private:
const char *str_end;
while((str_start = strchr(name_copy, '[')) &&
(str_end = strchr(name_copy, ']'))) {
memmove(str_start, str_end + 1, 1 + strlen(str_end + 1));
memmove(str_start, str_end + 1, 1 + strlen(str_end));
}
unsigned index = 0;
@@ -1042,12 +1042,10 @@ find_empty_block(struct gl_shader_program *prog,
static void
link_setup_uniform_remap_tables(struct gl_context *ctx,
struct gl_shader_program *prog,
unsigned num_explicit_uniform_locs)
struct gl_shader_program *prog)
{
unsigned total_entries = num_explicit_uniform_locs;
unsigned empty_locs =
prog->NumUniformRemapTable - num_explicit_uniform_locs;
unsigned total_entries = prog->NumExplicitUniformLocations;
unsigned empty_locs = prog->NumUniformRemapTable - total_entries;
/* Reserve all the explicit locations of the active uniforms. */
for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) {
@@ -1206,8 +1204,7 @@ link_setup_uniform_remap_tables(struct gl_context *ctx,
static void
link_assign_uniform_storage(struct gl_context *ctx,
struct gl_shader_program *prog,
const unsigned num_data_slots,
unsigned num_explicit_uniform_locs)
const unsigned num_data_slots)
{
/* On the outside chance that there were no uniforms, bail out.
*/
@@ -1266,15 +1263,14 @@ link_assign_uniform_storage(struct gl_context *ctx,
assert(parcel.values == data_end);
#endif
link_setup_uniform_remap_tables(ctx, prog, num_explicit_uniform_locs);
link_setup_uniform_remap_tables(ctx, prog);
link_set_uniform_initializers(prog, boolean_true);
}
void
link_assign_uniform_locations(struct gl_shader_program *prog,
struct gl_context *ctx,
unsigned int num_explicit_uniform_locs)
struct gl_context *ctx)
{
ralloc_free(prog->data->UniformStorage);
prog->data->UniformStorage = NULL;
@@ -1335,6 +1331,5 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
hiddenUniforms->iterate(assign_hidden_uniform_slot_id, &uniform_size);
delete hiddenUniforms;
link_assign_uniform_storage(ctx, prog, uniform_size.num_values,
num_explicit_uniform_locs);
link_assign_uniform_storage(ctx, prog, uniform_size.num_values);
}

View File

@@ -106,8 +106,9 @@ create_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name,
}
}
bool
static bool
process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh,
struct gl_shader_program *prog,
unsigned *num_tfeedback_decls,
char ***varying_names)
{
@@ -118,8 +119,9 @@ process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh,
* xfb_stride to interface block members so this will catch that case also.
*/
for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
if (sh->info.TransformFeedback.BufferStride[j]) {
if (prog->TransformFeedback.BufferStride[j]) {
has_xfb_qualifiers = true;
break;
}
}
@@ -573,7 +575,7 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
* Demote shader inputs and outputs that are not used in other stages, and
* remove them via dead code elimination.
*/
void
static void
remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,
gl_linked_shader *sh,
enum ir_variable_mode mode)
@@ -743,10 +745,12 @@ tfeedback_decl::assign_location(struct gl_context *ctx,
unsigned actual_array_size;
switch (this->lowered_builtin_array_variable) {
case clip_distance:
actual_array_size = prog->LastClipDistanceArraySize;
actual_array_size = prog->last_vert_prog ?
prog->last_vert_prog->info.clip_distance_array_size : 0;
break;
case cull_distance:
actual_array_size = prog->LastCullDistanceArraySize;
actual_array_size = prog->last_vert_prog ?
prog->last_vert_prog->info.cull_distance_array_size : 0;
break;
case tess_level_outer:
actual_array_size = 4;
@@ -1014,7 +1018,7 @@ tfeedback_decl::find_candidate(gl_shader_program *prog,
* If an error occurs, the error is reported through linker_error() and false
* is returned.
*/
bool
static bool
parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog,
const void *mem_ctx, unsigned num_names,
char **varying_names, tfeedback_decl *decls)
@@ -1069,11 +1073,14 @@ cmp_xfb_offset(const void * x_generic, const void * y_generic)
* If an error occurs, the error is reported through linker_error() and false
* is returned.
*/
bool
static bool
store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
unsigned num_tfeedback_decls,
tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers)
{
if (!prog->last_vert_prog)
return true;
/* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for
* tracking the number of buffers doesn't overflow.
*/
@@ -1082,7 +1089,7 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
bool separate_attribs_mode =
prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;
struct gl_program *xfb_prog = prog->xfb_program;
struct gl_program *xfb_prog = prog->last_vert_prog;
xfb_prog->sh.LinkedTransformFeedback =
rzalloc(xfb_prog, struct gl_transform_feedback_info);
@@ -1985,7 +1992,7 @@ canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode)
* 64 bit map. Per-vertex and per-patch both have separate location domains
* with a max of MAX_VARYING.
*/
uint64_t
static uint64_t
reserved_varying_slot(struct gl_linked_shader *stage,
ir_variable_mode io_mode)
{
@@ -2042,7 +2049,7 @@ reserved_varying_slot(struct gl_linked_shader *stage,
* be NULL. In this case, varying locations are assigned solely based on the
* requirements of transform feedback.
*/
bool
static bool
assign_varying_locations(struct gl_context *ctx,
void *mem_ctx,
struct gl_shader_program *prog,
@@ -2373,3 +2380,160 @@ check_against_input_limit(struct gl_context *ctx,
return true;
}
/**
 * Link the varyings of a program.
 *
 * Determines the set of transform feedback declarations (from xfb layout
 * qualifiers if present, otherwise from prog->TransformFeedback), parses
 * them, assigns varying locations for the linked stages, checks the
 * per-stage input/output limits, and finally stores the transform feedback
 * info.
 *
 * \param prog     program being linked
 * \param first    index into prog->_LinkedShaders of the first stage
 *                 (presumably the first populated stage; confirm with caller)
 * \param last     index into prog->_LinkedShaders of the last stage
 *                 (presumably the last populated stage; confirm with caller)
 * \param ctx      GL context forwarded to the helper routines
 * \param mem_ctx  ralloc context used for the temporary declaration array
 *
 * \return false as soon as any step fails, true otherwise.
 */
bool
link_varyings(struct gl_shader_program *prog, unsigned first, unsigned last,
struct gl_context *ctx, void *mem_ctx)
{
bool has_xfb_qualifiers = false;
unsigned num_tfeedback_decls = 0;
char **varying_names = NULL;
tfeedback_decl *tfeedback_decls = NULL;
/* From the ARB_enhanced_layouts spec:
*
* "If the shader used to record output variables for transform feedback
* varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout
* qualifiers, the values specified by TransformFeedbackVaryings are
* ignored, and the set of variables captured for transform feedback is
* instead derived from the specified layout qualifiers."
*/
for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) {
/* Find last stage before fragment shader */
if (prog->_LinkedShaders[i]) {
has_xfb_qualifiers =
process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i],
prog, &num_tfeedback_decls,
&varying_names);
break;
}
}
/* No layout qualifiers: fall back to the names recorded on the program. */
if (!has_xfb_qualifiers) {
num_tfeedback_decls = prog->TransformFeedback.NumVarying;
varying_names = prog->TransformFeedback.VaryingNames;
}
if (num_tfeedback_decls != 0) {
/* From GL_EXT_transform_feedback:
* A program will fail to link if:
*
* * the <count> specified by TransformFeedbackVaryingsEXT is
* non-zero, but the program object has no vertex or geometry
* shader;
*/
if (first >= MESA_SHADER_FRAGMENT) {
linker_error(prog, "Transform feedback varyings specified, but "
"no vertex, tessellation, or geometry shader is "
"present.\n");
return false;
}
tfeedback_decls = rzalloc_array(mem_ctx, tfeedback_decl,
num_tfeedback_decls);
if (!parse_tfeedback_decls(ctx, prog, mem_ctx, num_tfeedback_decls,
varying_names, tfeedback_decls))
return false;
}
/* If there is no fragment shader we need to set transform feedback.
*
* For SSO we also need to assign output locations. We assign them here
* because we need to do it for both single stage programs and multi stage
* programs.
*/
if (last < MESA_SHADER_FRAGMENT &&
(num_tfeedback_decls != 0 || prog->SeparateShader)) {
const uint64_t reserved_out_slots =
reserved_varying_slot(prog->_LinkedShaders[last], ir_var_shader_out);
if (!assign_varying_locations(ctx, mem_ctx, prog,
prog->_LinkedShaders[last], NULL,
num_tfeedback_decls, tfeedback_decls,
reserved_out_slots))
return false;
}
if (last <= MESA_SHADER_FRAGMENT) {
/* Remove unused varyings from the first/last stage unless SSO */
remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
prog->_LinkedShaders[first],
ir_var_shader_in);
remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
prog->_LinkedShaders[last],
ir_var_shader_out);
/* If the program is made up of only a single stage */
if (first == last) {
gl_linked_shader *const sh = prog->_LinkedShaders[last];
/* The single stage is handled once as a consumer and once as a
 * producer; only the producer pass is given the tfeedback decls.
 */
do_dead_builtin_varyings(ctx, NULL, sh, 0, NULL);
do_dead_builtin_varyings(ctx, sh, NULL, num_tfeedback_decls,
tfeedback_decls);
if (prog->SeparateShader) {
const uint64_t reserved_slots =
reserved_varying_slot(sh, ir_var_shader_in);
/* Assign input locations for SSO, output locations are already
* assigned.
*/
if (!assign_varying_locations(ctx, mem_ctx, prog,
NULL /* producer */,
sh /* consumer */,
0 /* num_tfeedback_decls */,
NULL /* tfeedback_decls */,
reserved_slots))
return false;
}
} else {
/* Linking the stages in the opposite order (from fragment to vertex)
* ensures that inter-shader outputs written to in an earlier stage
* are eliminated if they are (transitively) not used in a later
* stage.
*/
int next = last;
for (int i = next - 1; i >= 0; i--) {
/* Skip missing stages, except stage 0, which is always processed
 * (sh_i may then be NULL).
 */
if (prog->_LinkedShaders[i] == NULL && i != 0)
continue;
gl_linked_shader *const sh_i = prog->_LinkedShaders[i];
gl_linked_shader *const sh_next = prog->_LinkedShaders[next];
const uint64_t reserved_out_slots =
reserved_varying_slot(sh_i, ir_var_shader_out);
const uint64_t reserved_in_slots =
reserved_varying_slot(sh_next, ir_var_shader_in);
/* The tfeedback decl count is only passed along when the consumer
 * is the fragment stage.
 */
do_dead_builtin_varyings(ctx, sh_i, sh_next,
next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
tfeedback_decls);
if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next,
next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
tfeedback_decls,
reserved_out_slots | reserved_in_slots))
return false;
/* This must be done after all dead varyings are eliminated. */
if (sh_i != NULL) {
unsigned slots_used = _mesa_bitcount_64(reserved_out_slots);
if (!check_against_output_limit(ctx, prog, sh_i, slots_used)) {
return false;
}
}
unsigned slots_used = _mesa_bitcount_64(reserved_in_slots);
if (!check_against_input_limit(ctx, prog, sh_next, slots_used))
return false;
/* The stage just processed becomes the consumer of the next pair. */
next = i;
}
}
}
if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls,
has_xfb_qualifiers))
return false;
return true;
}

View File

@@ -176,6 +176,14 @@ private:
case GL_DOUBLE_MAT4:
case GL_DOUBLE_MAT4x2:
case GL_DOUBLE_MAT4x3:
case GL_INT64_ARB:
case GL_INT64_VEC2_ARB:
case GL_INT64_VEC3_ARB:
case GL_INT64_VEC4_ARB:
case GL_UNSIGNED_INT64_ARB:
case GL_UNSIGNED_INT64_VEC2_ARB:
case GL_UNSIGNED_INT64_VEC3_ARB:
case GL_UNSIGNED_INT64_VEC4_ARB:
return true;
default:
return false;
@@ -288,57 +296,13 @@ private:
unsigned stream_id;
};
bool
link_varyings(struct gl_shader_program *prog, unsigned first, unsigned last,
struct gl_context *ctx, void *mem_ctx);
void
cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
gl_linked_shader *producer,
gl_linked_shader *consumer);
bool
parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog,
const void *mem_ctx, unsigned num_names,
char **varying_names, tfeedback_decl *decls);
bool
process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh,
unsigned *num_tfeedback_decls,
char ***varying_names);
void
remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,
gl_linked_shader *sh,
enum ir_variable_mode mode);
bool
store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
unsigned num_tfeedback_decls,
tfeedback_decl *tfeedback_decls,
bool has_xfb_qualifiers);
bool
assign_varying_locations(struct gl_context *ctx,
void *mem_ctx,
struct gl_shader_program *prog,
gl_linked_shader *producer,
gl_linked_shader *consumer,
unsigned num_tfeedback_decls,
tfeedback_decl *tfeedback_decls,
const uint64_t reserved_slots);
uint64_t
reserved_varying_slot(struct gl_linked_shader *stage,
ir_variable_mode io_mode);
bool
check_against_output_limit(struct gl_context *ctx,
struct gl_shader_program *prog,
gl_linked_shader *producer,
unsigned num_explicit_locations);
bool
check_against_input_limit(struct gl_context *ctx,
struct gl_shader_program *prog,
gl_linked_shader *consumer,
unsigned num_explicit_locations);
#endif /* GLSL_LINK_VARYINGS_H */

File diff suppressed because it is too large Load Diff

View File

@@ -35,8 +35,7 @@ link_invalidate_variable_locations(exec_list *ir);
extern void
link_assign_uniform_locations(struct gl_shader_program *prog,
struct gl_context *ctx,
unsigned int num_explicit_uniform_locs);
struct gl_context *ctx);
extern void
link_set_uniform_initializers(struct gl_shader_program *prog,

View File

@@ -28,6 +28,7 @@
#include "program/prog_instruction.h"
#include "program/prog_statevars.h"
#include "util/bitscan.h"
#include "builtin_functions.h"
using namespace ir_builder;

View File

@@ -0,0 +1,403 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file lower_int64.cpp
*
* Lower 64-bit operations to 32-bit operations. Each 64-bit value is lowered
* to a uvec2. For each operation that can be lowered, there is a function
* called __builtin_foo with the same number of parameters that takes uvec2
* sources and produces uvec2 results. An operation like
*
* uint64_t(x) * uint64_t(y)
*
* becomes
*
* packUint2x32(__builtin_umul64(unpackUint2x32(x), unpackUint2x32(y)));
*/
#include "main/macros.h"
#include "compiler/glsl_types.h"
#include "ir.h"
#include "ir_rvalue_visitor.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "util/hash_table.h"
#include "builtin_functions.h"
/**
 * Callback that builds the signature (IR) of one "__builtin_*" helper.
 *
 * handle_op() invokes it when the named helper is not already present in the
 * function table.
 */
typedef ir_function_signature *(*function_generator)(void *mem_ctx,
builtin_available_predicate avail);
using namespace ir_builder;
/**
 * Helpers used to rewrite a 64-bit expression as per-component calls to a
 * lowering function.  They are declared here and defined later in this file.
 */
namespace lower_64bit {
/* Unpack each component of \c val into its own two-component temporary. */
void expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src);
/* Pack per-component two-component results back into one 64-bit value. */
ir_dereference_variable *compact_destination(ir_factory &,
const glsl_type *type,
ir_variable *result[4]);
/* Replace \c ir with calls to \c callee, emitted ahead of \c base_ir. */
ir_rvalue *lower_op_to_function_call(ir_instruction *base_ir,
ir_expression *ir,
ir_function_signature *callee);
};
using namespace lower_64bit;
namespace {

/**
 * Rvalue visitor that replaces selected 64-bit integer expressions with calls
 * to "__builtin_*" helper functions.
 */
class lower_64bit_visitor : public ir_rvalue_visitor {
public:
   lower_64bit_visitor(void *mem_ctx, exec_list *instructions, unsigned lower)
      : progress(false), lower(lower), instructions(instructions),
        function_list(), added_functions(&function_list, mem_ctx)
   {
      functions = _mesa_hash_table_create(mem_ctx,
                                          _mesa_key_hash_string,
                                          _mesa_key_string_equal);

      /* Record every "__builtin_" function already present in the IR so it
       * can be found by name later.
       */
      foreach_in_list(ir_instruction, node, instructions) {
         ir_function *const func = node->as_function();

         if (func != NULL && strncmp(func->name, "__builtin_", 10) == 0)
            add_function(func);
      }
   }

   ~lower_64bit_visitor()
   {
      _mesa_hash_table_destroy(functions, NULL);
   }

   void handle_rvalue(ir_rvalue **rvalue);

   /** Register \c f in the name -> function table. */
   void add_function(ir_function *f)
   {
      _mesa_hash_table_insert(functions, f->name, f);
   }

   /** Look up a function by name; returns NULL when it is not known. */
   ir_function *find_function(const char *name)
   {
      struct hash_entry *const hit = _mesa_hash_table_search(functions, name);

      if (hit == NULL)
         return NULL;

      return (ir_function *) hit->data;
   }

   /** Set once any expression has been lowered. */
   bool progress;

private:
   /** Bitfield of which operations to lower */
   unsigned lower;

   /** Instruction list handed to the constructor */
   exec_list *instructions;

   /** Hashtable containing all of the known functions in the IR */
   struct hash_table *functions;

public:
   /** List that \c added_functions emits into */
   exec_list function_list;

private:
   ir_factory added_functions;

   ir_rvalue *handle_op(ir_expression *ir, const char *function_name,
                        function_generator generator);
};

} /* anonymous namespace */
/**
 * Report whether \c t has a 64-bit integer base type (signed or unsigned).
 */
static bool
is_integer_64(const glsl_type *t)
{
   switch (t->base_type) {
   case GLSL_TYPE_UINT64:
   case GLSL_TYPE_INT64:
      return true;
   default:
      return false;
   }
}
/**
 * Determine if a particular type of lowering should occur
 *
 * The argument is parenthesized so that a compound mask, e.g.
 * lowering(MUL64 | DIV64), is tested as a whole instead of being split by
 * operator precedence.
 */
#define lowering(x) (this->lower & (x))
/**
 * Lower the selected 64-bit integer operations in an instruction list.
 *
 * \param instructions   IR to process; an empty list is left untouched
 * \param what_to_lower  bitmask of operations to lower (MUL64, SIGN64, ...)
 *
 * \return true if the visitor reported progress.
 */
bool
lower_64bit_integer_instructions(exec_list *instructions,
                                 unsigned what_to_lower)
{
   if (instructions->is_empty())
      return false;

   ir_instruction *first_inst = (ir_instruction *) instructions->get_head_raw();
   void *const mem_ctx = ralloc_parent(first_inst);
   lower_64bit_visitor v(mem_ctx, instructions, what_to_lower);

   visit_list_elements(&v, instructions);

   if (v.progress && !v.function_list.is_empty()) {
      /* Move all of the nodes from function_list to the head of the incoming
       * instruction list: they go between the head sentinel and the node
       * that is currently first.
       */
      exec_node *const before = &instructions->head_sentinel;
      exec_node *const after = instructions->head_sentinel.next;
      exec_node *const head = v.function_list.head_sentinel.next;
      exec_node *const tail = v.function_list.tail_sentinel.prev;

      before->next = head;
      head->prev = before;

      after->prev = tail;
      tail->next = after;
   }

   return v.progress;
}
/**
* Expand individual 64-bit values to uvec2 values
*
* Each operation is in one of a few forms.
*
* vector op vector
* vector op scalar
* scalar op vector
* scalar op scalar
*
* In the 'vector op vector' case, the two vectors must have the same size.
* In a way, the 'scalar op scalar' form is a special case of the 'vector op
* vector' form.
*
* This method generates a new set of uvec2 values for each element of a
* single operand. If the operand is a scalar, the uvec2 is replicated
* multiple times. A value like
*
* u64vec3(a) + u64vec3(b)
*
* becomes
*
* u64vec3 tmp0 = u64vec3(a) + u64vec3(b);
* uvec2 tmp1 = unpackUint2x32(tmp0.x);
* uvec2 tmp2 = unpackUint2x32(tmp0.y);
* uvec2 tmp3 = unpackUint2x32(tmp0.z);
*
* and the returned operands array contains ir_variable pointers to
*
* { tmp1, tmp2, tmp3, tmp1 }
*/
void
lower_64bit::expand_source(ir_factory &body,
                           ir_rvalue *val,
                           ir_variable **expanded_src)
{
   const bool is_unsigned = val->type->base_type == GLSL_TYPE_UINT64;

   assert(is_unsigned || val->type->base_type == GLSL_TYPE_INT64);

   /* Evaluate the operand once into a temporary of its own type. */
   ir_variable *const whole = body.make_temp(val->type, "tmp");
   body.emit(assign(whole, val));

   /* Signedness selects both the unpack opcode and the 2-component type. */
   ir_expression_operation unpack_opcode;
   const glsl_type *half_type;

   if (is_unsigned) {
      unpack_opcode = ir_unop_unpack_uint_2x32;
      half_type = glsl_type::uvec2_type;
   } else {
      unpack_opcode = ir_unop_unpack_int_2x32;
      half_type = glsl_type::ivec2_type;
   }

   const unsigned count = val->type->vector_elements;

   /* One unpacked temporary per component of the operand. */
   for (unsigned c = 0; c < count; c++) {
      expanded_src[c] = body.make_temp(half_type, "expanded_64bit_source");
      body.emit(assign(expanded_src[c],
                       expr(unpack_opcode, swizzle(whole, c, 1))));
   }

   /* Remaining slots (up to four) repeat the first component. */
   for (unsigned c = count; c < 4; c++)
      expanded_src[c] = expanded_src[0];
}
/**
* Convert a series of uvec2 results into a single 64-bit integer vector
*/
ir_dereference_variable *
lower_64bit::compact_destination(ir_factory &body,
                                 const glsl_type *type,
                                 ir_variable *result[4])
{
   /* Pick the pack opcode matching the signedness of the destination. */
   ir_expression_operation pack_opcode = ir_unop_pack_int_2x32;
   if (type->base_type == GLSL_TYPE_UINT64)
      pack_opcode = ir_unop_pack_uint_2x32;

   ir_variable *const packed =
      body.make_temp(type, "compacted_64bit_result");

   /* Write each packed component through a single-bit write mask. */
   unsigned write_mask = 1U;
   for (unsigned c = 0; c < type->vector_elements; c++, write_mask <<= 1)
      body.emit(assign(packed, expr(pack_opcode, result[c]), write_mask));

   return new(ralloc_parent(packed)) ir_dereference_variable(packed);
}
/**
 * Replace a 64-bit expression with per-component calls to \c callee.
 *
 * Every operand of \c ir is expanded into two-component temporaries, \c callee
 * is called once per component, and the results are packed back into a value
 * of the expression's type.  The generated instructions are spliced into the
 * instruction stream immediately before \c base_ir.
 *
 * \param base_ir  instruction ahead of which the new instructions are placed
 * \param ir       expression being lowered
 * \param callee   signature of the function implementing the operation
 *
 * \return a dereference of the temporary holding the packed result.
 */
ir_rvalue *
lower_64bit::lower_op_to_function_call(ir_instruction *base_ir,
ir_expression *ir,
ir_function_signature *callee)
{
const unsigned num_operands = ir->get_num_operands();
/* src[operand][component]; expand_source() fills all four slots. */
ir_variable *src[4][4];
ir_variable *dst[4];
void *const mem_ctx = ralloc_parent(ir);
exec_list instructions;
unsigned source_components = 0;
const glsl_type *const result_type =
ir->type->base_type == GLSL_TYPE_UINT64
? glsl_type::uvec2_type : glsl_type::ivec2_type;
ir_factory body(&instructions, mem_ctx);
/* Expand every operand and track the widest operand's component count. */
for (unsigned i = 0; i < num_operands; i++) {
expand_source(body, ir->operands[i], src[i]);
if (ir->operands[i]->type->vector_elements > source_components)
source_components = ir->operands[i]->type->vector_elements;
}
/* Emit one call per component, each writing its own result temporary. */
for (unsigned i = 0; i < source_components; i++) {
dst[i] = body.make_temp(result_type, "expanded_64bit_result");
exec_list parameters;
for (unsigned j = 0; j < num_operands; j++)
parameters.push_tail(new(mem_ctx) ir_dereference_variable(src[j][i]));
ir_dereference_variable *const return_deref =
new(mem_ctx) ir_dereference_variable(dst[i]);
ir_call *const c = new(mem_ctx) ir_call(callee,
return_deref,
&parameters);
body.emit(c);
}
ir_rvalue *const rv = compact_destination(body, ir->type, dst);
/* Move all of the nodes from instructions between base_ir and the
* instruction before it.
*/
exec_node *const after = base_ir;
exec_node *const before = after->prev;
exec_node *const head = instructions.head_sentinel.next;
exec_node *const tail = instructions.tail_sentinel.prev;
/* Order matters: link the new run to its predecessor, then to base_ir. */
before->next = head;
head->prev = before;
after->prev = tail;
tail->next = after;
return rv;
}
/**
 * Lower one expression to a call of the named helper function.
 *
 * The expression is returned unchanged unless every operand has a 64-bit
 * integer type.  The helper is looked up by name; when it is not known yet,
 * it is created through \c generator and registered.
 */
ir_rvalue *
lower_64bit_visitor::handle_op(ir_expression *ir,
                               const char *function_name,
                               function_generator generator)
{
   const unsigned operand_count = ir->get_num_operands();

   for (unsigned n = 0; n < operand_count; n++) {
      if (!is_integer_64(ir->operands[n]->type))
         return ir;
   }

   /* Get a handle to the correct ir_function_signature for the core
    * operation, creating the function first if it does not exist.
    */
   ir_function *func = find_function(function_name);
   ir_function_signature *sig;

   if (func == NULL) {
      func = new(base_ir) ir_function(function_name);
      sig = generator(base_ir, NULL);

      func->add_signature(sig);
      add_function(func);
   } else {
      sig = (ir_function_signature *) func->signatures.get_head();
      assert(sig != NULL && sig->ir_type == ir_type_function_signature);
   }

   return lower_op_to_function_call(this->base_ir, ir, sig);
}
/**
 * Visit one rvalue and lower it if it is an expression selected for lowering.
 *
 * Progress is only reported when handle_op() actually produced a replacement.
 * handle_op() hands back the original expression when an operand is not a
 * 64-bit integer, and that case must not count as progress.
 */
void
lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (*rvalue == NULL || (*rvalue)->ir_type != ir_type_expression)
      return;

   ir_expression *const ir = (*rvalue)->as_expression();
   assert(ir != NULL);

   /* Stays equal to ir unless one of the cases below lowers it. */
   ir_rvalue *lowered = ir;

   switch (ir->operation) {
   case ir_unop_sign:
      if (lowering(SIGN64))
         lowered = handle_op(ir, "__builtin_sign64", generate_ir::sign64);
      break;

   case ir_binop_div:
      if (lowering(DIV64)) {
         if (ir->type->base_type == GLSL_TYPE_UINT64) {
            lowered = handle_op(ir, "__builtin_udiv64", generate_ir::udiv64);
         } else {
            lowered = handle_op(ir, "__builtin_idiv64", generate_ir::idiv64);
         }
      }
      break;

   case ir_binop_mod:
      if (lowering(MOD64)) {
         if (ir->type->base_type == GLSL_TYPE_UINT64) {
            lowered = handle_op(ir, "__builtin_umod64", generate_ir::umod64);
         } else {
            lowered = handle_op(ir, "__builtin_imod64", generate_ir::imod64);
         }
      }
      break;

   case ir_binop_mul:
      if (lowering(MUL64))
         lowered = handle_op(ir, "__builtin_umul64", generate_ir::umul64);
      break;

   default:
      break;
   }

   if (lowered != ir) {
      *rvalue = lowered;
      this->progress = true;
   }
}

View File

@@ -932,21 +932,14 @@ lower_continue:
* break statement if necessary.
*/
return_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
else {
/* Otherwise, ensure that the instructions that follow are only
* executed if the return flag is clear. We can do that by moving
* those instructions into the else clause of the generated if
else
/* Otherwise, all we need to do is ensure that the
* instructions that follow are only executed if the
* return flag is clear. We can do that by moving those
* instructions into the else clause of the generated if
* statement.
*/
move_outer_block_inside(ir, &return_if->else_instructions);
/* In case the loop is embeded inside an if add a new return to
* the return flag then branch and let a future pass tidy it up.
*/
if (this->function.signature->return_type->is_void())
return_if->then_instructions.push_tail(new(ir) ir_return(NULL));
}
ir->insert_after(return_if);
}

View File

@@ -351,6 +351,38 @@ lower_packed_varyings_visitor::bitwise_assign_pack(ir_rvalue *lhs,
rhs = u2i(expr(ir_unop_unpack_double_2x32, rhs));
}
break;
case GLSL_TYPE_INT64:
assert(rhs->type->vector_elements <= 2);
if (rhs->type->vector_elements == 2) {
ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "pack", ir_var_temporary);
assert(lhs->type->vector_elements == 4);
this->out_variables->push_tail(t);
this->out_instructions->push_tail(
assign(t, expr(ir_unop_unpack_int_2x32, swizzle_x(rhs->clone(mem_ctx, NULL))), 0x3));
this->out_instructions->push_tail(
assign(t, expr(ir_unop_unpack_int_2x32, swizzle_y(rhs)), 0xc));
rhs = deref(t).val;
} else {
rhs = expr(ir_unop_unpack_int_2x32, rhs);
}
break;
case GLSL_TYPE_UINT64:
assert(rhs->type->vector_elements <= 2);
if (rhs->type->vector_elements == 2) {
ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "pack", ir_var_temporary);
assert(lhs->type->vector_elements == 4);
this->out_variables->push_tail(t);
this->out_instructions->push_tail(
assign(t, u2i(expr(ir_unop_unpack_uint_2x32, swizzle_x(rhs->clone(mem_ctx, NULL)))), 0x3));
this->out_instructions->push_tail(
assign(t, u2i(expr(ir_unop_unpack_uint_2x32, swizzle_y(rhs))), 0xc));
rhs = deref(t).val;
} else {
rhs = u2i(expr(ir_unop_unpack_uint_2x32, rhs));
}
break;
default:
assert(!"Unexpected type conversion while lowering varyings");
break;
@@ -400,6 +432,36 @@ lower_packed_varyings_visitor::bitwise_assign_unpack(ir_rvalue *lhs,
rhs = expr(ir_unop_pack_double_2x32, i2u(rhs));
}
break;
case GLSL_TYPE_INT64:
assert(lhs->type->vector_elements <= 2);
if (lhs->type->vector_elements == 2) {
ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "unpack", ir_var_temporary);
assert(rhs->type->vector_elements == 4);
this->out_variables->push_tail(t);
this->out_instructions->push_tail(
assign(t, expr(ir_unop_pack_int_2x32, swizzle_xy(rhs->clone(mem_ctx, NULL))), 0x1));
this->out_instructions->push_tail(
assign(t, expr(ir_unop_pack_int_2x32, swizzle(rhs->clone(mem_ctx, NULL), SWIZZLE_ZWZW, 2)), 0x2));
rhs = deref(t).val;
} else {
rhs = expr(ir_unop_pack_int_2x32, rhs);
}
break;
case GLSL_TYPE_UINT64:
assert(lhs->type->vector_elements <= 2);
if (lhs->type->vector_elements == 2) {
ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "unpack", ir_var_temporary);
assert(rhs->type->vector_elements == 4);
this->out_variables->push_tail(t);
this->out_instructions->push_tail(
assign(t, expr(ir_unop_pack_uint_2x32, i2u(swizzle_xy(rhs->clone(mem_ctx, NULL)))), 0x1));
this->out_instructions->push_tail(
assign(t, expr(ir_unop_pack_uint_2x32, i2u(swizzle(rhs->clone(mem_ctx, NULL), SWIZZLE_ZWZW, 2))), 0x2));
rhs = deref(t).val;
} else {
rhs = expr(ir_unop_pack_uint_2x32, i2u(rhs));
}
break;
default:
assert(!"Unexpected type conversion while lowering varyings");
break;

View File

@@ -37,6 +37,7 @@
#include "ir_builder.h"
#include "linker.h"
#include "program/prog_statevars.h"
#include "builtin_functions.h"
namespace {

View File

@@ -472,6 +472,34 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
}
break;
/* This macro CANNOT use the do { } while (false) mechanism because
* then the breaks apply to the loop instead of the switch!
*/
#define HANDLE_PACK_UNPACK_INVERSE(inverse_operation) \
{ \
ir_expression *const op = ir->operands[0]->as_expression(); \
if (op == NULL) \
break; \
if (op->operation == (inverse_operation)) \
return op->operands[0]; \
break; \
}
case ir_unop_unpack_uint_2x32:
HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_uint_2x32);
case ir_unop_pack_uint_2x32:
HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_uint_2x32);
case ir_unop_unpack_int_2x32:
HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_int_2x32);
case ir_unop_pack_int_2x32:
HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_int_2x32);
case ir_unop_unpack_double_2x32:
HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_double_2x32);
case ir_unop_pack_double_2x32:
HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_double_2x32);
#undef HANDLE_PACK_UNPACK_INVERSE
case ir_binop_add:
if (is_vec_zero(op_const[0]))
return ir->operands[1];

View File

@@ -41,6 +41,7 @@
#include "linker.h"
#include "glsl_parser_extras.h"
#include "ir_builder_print_visitor.h"
#include "builtin_functions.h"
#include "opt_add_neg_to_sub.h"
class dead_variable_visitor : public ir_hierarchical_visitor {

View File

@@ -178,6 +178,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
ctx->Extensions.ARB_fragment_layer_viewport = true;
ctx->Extensions.ARB_gpu_shader5 = true;
ctx->Extensions.ARB_gpu_shader_fp64 = true;
ctx->Extensions.ARB_gpu_shader_int64 = true;
ctx->Extensions.ARB_sample_shading = true;
ctx->Extensions.ARB_shader_bit_encoding = true;
ctx->Extensions.ARB_shader_draw_parameters = true;

View File

@@ -111,7 +111,7 @@ rmrf_local(const char *path)
if (path == NULL || *path == '\0' || *path != '.')
return -1;
return nftw(path, remove_entry, 64, FTW_DEPTH | FTW_PHYS);
return nftw(path, remove_entry, 64, FTW_DEPTH | FTW_PHYS | FTW_MOUNT);
}
#define CACHE_TEST_TMP "./cache-test-tmp"

View File

@@ -0,0 +1,440 @@
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <gtest/gtest.h>
#include "main/compiler.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "ir.h"
#include "ir_builder.h"
using namespace ir_builder;
/* Prototypes for the lower_64bit helpers exercised by the tests below.  No
 * header visible in this file declares them, so these declarations have to
 * be kept in sync by hand with the definitions (presumably in the 64-bit
 * integer lowering pass -- TODO confirm the location).
 */
namespace lower_64bit {
/* Called by the tests with the factory to emit into, the 64-bit value to
 * expand, and a four-entry array that receives the per-component results.
 */
void expand_source(ir_factory &body,
ir_rvalue *val,
ir_variable **expanded_src);
/* Called by the tests with the factory to emit into, the 64-bit result
 * type, and the four-entry array of per-component variables; returns a
 * dereference of the combined result.
 */
ir_dereference_variable *compact_destination(ir_factory &body,
const glsl_type *type,
ir_variable *result[4]);
/* Not exercised by any test visible in this part of the file. */
ir_rvalue *lower_op_to_function_call(ir_instruction *base_ir,
ir_expression *ir,
ir_function_signature *callee);
};
/**
 * Fixture for the lower_64bit::expand_source() tests.
 *
 * Each test starts with a fresh ralloc context, an empty instruction list,
 * an ir_factory that emits into that list, and a cleared expanded_src array.
 */
class expand_source : public ::testing::Test {
public:
   virtual void SetUp();
   virtual void TearDown();

   exec_list instructions;
   ir_factory *body;
   ir_variable *expanded_src[4];
   void *mem_ctx;
};

void
expand_source::SetUp()
{
   this->mem_ctx = ralloc_context(NULL);

   /* Start from "nothing expanded yet". */
   const unsigned slots =
      sizeof(this->expanded_src) / sizeof(this->expanded_src[0]);
   for (unsigned slot = 0; slot < slots; slot++)
      this->expanded_src[slot] = NULL;

   this->instructions.make_empty();
   this->body = new ir_factory(&this->instructions, this->mem_ctx);
}

void
expand_source::TearDown()
{
   /* The factory is a plain C++ allocation; everything else hangs off
    * mem_ctx and is released with it.
    */
   delete this->body;
   this->body = NULL;

   ralloc_free(this->mem_ctx);
   this->mem_ctx = NULL;
}
/**
 * Allocate a temporary called "variable" of the requested type out of
 * \c mem_ctx and return a new dereference of it.
 */
static ir_dereference_variable *
create_variable(void *mem_ctx, const glsl_type *type)
{
   ir_variable *const tmp =
      new(mem_ctx) ir_variable(type, "variable", ir_var_temporary);
   ir_dereference_variable *const ref =
      new(mem_ctx) ir_dereference_variable(tmp);

   return ref;
}
/**
 * Build a non-dereference operand of the requested type: the negation of a
 * freshly created temporary.
 */
static ir_expression *
create_expression(void *mem_ctx, const glsl_type *type)
{
   ir_dereference_variable *const operand = create_variable(mem_ctx, type);

   return new(mem_ctx) ir_expression(ir_unop_neg, operand);
}
static void
check_expanded_source(const glsl_type *type,
ir_variable *expanded_src[4])
{
const glsl_type *const expanded_type =
type->base_type == GLSL_TYPE_UINT64
? glsl_type::uvec2_type :glsl_type::ivec2_type;
for (int i = 0; i < type->vector_elements; i++) {
EXPECT_EQ(expanded_type, expanded_src[i]->type);
/* All elements that are part of the vector must be unique. */
for (int j = i - 1; j >= 0; j--) {
EXPECT_NE(expanded_src[i], expanded_src[j])
<< " Element " << i << " is the same as element " << j;
}
}
/* All elements that are not part of the vector must be the same as element
* 0. This is primarily for scalars (where every element is the same).
*/
for (int i = type->vector_elements; i < 4; i++) {
EXPECT_EQ(expanded_src[0], expanded_src[i])
<< " Element " << i << " should be the same as element 0";
}
}
/**
 * Validate (and consume) the instruction stream emitted by
 * lower_64bit::expand_source().
 *
 * \param instructions  List the factory emitted into; it is drained here.
 * \param type          64-bit type of the value that was expanded.
 * \param source        The rvalue that was handed to expand_source().
 *
 * Uses fatal assertions before every pointer that is subsequently
 * dereferenced so that a malformed stream is reported as a test failure
 * instead of a NULL dereference.
 */
static void
check_instructions(exec_list *instructions,
                   const glsl_type *type,
                   const ir_instruction *source)
{
   const bool is_unsigned = type->base_type == GLSL_TYPE_UINT64;
   const glsl_type *const expanded_type =
      is_unsigned ? glsl_type::uvec2_type : glsl_type::ivec2_type;
   const ir_expression_operation unpack_opcode =
      is_unsigned ? ir_unop_unpack_uint_2x32 : ir_unop_unpack_int_2x32;

   ir_instruction *ir;

   /* The instruction list should contain IR to represent (shown for the
    * unsigned case; the signed case uses ivec2 and unpackInt2x32), with one
    * declaration / assignment pair per component of 'type':
    *
    *    type tmp1;
    *    tmp1 = source;
    *    uvec2 tmp2;
    *    tmp2 = unpackUint2x32(tmp1.x);
    *    uvec2 tmp3;
    *    tmp3 = unpackUint2x32(tmp1.y);
    *    uvec2 tmp4;
    *    tmp4 = unpackUint2x32(tmp1.z);
    *    uvec2 tmp5;
    *    tmp5 = unpackUint2x32(tmp1.w);
    */
   ASSERT_FALSE(instructions->is_empty());
   ir = (ir_instruction *) instructions->pop_head();
   ir_variable *const tmp1 = ir->as_variable();
   EXPECT_EQ(ir_type_variable, ir->ir_type);
   ASSERT_NE((void *)0, tmp1);
   EXPECT_EQ(type, tmp1->type) <<
      " Got " <<
      tmp1->type->name <<
      ", expected " <<
      type->name;

   ASSERT_FALSE(instructions->is_empty());
   ir = (ir_instruction *) instructions->pop_head();
   ir_assignment *const assign1 = ir->as_assignment();
   EXPECT_EQ(ir_type_assignment, ir->ir_type);
   ASSERT_NE((void *)0, assign1);
   EXPECT_EQ(tmp1, assign1->lhs->variable_referenced());
   EXPECT_EQ(source, assign1->rhs);

   for (unsigned i = 0; i < type->vector_elements; i++) {
      ASSERT_FALSE(instructions->is_empty());
      ir = (ir_instruction *) instructions->pop_head();
      ir_variable *const tmp2 = ir->as_variable();
      EXPECT_EQ(ir_type_variable, ir->ir_type);
      ASSERT_NE((void *)0, tmp2);
      EXPECT_EQ(expanded_type, tmp2->type);

      ASSERT_FALSE(instructions->is_empty());
      ir = (ir_instruction *) instructions->pop_head();
      ir_assignment *const assign2 = ir->as_assignment();
      EXPECT_EQ(ir_type_assignment, ir->ir_type);
      ASSERT_NE((void *)0, assign2);
      EXPECT_EQ(tmp2, assign2->lhs->variable_referenced());

      ir_expression *unpack = assign2->rhs->as_expression();
      ASSERT_NE((void *)0, unpack);
      EXPECT_EQ(unpack_opcode, unpack->operation);
      EXPECT_EQ(tmp1, unpack->operands[0]->variable_referenced());
   }

   /* Nothing beyond the expected sequence may have been emitted. */
   EXPECT_TRUE(instructions->is_empty());
}
/* Expand a plain dereference of a uint64_t variable. */
TEST_F(expand_source, uint64_variable)
{
   const glsl_type *const src_type = glsl_type::uint64_t_type;
   ir_dereference_variable *const src = create_variable(mem_ctx, src_type);

   lower_64bit::expand_source(*body, src, expanded_src);

   check_expanded_source(src_type, expanded_src);
   check_instructions(&instructions, src_type, src);
}

/* Expand a plain dereference of a u64vec2 variable. */
TEST_F(expand_source, u64vec2_variable)
{
   const glsl_type *const src_type = glsl_type::u64vec2_type;
   ir_dereference_variable *const src = create_variable(mem_ctx, src_type);

   lower_64bit::expand_source(*body, src, expanded_src);

   check_expanded_source(src_type, expanded_src);
   check_instructions(&instructions, src_type, src);
}
/* Expand a plain dereference of a u64vec3 variable.  The operand is built
 * with create_variable(), exactly like the sibling *_variable tests.
 */
TEST_F(expand_source, u64vec3_variable)
{
   const glsl_type *const type = glsl_type::u64vec3_type;
   ir_dereference_variable *const deref = create_variable(mem_ctx, type);

   lower_64bit::expand_source(*body, deref, expanded_src);

   check_expanded_source(type, expanded_src);
   check_instructions(&instructions, type, deref);
}
/* Expand a plain dereference of a u64vec4 variable. */
TEST_F(expand_source, u64vec4_variable)
{
   const glsl_type *const src_type = glsl_type::u64vec4_type;
   ir_dereference_variable *const src = create_variable(mem_ctx, src_type);

   lower_64bit::expand_source(*body, src, expanded_src);

   check_expanded_source(src_type, expanded_src);
   check_instructions(&instructions, src_type, src);
}

/* Expand a plain dereference of an int64_t variable. */
TEST_F(expand_source, int64_variable)
{
   const glsl_type *const src_type = glsl_type::int64_t_type;
   ir_dereference_variable *const src = create_variable(mem_ctx, src_type);

   lower_64bit::expand_source(*body, src, expanded_src);

   check_expanded_source(src_type, expanded_src);
   check_instructions(&instructions, src_type, src);
}

/* Expand a plain dereference of an i64vec2 variable. */
TEST_F(expand_source, i64vec2_variable)
{
   const glsl_type *const src_type = glsl_type::i64vec2_type;
   ir_dereference_variable *const src = create_variable(mem_ctx, src_type);

   lower_64bit::expand_source(*body, src, expanded_src);

   check_expanded_source(src_type, expanded_src);
   check_instructions(&instructions, src_type, src);
}

/* Expand a plain dereference of an i64vec3 variable. */
TEST_F(expand_source, i64vec3_variable)
{
   const glsl_type *const src_type = glsl_type::i64vec3_type;
   ir_dereference_variable *const src = create_variable(mem_ctx, src_type);

   lower_64bit::expand_source(*body, src, expanded_src);

   check_expanded_source(src_type, expanded_src);
   check_instructions(&instructions, src_type, src);
}

/* Expand a plain dereference of an i64vec4 variable. */
TEST_F(expand_source, i64vec4_variable)
{
   const glsl_type *const src_type = glsl_type::i64vec4_type;
   ir_dereference_variable *const src = create_variable(mem_ctx, src_type);

   lower_64bit::expand_source(*body, src, expanded_src);

   check_expanded_source(src_type, expanded_src);
   check_instructions(&instructions, src_type, src);
}
/* The *_expression tests feed expand_source() an ir_expression (a negation
 * built by create_expression()) instead of a bare variable dereference.
 */
TEST_F(expand_source, uint64_expression)
{
   const glsl_type *const src_type = glsl_type::uint64_t_type;
   ir_expression *const negate = create_expression(mem_ctx, src_type);

   lower_64bit::expand_source(*body, negate, expanded_src);

   check_expanded_source(src_type, expanded_src);
   check_instructions(&instructions, src_type, negate);
}

TEST_F(expand_source, u64vec2_expression)
{
   const glsl_type *const src_type = glsl_type::u64vec2_type;
   ir_expression *const negate = create_expression(mem_ctx, src_type);

   lower_64bit::expand_source(*body, negate, expanded_src);

   check_expanded_source(src_type, expanded_src);
   check_instructions(&instructions, src_type, negate);
}

TEST_F(expand_source, u64vec3_expression)
{
   const glsl_type *const src_type = glsl_type::u64vec3_type;
   ir_expression *const negate = create_expression(mem_ctx, src_type);

   lower_64bit::expand_source(*body, negate, expanded_src);

   check_expanded_source(src_type, expanded_src);
   check_instructions(&instructions, src_type, negate);
}

TEST_F(expand_source, u64vec4_expression)
{
   const glsl_type *const src_type = glsl_type::u64vec4_type;
   ir_expression *const negate = create_expression(mem_ctx, src_type);

   lower_64bit::expand_source(*body, negate, expanded_src);

   check_expanded_source(src_type, expanded_src);
   check_instructions(&instructions, src_type, negate);
}

TEST_F(expand_source, int64_expression)
{
   const glsl_type *const src_type = glsl_type::int64_t_type;
   ir_expression *const negate = create_expression(mem_ctx, src_type);

   lower_64bit::expand_source(*body, negate, expanded_src);

   check_expanded_source(src_type, expanded_src);
   check_instructions(&instructions, src_type, negate);
}

TEST_F(expand_source, i64vec2_expression)
{
   const glsl_type *const src_type = glsl_type::i64vec2_type;
   ir_expression *const negate = create_expression(mem_ctx, src_type);

   lower_64bit::expand_source(*body, negate, expanded_src);

   check_expanded_source(src_type, expanded_src);
   check_instructions(&instructions, src_type, negate);
}

TEST_F(expand_source, i64vec3_expression)
{
   const glsl_type *const src_type = glsl_type::i64vec3_type;
   ir_expression *const negate = create_expression(mem_ctx, src_type);

   lower_64bit::expand_source(*body, negate, expanded_src);

   check_expanded_source(src_type, expanded_src);
   check_instructions(&instructions, src_type, negate);
}

TEST_F(expand_source, i64vec4_expression)
{
   const glsl_type *const src_type = glsl_type::i64vec4_type;
   ir_expression *const negate = create_expression(mem_ctx, src_type);

   lower_64bit::expand_source(*body, negate, expanded_src);

   check_expanded_source(src_type, expanded_src);
   check_instructions(&instructions, src_type, negate);
}
/**
 * Fixture for the lower_64bit::compact_destination() tests.
 *
 * Each test starts with a fresh ralloc context, an empty instruction list,
 * an ir_factory that emits into that list, and a cleared expanded_src array.
 */
class compact_destination : public ::testing::Test {
public:
   virtual void SetUp();
   virtual void TearDown();

   exec_list instructions;
   ir_factory *body;
   ir_variable *expanded_src[4];
   void *mem_ctx;
};

void
compact_destination::SetUp()
{
   this->mem_ctx = ralloc_context(NULL);

   /* No per-component inputs exist until the test creates them. */
   const unsigned slots =
      sizeof(this->expanded_src) / sizeof(this->expanded_src[0]);
   for (unsigned slot = 0; slot < slots; slot++)
      this->expanded_src[slot] = NULL;

   this->instructions.make_empty();
   this->body = new ir_factory(&this->instructions, this->mem_ctx);
}

void
compact_destination::TearDown()
{
   /* The factory is a plain C++ allocation; everything else hangs off
    * mem_ctx and is released with it.
    */
   delete this->body;
   this->body = NULL;

   ralloc_free(this->mem_ctx);
   this->mem_ctx = NULL;
}
/* Compact a scalar uint64_t result: one uvec2 input variable goes in, and a
 * dereference of a uint64_t variable must come out.  The emitted stream must
 * start with that variable's declaration, followed by one assignment to it
 * per component.
 */
TEST_F(compact_destination, uint64)
{
   const glsl_type *const type = glsl_type::uint64_t_type;

   for (unsigned i = 0; i < type->vector_elements; i++) {
      expanded_src[i] = new(mem_ctx) ir_variable(glsl_type::uvec2_type,
                                                 "result",
                                                 ir_var_temporary);
   }

   ir_dereference_variable *deref =
      lower_64bit::compact_destination(*body,
                                       type,
                                       expanded_src);

   /* Everything below dereferences 'deref', so a missing result has to be a
    * fatal test failure rather than a crash.
    */
   ASSERT_NE((void *)0, deref);
   ASSERT_EQ(ir_type_dereference_variable, deref->ir_type);
   EXPECT_EQ(type, deref->var->type) <<
      " Got " <<
      deref->var->type->name <<
      ", expected " <<
      type->name;

   ir_instruction *ir;

   ASSERT_FALSE(instructions.is_empty());
   ir = (ir_instruction *) instructions.pop_head();
   ir_variable *const var = ir->as_variable();
   ASSERT_NE((void *)0, var);
   EXPECT_EQ(deref->var, var);

   for (unsigned i = 0; i < type->vector_elements; i++) {
      ASSERT_FALSE(instructions.is_empty());
      ir = (ir_instruction *) instructions.pop_head();
      ir_assignment *const assign = ir->as_assignment();
      ASSERT_NE((void *)0, assign);
      EXPECT_EQ(deref->var, assign->lhs->variable_referenced());
   }
}

View File

@@ -236,13 +236,6 @@ def if_execute_flag(statements):
check_sexp(statements)
return [['if', ['var_ref', 'execute_flag'], statements, []]]
def if_return_flag(then_statements, else_statements):
    """Build a one-element s-expression list holding an 'if' on return_flag.

    Both branches are validated with check_sexp() (then-branch first) before
    the node is assembled.
    """
    for branch in (then_statements, else_statements):
        check_sexp(branch)
    condition = ['var_ref', 'return_flag']
    return [['if', condition, then_statements, else_statements]]
def if_not_return_flag(statements):
"""Wrap statements in an if test so that they will only execute if
return_flag is False.
@@ -459,10 +452,7 @@ def test_lower_pulled_out_jump():
loop(simple_if('b', simple_if('c', [], continue_()),
lowered_return_simple()) +
break_()) +
if_return_flag(assign_x('return_flag', const_bool(1)) +
assign_x('execute_flag', const_bool(0)),
assign_x('d', const_float(1))))
if_not_return_flag(assign_x('d', const_float(1))))
))
create_test_case(doc_string, input_sexp, expected_sexp, 'lower_pulled_out_jump',
lower_main_return=True, pull_out_jumps=True)
@@ -593,14 +583,11 @@ def test_lower_return_void_at_end_of_loop():
assign_x('b', const_float(2))
))
expected_sexp = make_test_case('main', 'void', (
declare_execute_flag() +
declare_return_flag() +
loop(assign_x('a', const_float(1)) +
lowered_return_simple() +
break_()) +
if_return_flag(assign_x('return_flag', const_bool(1)) +
assign_x('execute_flag', const_bool(0)),
assign_x('b', const_float(2)))
if_not_return_flag(assign_x('b', const_float(2)))
))
create_test_case(doc_string, input_sexp, input_sexp, 'return_void_at_end_of_loop_lower_nothing')
create_test_case(doc_string, input_sexp, expected_sexp, 'return_void_at_end_of_loop_lower_return',

View File

@@ -0,0 +1,206 @@
/* Builds the IR body of a function signature that returns a uvec4 and takes
 * two uvec2 "in" parameters named "numer" and "denom".  The body is emitted
 * through `body`; the parameters are attached to the signature at the end.
 *
 * The rNNNN / fNNNN names and the IF / LOOP marker comments suggest this
 * file is machine-generated (presumably from a GLSL source -- TODO confirm),
 * so statement order should not be changed by hand.
 *
 * In GLSL-like pseudocode the emitted body is:
 *
 *    quot = uvec2(0);
 *    log2_denom = findMSB(denom.y) + 32;
 *    if (denom.y == 0 && numer.y != 0) {
 *       log2_denom = findMSB(denom.x);
 *       for (i = 31 - log2_denom; i >= 1; i--)
 *          if ((denom.x << i) <= numer.y) {
 *             numer.y -= denom.x << i;
 *             quot.y |= 1u << i;
 *          }
 *       if (denom.x <= numer.y) { numer.y -= denom.x; quot.y |= 1u; }
 *    }
 *    n64 = packUint2x32(numer);
 *    for (i = min(31, 63 - log2_denom); i >= 1; i--)
 *       if ((packUint2x32(denom) << i) <= n64) {
 *          n64 -= packUint2x32(denom) << i;
 *          quot.x |= 1u << i;
 *       }
 *    if (packUint2x32(denom) <= n64) { n64 -= packUint2x32(denom); quot.x |= 1u; }
 *    return uvec4(quot, unpackUint2x32(n64));
 *
 * i.e. a shift-and-subtract division: .xy of the result is "quot" and .zw is
 * what is left of the numerator (presumably the remainder, going by the
 * function name).
 */
ir_function_signature *
udivmod64(void *mem_ctx, ir_factory &body)
{
ir_function_signature *const sig =
new(mem_ctx) ir_function_signature(glsl_type::uvec4_type);
exec_list sig_parameters;
/* Parameters: numer (r0001) and denom (r0002), each a uvec2. */
ir_variable *const r0001 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "numer", ir_var_function_in);
sig_parameters.push_tail(r0001);
ir_variable *const r0002 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "denom", ir_var_function_in);
sig_parameters.push_tail(r0002);
/* Locals: i (r0003), n64 (r0004), log2_denom (r0005), quot (r0006). */
ir_variable *const r0003 = new(mem_ctx) ir_variable(glsl_type::int_type, "i", ir_var_auto);
body.emit(r0003);
ir_variable *const r0004 = new(mem_ctx) ir_variable(glsl_type::uint64_t_type, "n64", ir_var_auto);
body.emit(r0004);
ir_variable *const r0005 = new(mem_ctx) ir_variable(glsl_type::int_type, "log2_denom", ir_var_auto);
body.emit(r0005);
ir_variable *const r0006 = new(mem_ctx) ir_variable(glsl_type::uvec2_type, "quot", ir_var_auto);
body.emit(r0006);
/* quot = uvec2(0); log2_denom = findMSB(denom.y) + 32; */
body.emit(assign(r0006, ir_constant::zero(mem_ctx, glsl_type::uvec2_type), 0x03));
ir_expression *const r0007 = expr(ir_unop_find_msb, swizzle_y(r0002));
body.emit(assign(r0005, add(r0007, body.constant(int(32))), 0x01));
/* First stage, taken only when denom.y == 0 and numer.y != 0: works on
 * numer.y / denom.x and fills in quot.y (write mask 0x02).
 */
/* IF CONDITION */
ir_expression *const r0009 = equal(swizzle_y(r0002), body.constant(0u));
ir_expression *const r000A = nequal(swizzle_y(r0001), body.constant(0u));
ir_expression *const r000B = logic_and(r0009, r000A);
ir_if *f0008 = new(mem_ctx) ir_if(operand(r000B).val);
exec_list *const f0008_parent_instructions = body.instructions;
/* THEN INSTRUCTIONS */
body.instructions = &f0008->then_instructions;
/* A second variable named "i" (r000C), local to this branch. */
ir_variable *const r000C = new(mem_ctx) ir_variable(glsl_type::int_type, "i", ir_var_auto);
body.emit(r000C);
ir_variable *const r000D = body.make_temp(glsl_type::int_type, "findMSB_retval");
body.emit(assign(r000D, expr(ir_unop_find_msb, swizzle_x(r0002)), 0x01));
/* log2_denom = findMSB(denom.x); i = 31 - findMSB(denom.x); */
body.emit(assign(r0005, r000D, 0x01));
body.emit(assign(r000C, sub(body.constant(int(31)), r000D), 0x01));
/* LOOP BEGIN */
ir_loop *f000E = new(mem_ctx) ir_loop();
exec_list *const f000E_parent_instructions = body.instructions;
body.instructions = &f000E->body_instructions;
/* Loop exit: break once i < 1. */
/* IF CONDITION */
ir_expression *const r0010 = less(r000C, body.constant(int(1)));
ir_if *f000F = new(mem_ctx) ir_if(operand(r0010).val);
exec_list *const f000F_parent_instructions = body.instructions;
/* THEN INSTRUCTIONS */
body.instructions = &f000F->then_instructions;
body.emit(new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break));
body.instructions = f000F_parent_instructions;
body.emit(f000F);
/* END IF */
/* if ((denom.x << i) <= numer.y) { numer.y -= denom.x << i; quot.y |= 1u << i; } */
/* IF CONDITION */
ir_expression *const r0012 = lshift(swizzle_x(r0002), r000C);
ir_expression *const r0013 = lequal(r0012, swizzle_y(r0001));
ir_if *f0011 = new(mem_ctx) ir_if(operand(r0013).val);
exec_list *const f0011_parent_instructions = body.instructions;
/* THEN INSTRUCTIONS */
body.instructions = &f0011->then_instructions;
ir_expression *const r0014 = lshift(swizzle_x(r0002), r000C);
body.emit(assign(r0001, sub(swizzle_y(r0001), r0014), 0x02));
ir_expression *const r0015 = lshift(body.constant(1u), r000C);
body.emit(assign(r0006, bit_or(swizzle_y(r0006), r0015), 0x02));
body.instructions = f0011_parent_instructions;
body.emit(f0011);
/* END IF */
/* i--; */
body.emit(assign(r000C, add(r000C, body.constant(int(-1))), 0x01));
/* LOOP END */
body.instructions = f000E_parent_instructions;
body.emit(f000E);
/* Shift-by-zero step handled outside the loop:
 * if (denom.x <= numer.y) { numer.y -= denom.x; quot.y |= 1u; }
 */
/* IF CONDITION */
ir_expression *const r0017 = lequal(swizzle_x(r0002), swizzle_y(r0001));
ir_if *f0016 = new(mem_ctx) ir_if(operand(r0017).val);
exec_list *const f0016_parent_instructions = body.instructions;
/* THEN INSTRUCTIONS */
body.instructions = &f0016->then_instructions;
body.emit(assign(r0001, sub(swizzle_y(r0001), swizzle_x(r0002)), 0x02));
body.emit(assign(r0006, bit_or(swizzle_y(r0006), body.constant(1u)), 0x02));
body.instructions = f0016_parent_instructions;
body.emit(f0016);
/* END IF */
body.instructions = f0008_parent_instructions;
body.emit(f0008);
/* END IF */
/* Second stage: 64-bit compare/subtract on the packed values, filling in
 * quot.x (write mask 0x01).
 * n64 = packUint2x32(numer); i = min(31, 63 - log2_denom);
 */
body.emit(assign(r0004, expr(ir_unop_pack_uint_2x32, r0001), 0x01));
ir_expression *const r0018 = sub(body.constant(int(63)), r0005);
body.emit(assign(r0003, expr(ir_binop_min, body.constant(int(31)), r0018), 0x01));
/* LOOP BEGIN */
ir_loop *f0019 = new(mem_ctx) ir_loop();
exec_list *const f0019_parent_instructions = body.instructions;
body.instructions = &f0019->body_instructions;
/* Loop exit: break once i < 1. */
/* IF CONDITION */
ir_expression *const r001B = less(r0003, body.constant(int(1)));
ir_if *f001A = new(mem_ctx) ir_if(operand(r001B).val);
exec_list *const f001A_parent_instructions = body.instructions;
/* THEN INSTRUCTIONS */
body.instructions = &f001A->then_instructions;
body.emit(new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break));
body.instructions = f001A_parent_instructions;
body.emit(f001A);
/* END IF */
/* assignment_tmp = packUint2x32(denom) << i; */
ir_variable *const r001C = body.make_temp(glsl_type::uint64_t_type, "assignment_tmp");
ir_expression *const r001D = expr(ir_unop_pack_uint_2x32, r0002);
body.emit(assign(r001C, lshift(r001D, r0003), 0x01));
/* if (assignment_tmp <= n64) { n64 -= assignment_tmp; quot.x |= 1u << i; } */
/* IF CONDITION */
ir_expression *const r001F = lequal(r001C, r0004);
ir_if *f001E = new(mem_ctx) ir_if(operand(r001F).val);
exec_list *const f001E_parent_instructions = body.instructions;
/* THEN INSTRUCTIONS */
body.instructions = &f001E->then_instructions;
body.emit(assign(r0004, sub(r0004, r001C), 0x01));
ir_expression *const r0020 = lshift(body.constant(1u), r0003);
body.emit(assign(r0006, bit_or(swizzle_x(r0006), r0020), 0x01));
body.instructions = f001E_parent_instructions;
body.emit(f001E);
/* END IF */
/* i--; */
body.emit(assign(r0003, add(r0003, body.constant(int(-1))), 0x01));
/* LOOP END */
body.instructions = f0019_parent_instructions;
body.emit(f0019);
/* Shift-by-zero step handled outside the loop:
 * if (packUint2x32(denom) <= n64) { n64 -= packUint2x32(denom); quot.x |= 1u; }
 */
ir_variable *const r0021 = body.make_temp(glsl_type::uint64_t_type, "packUint2x32_retval");
body.emit(assign(r0021, expr(ir_unop_pack_uint_2x32, r0002), 0x01));
/* IF CONDITION */
ir_expression *const r0023 = lequal(r0021, r0004);
ir_if *f0022 = new(mem_ctx) ir_if(operand(r0023).val);
exec_list *const f0022_parent_instructions = body.instructions;
/* THEN INSTRUCTIONS */
body.instructions = &f0022->then_instructions;
ir_expression *const r0024 = expr(ir_unop_pack_uint_2x32, r0002);
body.emit(assign(r0004, sub(r0004, r0024), 0x01));
body.emit(assign(r0006, bit_or(swizzle_x(r0006), body.constant(1u)), 0x01));
body.instructions = f0022_parent_instructions;
body.emit(f0022);
/* END IF */
/* Result: .xy (mask 0x03) = quot, .zw (mask 0x0c) = unpackUint2x32(n64). */
ir_variable *const r0025 = body.make_temp(glsl_type::uvec4_type, "vec_ctor");
body.emit(assign(r0025, r0006, 0x03));
body.emit(assign(r0025, expr(ir_unop_unpack_uint_2x32, r0004), 0x0c));
body.emit(ret(r0025));
sig->replace_parameters(&sig_parameters);
return sig;
}

View File

@@ -354,6 +354,10 @@ const glsl_type *glsl_type::get_base_type() const
return double_type;
case GLSL_TYPE_BOOL:
return bool_type;
case GLSL_TYPE_UINT64:
return uint64_t_type;
case GLSL_TYPE_INT64:
return int64_t_type;
default:
return error_type;
}
@@ -380,6 +384,10 @@ const glsl_type *glsl_type::get_scalar_type() const
return double_type;
case GLSL_TYPE_BOOL:
return bool_type;
case GLSL_TYPE_UINT64:
return uint64_t_type;
case GLSL_TYPE_INT64:
return int64_t_type;
default:
/* Handle everything else */
return type;
@@ -519,6 +527,31 @@ glsl_type::bvec(unsigned components)
}
/**
 * Look up the signed 64-bit integer type with the given component count.
 *
 * Returns int64_t_type for 1 component, i64vecN_type for 2..4, and
 * error_type for any other count.
 */
const glsl_type *
glsl_type::i64vec(unsigned components)
{
   const bool valid_width = components >= 1 && components <= 4;
   if (!valid_width)
      return error_type;

   /* Indexed by (component count - 1). */
   static const glsl_type *const by_width[] = {
      int64_t_type, i64vec2_type, i64vec3_type, i64vec4_type
   };

   return by_width[components - 1];
}
/**
 * Look up the unsigned 64-bit integer type with the given component count.
 *
 * Returns uint64_t_type for 1 component, u64vecN_type for 2..4, and
 * error_type for any other count.
 */
const glsl_type *
glsl_type::u64vec(unsigned components)
{
   const bool valid_width = components >= 1 && components <= 4;
   if (!valid_width)
      return error_type;

   /* Indexed by (component count - 1). */
   static const glsl_type *const by_width[] = {
      uint64_t_type, u64vec2_type, u64vec3_type, u64vec4_type
   };

   return by_width[components - 1];
}
const glsl_type *
glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns)
{
@@ -542,6 +575,10 @@ glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns)
return dvec(rows);
case GLSL_TYPE_BOOL:
return bvec(rows);
case GLSL_TYPE_UINT64:
return u64vec(rows);
case GLSL_TYPE_INT64:
return i64vec(rows);
default:
return error_type;
}
@@ -1239,6 +1276,8 @@ glsl_type::component_slots() const
return this->components();
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
return 2 * this->components();
case GLSL_TYPE_STRUCT:
@@ -1321,6 +1360,8 @@ glsl_type::uniform_locations() const
case GLSL_TYPE_INT:
case GLSL_TYPE_FLOAT:
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
case GLSL_TYPE_BOOL:
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
@@ -1350,6 +1391,8 @@ glsl_type::varying_count() const
case GLSL_TYPE_FLOAT:
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_BOOL:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
return 1;
case GLSL_TYPE_STRUCT:
@@ -1923,6 +1966,8 @@ glsl_type::count_attribute_slots(bool is_vertex_input) const
case GLSL_TYPE_BOOL:
return this->matrix_columns;
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
if (this->vector_elements > 2 && !is_vertex_input)
return this->matrix_columns * 2;
else

View File

@@ -54,6 +54,8 @@ enum glsl_base_type {
GLSL_TYPE_INT,
GLSL_TYPE_FLOAT,
GLSL_TYPE_DOUBLE,
GLSL_TYPE_UINT64,
GLSL_TYPE_INT64,
GLSL_TYPE_BOOL,
GLSL_TYPE_SAMPLER,
GLSL_TYPE_IMAGE,
@@ -69,7 +71,9 @@ enum glsl_base_type {
/**
 * Return true when \p type is one of the 64-bit base types:
 * GLSL_TYPE_DOUBLE, GLSL_TYPE_UINT64 or GLSL_TYPE_INT64.
 *
 * The earlier double-only return has been dropped; left in place ahead of
 * this expression it made the two integer cases unreachable.
 */
static inline bool glsl_base_type_is_64bit(enum glsl_base_type type)
{
   return type == GLSL_TYPE_DOUBLE ||
          type == GLSL_TYPE_UINT64 ||
          type == GLSL_TYPE_INT64;
}
enum glsl_sampler_dim {
@@ -225,6 +229,8 @@ struct glsl_type {
static const glsl_type *ivec(unsigned components);
static const glsl_type *uvec(unsigned components);
static const glsl_type *bvec(unsigned components);
static const glsl_type *i64vec(unsigned components);
static const glsl_type *u64vec(unsigned components);
/**@}*/
/**
@@ -462,7 +468,7 @@ struct glsl_type {
*/
bool is_numeric() const
{
   /* Range test over the enumerators from GLSL_TYPE_UINT up to and
    * including GLSL_TYPE_INT64; this depends on glsl_base_type keeping the
    * numeric types contiguous and GLSL_TYPE_BOOL after GLSL_TYPE_INT64.
    * The earlier upper bound of GLSL_TYPE_DOUBLE has been dropped: it
    * returned first and excluded the 64-bit integer types.
    */
   return (base_type >= GLSL_TYPE_UINT) && (base_type <= GLSL_TYPE_INT64);
}
/**
@@ -474,8 +480,17 @@ struct glsl_type {
}
/**
 * Query whether or not a type is a 32-bit or 64-bit integer
 */
bool is_integer_32_64() const
{
   switch (base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_UINT64:
   case GLSL_TYPE_INT64:
      return true;
   default:
      return false;
   }
}
/**
* Query whether or not type is an integral type, or for struct and array
* types, contains an integral type.
*/
bool contains_integer() const;

View File

@@ -846,6 +846,8 @@ nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref)
case GLSL_TYPE_INT:
case GLSL_TYPE_UINT:
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
case GLSL_TYPE_BOOL:
load->value = constant->values[matrix_col];
break;

Some files were not shown because too many files have changed in this diff Show More