VERSION: bump for 24.0.0-rc2

radv/rt: Add workaround to make leaves always active
DOOM Eternal builds acceleration structures with inactive primitives and tries to make them active in later AS updates. This is disallowed by the spec and triggers a GPU hang. Fix the hang by working around the bug. Cc: mesa-stable Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27034> (cherry picked from commit a9831caa14)
2024-01-17 22:28:20 +00:00 · 2024-01-17 21:42:02 +00:00 · 2024-01-17 21:41:44 +00:00 · 2024-01-17 21:39:08 +00:00 · 2024-01-17 21:39:06 +00:00 · 2024-01-17 21:39:02 +00:00
54 changed files with 3250 additions and 185 deletions
--- a/.ci-farms-disabled/lima
+++ b/.ci-farms-disabled/lima
--- a/.gitlab-ci/container/gitlab-ci.yml
+++ b/.gitlab-ci/container/gitlab-ci.yml
@@ -68,6 +68,10 @@ debian/x86_64_build-base:
    - .debian-container
  variables:
    MESA_IMAGE_TAG: &debian-x86_64_build-base "${DEBIAN_BASE_TAG}--${PKG_REPO_REV}"
+  rules:
+    # python-test requires debian/x86_64_build, which requires this job
+    - !reference [python-test, rules]
+    - !reference [.container, rules]

 .use-debian/x86_64_build-base:
  extends:
@@ -88,9 +92,9 @@ debian/x86_64_build:
  variables:
    MESA_IMAGE_TAG: &debian-x86_64_build ${DEBIAN_BUILD_TAG}
  rules:
-    - !reference [.use-debian/x86_64_build-base, rules]
    # python-test requires this job
    - !reference [python-test, rules]
+    - !reference [.use-debian/x86_64_build-base, rules]

 .use-debian/x86_64_build:
  extends:
--- a/.pick_status.json
+++ b/.pick_status.json
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-24.0.0-devel
+24.0.0-rc2
--- a/src/amd/ci/gitlab-ci.yml
+++ b/src/amd/ci/gitlab-ci.yml
@@ -94,7 +94,7 @@ radeonsi-raven-piglit-quick_shader:x86_64:
    PIGLIT_PROFILES: quick_shader
    PIGLIT_FRACTION: 2

-.radeonsi-raven-va:x86_64:
+radeonsi-raven-va:x86_64:
  extends:
    - .lava-test-deqp:x86_64
    - .radeonsi-raven-test:x86_64
@@ -117,7 +117,7 @@ radeonsi-raven-piglit-quick_shader:x86_64:

 radeonsi-raven-va-full:x86_64:
  extends:
-    - .radeonsi-raven-va:x86_64
+    - radeonsi-raven-va:x86_64
    - .radeonsi-vaapi-manual-rules
  variables:
    JOB_TIMEOUT: 60
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -1883,12 +1883,16 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_
      util_next_power_of_two(LINEAR_PITCH_ALIGNMENT / surf->bpe);

   if (!compressed && surf->blk_w > 1 && out.pitch == out.pixelPitch &&
-       surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR) {
-      /* Adjust surf_pitch to be in elements units not in pixels */
+       surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR &&
+       in->numMipLevels == 1) {
+      /* Divide surf_pitch (= pitch in pixels) by blk_w to get a
+       * pitch in elements instead because that's what the hardware needs
+       * in resource descriptors.
+       * See the comment in si_descriptors.c.
+       */
      surf->u.gfx9.surf_pitch = align(surf->u.gfx9.surf_pitch / surf->blk_w,
                                      linear_alignment);
-      surf->u.gfx9.epitch =
-         MAX2(surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch * surf->blk_w - 1);
+      surf->u.gfx9.epitch = surf->u.gfx9.surf_pitch - 1;
       /* Adjust surf_slice_size and surf_size to reflect the change made to surf_pitch. */
      surf->u.gfx9.surf_slice_size = (uint64_t)surf->u.gfx9.surf_pitch * out.height * surf->bpe;
      surf->surf_size = surf->u.gfx9.surf_slice_size * in->numSlices;
--- a/src/amd/vulkan/bvh/build_helpers.h
+++ b/src/amd/vulkan/bvh/build_helpers.h
@@ -156,6 +156,7 @@

 #define VK_GEOMETRY_TYPE_TRIANGLES_KHR 0
 #define VK_GEOMETRY_TYPE_AABBS_KHR     1
+#define VK_GEOMETRY_TYPE_INSTANCES_KHR 2

 #define VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR 1
 #define VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR         2
--- a/src/amd/vulkan/bvh/leaf.comp
+++ b/src/amd/vulkan/bvh/leaf.comp
@@ -87,6 +87,14 @@ main(void)
      is_active = build_instance(bounds, src_ptr, dst_ptr, global_id);
   }

+#if ALWAYS_ACTIVE
+   if (!is_active && args.geom_data.geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) {
+      bounds.min = vec3(0.0);
+      bounds.max = vec3(0.0);
+      is_active = true;
+   }
+#endif
+
   if (is_active) {
      REF(radv_ir_node) ir_node = INDEX(radv_ir_node, args.ir, primitive_id);
      DEREF(ir_node).aabb = bounds;
--- a/src/amd/vulkan/bvh/meson.build
+++ b/src/amd/vulkan/bvh/meson.build
@@ -53,7 +53,12 @@ bvh_shaders = [
  [
    'leaf.comp',
    'leaf',
-    [],
+    ['ALWAYS_ACTIVE=0'],
+  ],
+  [
+    'leaf.comp',
+    'leaf_always_active',
+    ['ALWAYS_ACTIVE=1'],
  ],
  [
    'morton.comp',
--- a/src/amd/vulkan/radv_acceleration_structure.c
+++ b/src/amd/vulkan/radv_acceleration_structure.c
@@ -41,6 +41,10 @@ static const uint32_t leaf_spv[] = {
 #include "bvh/leaf.spv.h"
 };

+static const uint32_t leaf_always_active_spv[] = {
+#include "bvh/leaf_always_active.spv.h"
+};
+
 static const uint32_t morton_spv[] = {
 #include "bvh/morton.spv.h"
 };
@@ -538,9 +542,14 @@ radv_device_init_accel_struct_build_state(struct radv_device *device)
   if (device->meta_state.accel_struct_build.radix_sort)
      goto exit;

-   result = create_build_pipeline_spv(device, leaf_spv, sizeof(leaf_spv), sizeof(struct leaf_args),
-                                      &device->meta_state.accel_struct_build.leaf_pipeline,
-                                      &device->meta_state.accel_struct_build.leaf_p_layout);
+   if (device->instance->drirc.force_active_accel_struct_leaves)
+      result = create_build_pipeline_spv(device, leaf_always_active_spv, sizeof(leaf_always_active_spv),
+                                         sizeof(struct leaf_args), &device->meta_state.accel_struct_build.leaf_pipeline,
+                                         &device->meta_state.accel_struct_build.leaf_p_layout);
+   else
+      result = create_build_pipeline_spv(device, leaf_spv, sizeof(leaf_spv), sizeof(struct leaf_args),
+                                         &device->meta_state.accel_struct_build.leaf_pipeline,
+                                         &device->meta_state.accel_struct_build.leaf_p_layout);
   if (result != VK_SUCCESS)
      goto exit;

--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -554,7 +554,7 @@ radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *
         image_info->surf_index = NULL;
      }

-      if (create_info->prime_blit_src && device->physical_device->rad_info.gfx_level == GFX9) {
+      if (create_info->prime_blit_src && !device->physical_device->rad_info.sdma_supports_compression) {
         /* Older SDMA hw can't handle DCC */
         image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
      }
--- a/src/amd/vulkan/radv_instance.c
+++ b/src/amd/vulkan/radv_instance.c
@@ -160,6 +160,7 @@ static const driOptionDescription radv_dri_options[] = {
      DRI_CONF_RADV_OVERRIDE_COMPUTE_SHADER_VERSION(0)
      DRI_CONF_RADV_OVERRIDE_RAY_TRACING_SHADER_VERSION(0)
      DRI_CONF_RADV_SSBO_NON_UNIFORM(false)
+      DRI_CONF_RADV_FORCE_ACTIVE_ACCEL_STRUCT_LEAVES(false)
      DRI_CONF_RADV_APP_LAYER()
   DRI_CONF_SECTION_END
 };
@@ -251,6 +252,9 @@ radv_init_dri_options(struct radv_instance *instance)

   instance->drirc.vk_require_etc2 = driQueryOptionb(&instance->drirc.options, "vk_require_etc2");
   instance->drirc.vk_require_astc = driQueryOptionb(&instance->drirc.options, "vk_require_astc");
+
+   instance->drirc.force_active_accel_struct_leaves =
+      driQueryOptionb(&instance->drirc.options, "radv_force_active_accel_struct_leaves");
 }

 static const struct vk_instance_extension_table radv_instance_extensions_supported = {
--- a/src/amd/vulkan/radv_pipeline_rt.c
+++ b/src/amd/vulkan/radv_pipeline_rt.c
@@ -732,6 +732,8 @@ compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline *
      combine_config(config, &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]->config);

   postprocess_rt_config(config, device->physical_device->rad_info.gfx_level, device->physical_device->rt_wave_size);
+
+   pipeline->prolog->max_waves = radv_get_max_waves(device, config, &pipeline->prolog->info);
 }

 static VkResult
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -386,6 +386,7 @@ struct radv_instance {
      bool report_llvm9_version_string;
      bool vk_require_etc2;
      bool vk_require_astc;
+      bool force_active_accel_struct_leaves;
      char *app_layer;
      uint8_t override_graphics_shader_version;
      uint8_t override_compute_shader_version;
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -2051,7 +2051,7 @@ radv_shader_upload(struct radv_device *device, struct radv_shader *shader, const
   return true;
 }

-static unsigned
+unsigned
 radv_get_max_waves(const struct radv_device *device, const struct ac_shader_config *conf,
                   const struct radv_shader_info *info)
 {
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -803,6 +803,9 @@ struct radv_shader_part *radv_shader_part_cache_get(struct radv_device *device,
 uint64_t radv_shader_get_va(const struct radv_shader *shader);
 struct radv_shader *radv_find_shader(struct radv_device *device, uint64_t pc);

+unsigned radv_get_max_waves(const struct radv_device *device, const struct ac_shader_config *conf,
+                            const struct radv_shader_info *info);
+
 unsigned radv_get_max_scratch_waves(const struct radv_device *device, struct radv_shader *shader);

 const char *radv_get_shader_name(const struct radv_shader_info *info, gl_shader_stage stage);
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -1606,12 +1606,6 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool p
   /* Sync flags. */
   if (flags & CP_DMA_SYNC)
      header |= S_411_CP_SYNC(1);
-   else {
-      if (device->physical_device->rad_info.gfx_level >= GFX9)
-         command |= S_415_DISABLE_WR_CONFIRM_GFX9(1);
-      else
-         command |= S_415_DISABLE_WR_CONFIRM_GFX6(1);
-   }

   if (flags & CP_DMA_RAW_WAIT)
      command |= S_415_RAW_WAIT(1);
--- a/src/compiler/glsl/gl_nir_link_varyings.c
+++ b/src/compiler/glsl/gl_nir_link_varyings.c
@@ -745,7 +745,7 @@ gl_nir_cross_validate_outputs_to_inputs(const struct gl_constants *consts,
         if (!validate_explicit_variable_location(consts,
                                                  output_explicit_locations,
                                                  var, prog, producer)) {
-            return;
+            goto out;
         }
      }
   }
@@ -799,7 +799,7 @@ gl_nir_cross_validate_outputs_to_inputs(const struct gl_constants *consts,
            if (!validate_explicit_variable_location(consts,
                                                     input_explicit_locations,
                                                     input, prog, consumer)) {
-               return;
+               goto out;
            }

            while (idx < slot_limit) {
@@ -807,7 +807,7 @@ gl_nir_cross_validate_outputs_to_inputs(const struct gl_constants *consts,
                  linker_error(prog,
                               "Invalid location %u in %s shader\n", idx,
                               _mesa_shader_stage_to_string(consumer->Stage));
-                  return;
+                  goto out;
               }

               output = output_explicit_locations[idx][input->data.location_frac].var;
@@ -870,6 +870,7 @@ gl_nir_cross_validate_outputs_to_inputs(const struct gl_constants *consts,
      }
   }

+ out:
   _mesa_symbol_table_dtor(table);
 }

--- a/src/compiler/nir/nir_instr_set.c
+++ b/src/compiler/nir/nir_instr_set.c
@@ -441,7 +441,7 @@ nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
   } else {
      alu1_actual_src = alu1->src[src1].src;

-      for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++)
+      for (unsigned i = 0; i < nir_src_num_components(alu1_actual_src); i++)
         alu1_swizzle[i] = i;
   }

@@ -458,7 +458,7 @@ nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
   } else {
      alu2_actual_src = alu2->src[src2].src;

-      for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu2, src2); i++)
+      for (unsigned i = 0; i < nir_src_num_components(alu2_actual_src); i++)
         alu2_swizzle[i] = i;
   }

--- a/src/compiler/nir/tests/comparison_pre_tests.cpp
+++ b/src/compiler/nir/tests/comparison_pre_tests.cpp
@@ -579,3 +579,95 @@ TEST_F(comparison_pre_test, non_scalar_add_result)

   EXPECT_FALSE(nir_opt_comparison_pre_impl(bld.impl));
 }
+
+TEST_F(comparison_pre_test, multi_comps_load)
+{
+   /* Before:
+    *
+    * vec1 32 ssa_0 = load_ubo (...)
+    * vec4 32 ssa_1 = load_ubo (...)
+    * vec1 1  ssa_2 = flt ssa_0, ssa_1.w
+    *
+    * if ssa_2 {
+    *    vec1 32 ssa_3 = fneg ssa_1.x
+    *    vec1 32 ssa_4 = fadd ssa_0, ssa_3
+    * } else {
+    * }
+    */
+   nir_def *ssa_0 = nir_load_ubo(&bld, 1, 32,
+                                 nir_imm_int(&bld, 0),
+                                 nir_imm_int(&bld, 0));
+   nir_def *ssa_1 = nir_load_ubo(&bld, 4, 32,
+                                 nir_imm_int(&bld, 1),
+                                 nir_imm_int(&bld, 0));
+
+   nir_alu_instr *flt = nir_alu_instr_create(bld.shader, nir_op_flt);
+   flt->src[0].src = nir_src_for_ssa(ssa_0);
+   flt->src[1].src = nir_src_for_ssa(ssa_1);
+   memcpy(&flt->src[0].swizzle, xxxx, sizeof(xxxx));
+   memcpy(&flt->src[1].swizzle, wwww, sizeof(wwww));
+   nir_builder_alu_instr_finish_and_insert(&bld, flt);
+   flt->def.num_components = 1;
+   nir_def *ssa_2 = &flt->def;
+
+   nir_if *nif = nir_push_if(&bld, ssa_2);
+   {
+      nir_alu_instr *fneg = nir_alu_instr_create(bld.shader, nir_op_fneg);
+      fneg->src[0].src = nir_src_for_ssa(ssa_1);
+      memcpy(&fneg->src[0].swizzle, xxxx, sizeof(xxxx));
+      nir_builder_alu_instr_finish_and_insert(&bld, fneg);
+      fneg->def.num_components = 1;
+      nir_def *ssa_3 = &fneg->def;
+
+      nir_fadd(&bld, ssa_0, ssa_3);
+   }
+   nir_pop_if(&bld, nif);
+
+   EXPECT_FALSE(nir_opt_comparison_pre_impl(bld.impl));
+}
+
+TEST_F(comparison_pre_test, multi_comps_load2)
+{
+   /* Before:
+    *
+    * vec1 32 ssa_0 = load_ubo (...)
+    * vec4 32 ssa_1 = load_ubo (...)
+    * vec1 1  ssa_2 = flt ssa_0, ssa_1.x
+    *
+    * if ssa_2 {
+    *    vec1 32 ssa_3 = fneg ssa_1.w
+    *    vec1 32 ssa_4 = fadd ssa_0, ssa_3
+    * } else {
+    * }
+    */
+   nir_def *ssa_0 = nir_load_ubo(&bld, 1, 32,
+                                 nir_imm_int(&bld, 0),
+                                 nir_imm_int(&bld, 0));
+   nir_def *ssa_1 = nir_load_ubo(&bld, 4, 32,
+                                 nir_imm_int(&bld, 1),
+                                 nir_imm_int(&bld, 0));
+
+   nir_alu_instr *flt = nir_alu_instr_create(bld.shader, nir_op_flt);
+   flt->src[0].src = nir_src_for_ssa(ssa_0);
+   flt->src[1].src = nir_src_for_ssa(ssa_1);
+   memcpy(&flt->src[0].swizzle, xxxx, sizeof(xxxx));
+   memcpy(&flt->src[1].swizzle, xxxx, sizeof(xxxx));
+   nir_builder_alu_instr_finish_and_insert(&bld, flt);
+   flt->def.num_components = 1;
+   nir_def *ssa_2 = &flt->def;
+
+   nir_if *nif = nir_push_if(&bld, ssa_2);
+   {
+      nir_alu_instr *fneg = nir_alu_instr_create(bld.shader, nir_op_fneg);
+      fneg->src[0].src = nir_src_for_ssa(ssa_1);
+      memcpy(&fneg->src[0].swizzle, wwww, sizeof(wwww));
+      nir_builder_alu_instr_finish_and_insert(&bld, fneg);
+      fneg->def.num_components = 1;
+      nir_def *ssa_3 = &fneg->def;
+
+      nir_fadd(&bld, ssa_0, ssa_3);
+   }
+   nir_pop_if(&bld, nif);
+
+   EXPECT_FALSE(nir_opt_comparison_pre_impl(bld.impl));
+}
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -156,7 +156,7 @@ vtn_dump_shader(struct vtn_builder *b, const char *path, const char *prefix)
   if (len < 0 || len >= sizeof(filename))
      return;

-   FILE *f = fopen(filename, "w");
+   FILE *f = fopen(filename, "wb");
   if (f == NULL)
      return;

--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -458,6 +458,11 @@ gpu_supports_texture_format(struct etna_screen *screen, uint32_t fmt,
 {
   bool supported = true;

+   /* Requires split sampler support, which the driver doesn't support, yet. */
+   if (!util_format_is_compressed(format) &&
+       util_format_get_blocksizebits(format) > 32)
+      return false;
+
   if (fmt == TEXTURE_FORMAT_ETC1)
      supported = VIV_FEATURE(screen, chipFeatures, ETC1_TEXTURE_COMPRESSION);

@@ -500,6 +505,10 @@ gpu_supports_render_format(struct etna_screen *screen, enum pipe_format format,
   if (fmt == ETNA_NO_MATCH)
      return false;

+   /* Requires split target support, which the driver doesn't support, yet. */
+   if (util_format_get_blocksizebits(format) > 32)
+      return false;
+
   if (sample_count > 1) {
      /* Explicitly enabled. */
      if (!DBG_ENABLED(ETNA_DBG_MSAA))
--- a/src/gallium/drivers/r300/ci/r300-rv530-nohiz-fails.txt
+++ b/src/gallium/drivers/r300/ci/r300-rv530-nohiz-fails.txt
@@ -368,8 +368,6 @@ shaders@glsl-bug-110796,Fail
 shaders@glsl-fs-bug25902,Fail
 shaders@glsl-fwidth,Fail
 shaders@glsl-lod-bias,Fail
-shaders@glsl-max-varyings,Fail
-shaders@glsl-max-varyings >max_varying_components,Fail
 shaders@glsl-orangebook-ch06-bump,Fail
 shaders@glsl-uniform-interstage-limits@subdivide 5,Fail
 shaders@glsl-uniform-interstage-limits@subdivide 5- statechanges,Fail
@@ -841,33 +839,6 @@ spec@glsl-1.10@execution@loops@glsl-vs-loop-300,Fail

 spec@glsl-1.10@execution@variable-indexing@vs-output-array-vec2-index-wr-no-unroll,Fail

-spec@glsl-1.10@execution@varying-packing@simple float array,Fail
-spec@glsl-1.10@execution@varying-packing@simple float separate,Fail
-spec@glsl-1.10@execution@varying-packing@simple mat2 array,Fail
-spec@glsl-1.10@execution@varying-packing@simple mat2 separate,Fail
-spec@glsl-1.10@execution@varying-packing@simple mat2x3 array,Fail
-spec@glsl-1.10@execution@varying-packing@simple mat2x3 separate,Fail
-spec@glsl-1.10@execution@varying-packing@simple mat2x4 array,Fail
-spec@glsl-1.10@execution@varying-packing@simple mat2x4 separate,Fail
-spec@glsl-1.10@execution@varying-packing@simple mat3 array,Fail
-spec@glsl-1.10@execution@varying-packing@simple mat3 separate,Fail
-spec@glsl-1.10@execution@varying-packing@simple mat3x2 array,Fail
-spec@glsl-1.10@execution@varying-packing@simple mat3x2 separate,Fail
-spec@glsl-1.10@execution@varying-packing@simple mat3x4 array,Fail
-spec@glsl-1.10@execution@varying-packing@simple mat3x4 separate,Fail
-spec@glsl-1.10@execution@varying-packing@simple mat4 array,Fail
-spec@glsl-1.10@execution@varying-packing@simple mat4 separate,Fail
-spec@glsl-1.10@execution@varying-packing@simple mat4x2 array,Fail
-spec@glsl-1.10@execution@varying-packing@simple mat4x2 separate,Fail
-spec@glsl-1.10@execution@varying-packing@simple mat4x3 array,Fail
-spec@glsl-1.10@execution@varying-packing@simple mat4x3 separate,Fail
-spec@glsl-1.10@execution@varying-packing@simple vec2 array,Fail
-spec@glsl-1.10@execution@varying-packing@simple vec2 separate,Fail
-spec@glsl-1.10@execution@varying-packing@simple vec3 array,Fail
-spec@glsl-1.10@execution@varying-packing@simple vec3 separate,Fail
-spec@glsl-1.10@execution@varying-packing@simple vec4 array,Fail
-spec@glsl-1.10@execution@varying-packing@simple vec4 separate,Fail
-
 spec@glsl-1.20@execution@clipping@vs-clip-vertex-const-accept,Fail
 spec@glsl-1.20@execution@clipping@vs-clip-vertex-different-from-position,Fail
 spec@glsl-1.20@execution@clipping@vs-clip-vertex-homogeneity,Fail
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -65,11 +65,13 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
            case TGSI_SEMANTIC_TEXCOORD:
                assert(index < ATTR_TEXCOORD_COUNT);
                fs_inputs->texcoord[index] = i;
+                fs_inputs->num_texcoord++;
                break;

            case TGSI_SEMANTIC_GENERIC:
                assert(index < ATTR_GENERIC_COUNT);
                fs_inputs->generic[index] = i;
+                fs_inputs->num_generic++;
                break;

            case TGSI_SEMANTIC_FOG:
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -541,6 +541,14 @@ static void r300_update_rs_block(struct r300_context *r300)
        }
    }

+    for (; i < ATTR_GENERIC_COUNT; i++) {
+        if (fs_inputs->generic[i] != ATTR_UNUSED) {
+            fprintf(stderr, "r300: ERROR: FS input generic %i unassigned, "
+                    "not enough hardware slots (it's not a bug, do not "
+                    "report it).\n", i);
+        }
+    }
+
    gen_offset = 0;
    /* Re-use color varyings for texcoords if possible.
     *
@@ -645,6 +653,14 @@ static void r300_update_rs_block(struct r300_context *r300)
        }
    }

+    for (; i < ATTR_TEXCOORD_COUNT; i++) {
+        if (fs_inputs->texcoord[i] != ATTR_UNUSED) {
+            fprintf(stderr, "r300: ERROR: FS input texcoord %i unassigned, "
+                    "not enough hardware slots (it's not a bug, do not "
+                    "report it).\n", i);
+        }
+    }
+
    /* Rasterize pointcoord. */
    if (fs_inputs->pcoord != ATTR_UNUSED && tex_count < 8) {

@@ -666,14 +682,6 @@ static void r300_update_rs_block(struct r300_context *r300)
        tex_ptr += 2;
    }

-    for (; i < ATTR_GENERIC_COUNT; i++) {
-        if (fs_inputs->generic[i] != ATTR_UNUSED) {
-            fprintf(stderr, "r300: ERROR: FS input generic %i unassigned, "
-                    "not enough hardware slots (it's not a bug, do not "
-                    "report it).\n", i);
-        }
-    }
-
    /* Rasterize fog coordinates. */
    if (vs_outputs->fog != ATTR_UNUSED && tex_count < 8) {
        /* Set up the fog coordinates in VAP. */
--- a/src/gallium/drivers/radeonsi/radeon_vcn_enc_1_2.c
+++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc_1_2.c
@@ -920,7 +920,8 @@ static void radeon_enc_slice_header(struct radeon_encoder *enc)
      radeon_enc_code_fixed_bits(enc, enc->enc_pic.pic_order_cnt % 32, 5);

   /* ref_pic_list_modification() */
-   if (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR) {
+   if (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR &&
+       enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_I) {
      radeon_enc_code_fixed_bits(enc, 0x0, 1);

      /* long-term reference */
@@ -962,6 +963,7 @@ static void radeon_enc_slice_header(struct radeon_encoder *enc)
   }

   if ((enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR) &&
+       (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_I) &&
       (enc->enc_pic.spec_misc.cabac_enable))
      radeon_enc_code_ue(enc, enc->enc_pic.spec_misc.cabac_init_idc);

--- a/src/gallium/drivers/radeonsi/radeon_vcn_enc_3_0.c
+++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc_3_0.c
@@ -301,7 +301,8 @@ static void radeon_enc_slice_header(struct radeon_encoder *enc)
      radeon_enc_code_fixed_bits(enc, 0x1, 1); /* direct_spatial_mv_pred_flag */

   /* ref_pic_list_modification() */
-   if (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR) {
+   if (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR &&
+       enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_I) {
      radeon_enc_code_fixed_bits(enc, 0x0, 1);

      /* long-term reference */
@@ -347,6 +348,7 @@ static void radeon_enc_slice_header(struct radeon_encoder *enc)
   }

   if ((enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR) &&
+       (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_I) &&
       (enc->enc_pic.spec_misc.cabac_enable))
      radeon_enc_code_ue(enc, enc->enc_pic.spec_misc.cabac_init_idc);

--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -382,17 +382,33 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture
         state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode);
         state[4] |= S_008F20_PITCH(tex->surface.u.gfx9.zs.stencil_epitch);
      } else {
-         uint16_t epitch = tex->surface.u.gfx9.epitch;
-         if (tex->buffer.b.b.format == PIPE_FORMAT_R8G8_R8B8_UNORM &&
-             block_width == 1) {
-            /* epitch is patched in ac_surface for sdma/vcn blocks to get
-             * a value expressed in elements unit.
-             * But here the texture is used with block_width == 1 so we
-             * need epitch in pixel units.
-             */
-            epitch = (epitch + 1) / tex->surface.blk_w - 1;
-         }
         state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.swizzle_mode);
+
+         uint32_t hw_format = G_008F14_DATA_FORMAT(state[1]);
+         uint16_t epitch = tex->surface.u.gfx9.epitch;
+
+         /* epitch is surf_pitch - 1 and is expressed in element units.
+          * For some reason I don't understand, when a packed YUV format
+          * like UYVY is used, we have to double epitch (making it a pixel
+          * pitch instead of an element pitch). Note that it's only done
+          * when sampling the texture using its native format; we don't
+          * need to do this when sampling it as UINT32 (as done by
+          * SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT).
+          * This looks broken, so it's possible that surf_pitch / epitch
+          * are computed incorrectly, but that's the only way I found
+          * to get these use cases to work properly:
+          *   - yuyv dmabuf import (#6131)
+          *   - jpeg vaapi decode
+          *   - yuyv texture sampling (!26947)
+          *   - jpeg vaapi get image (#10375)
+          */
+         if ((tex->buffer.b.b.format == PIPE_FORMAT_R8G8_R8B8_UNORM ||
+             tex->buffer.b.b.format == PIPE_FORMAT_G8R8_B8R8_UNORM) &&
+             (hw_format == V_008F14_IMG_DATA_FORMAT_GB_GR ||
+                hw_format == V_008F14_IMG_DATA_FORMAT_BG_RG)) {
+            epitch = (epitch + 1) * 2 - 1;
+         }
+
         state[4] |= S_008F20_PITCH(epitch);
      }

--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -4909,7 +4909,7 @@ fixup_io_locations(nir_shader *nir)
            if (var->data.location == VARYING_SLOT_VAR0)
               var->data.driver_location = 0;
            else if (var->data.patch)
-               var->data.driver_location = var->data.location - VARYING_SLOT_VAR0;
+               var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
            else
               var->data.driver_location = var->data.location;
         }
@@ -4936,7 +4936,7 @@ fixup_io_locations(nir_shader *nir)
                  size += glsl_count_vec4_slots(var->type, false, false);
            }
            if (var->data.patch)
-               var->data.driver_location = var->data.location - VARYING_SLOT_VAR0;
+               var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
            else
               var->data.driver_location = slot;
            found = true;
--- a/src/gallium/frontends/lavapipe/lvp_device.c
+++ b/src/gallium/frontends/lavapipe/lvp_device.c
@@ -2404,7 +2404,7 @@ lvp_nv_dgc_token_to_cmd_type(const VkIndirectCommandsLayoutTokenNV *token)
         assert(!"unknown token type!");
         break;
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV:
-         return VK_CMD_PUSH_CONSTANTS;
+         return VK_CMD_PUSH_CONSTANTS2_KHR;
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV:
         return VK_CMD_BIND_INDEX_BUFFER;
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV:
@@ -2447,7 +2447,7 @@ VKAPI_ATTR void VKAPI_CALL lvp_GetGeneratedCommandsMemoryRequirementsNV(
         size += sizeof(*cmd->u.bind_vertex_buffers2.sizes) + sizeof(*cmd->u.bind_vertex_buffers2.strides);
         break;
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV:
-         size += token->pushconstantSize;
+         size += token->pushconstantSize + sizeof(VkPushConstantsInfoKHR);
         break;
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SHADER_GROUP_NV:
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV:
--- a/src/gallium/frontends/lavapipe/lvp_execute.c
+++ b/src/gallium/frontends/lavapipe/lvp_execute.c
@@ -3841,15 +3841,17 @@ process_sequence(struct rendering_state *state,
      }
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV: {
         uint32_t *data = input;
-         cmd_size += token->pushconstantSize;
+         cmd_size += token->pushconstantSize + sizeof(VkPushConstantsInfoKHR);
         if (max_size < size + cmd_size)
            abort();
-         cmd->u.push_constants.layout = token->pushconstantPipelineLayout;
-         cmd->u.push_constants.stage_flags = token->pushconstantShaderStageFlags;
-         cmd->u.push_constants.offset = token->pushconstantOffset;
-         cmd->u.push_constants.size = token->pushconstantSize;
-         cmd->u.push_constants.values = (void*)cmdptr;
-         memcpy(cmd->u.push_constants.values, data, token->pushconstantSize);
+         cmd->u.push_constants2_khr.push_constants_info = (void*)cmdptr;
+         VkPushConstantsInfoKHR *pci = cmd->u.push_constants2_khr.push_constants_info;
+         pci->layout = token->pushconstantPipelineLayout;
+         pci->stageFlags = token->pushconstantShaderStageFlags;
+         pci->offset = token->pushconstantOffset;
+         pci->size = token->pushconstantSize;
+         pci->pValues = (void*)((uint8_t*)cmdptr + sizeof(VkPushConstantsInfoKHR));
+         memcpy((void*)pci->pValues, data, token->pushconstantSize);
         break;
      }
      case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV: {
--- a/src/gallium/frontends/rusticl/core/kernel.rs
+++ b/src/gallium/frontends/rusticl/core/kernel.rs
@@ -317,7 +317,7 @@ where
    res
 }

-fn opt_nir(nir: &mut NirShader, dev: &Device) {
+fn opt_nir(nir: &mut NirShader, dev: &Device, has_explicit_types: bool) {
    let nir_options = unsafe {
        &*dev
            .screen
@@ -342,7 +342,9 @@ fn opt_nir(nir: &mut NirShader, dev: &Device) {
        }

        progress |= nir_pass!(nir, nir_opt_deref);
-        progress |= nir_pass!(nir, nir_opt_memcpy);
+        if has_explicit_types {
+            progress |= nir_pass!(nir, nir_opt_memcpy);
+        }
        progress |= nir_pass!(nir, nir_opt_dce);
        progress |= nir_pass!(nir, nir_opt_undef);
        progress |= nir_pass!(nir, nir_opt_constant_folding);
@@ -451,11 +453,10 @@ fn lower_and_optimize_nir(
    printf_opts.max_buffer_size = dev.printf_buffer_size() as u32;
    nir_pass!(nir, nir_lower_printf, &printf_opts);

-    opt_nir(nir, dev);
+    opt_nir(nir, dev, false);

    let mut args = KernelArg::from_spirv_nir(args, nir);
    let mut internal_args = Vec::new();
-    nir_pass!(nir, nir_lower_memcpy);

    let dv_opts = nir_remove_dead_variables_options {
        can_remove_var: Some(can_remove_var),
@@ -626,7 +627,8 @@ fn lower_and_optimize_nir(
        Some(glsl_get_cl_type_size_align),
    );

-    opt_nir(nir, dev);
+    opt_nir(nir, dev, true);
+    nir_pass!(nir, nir_lower_memcpy);

    nir_pass!(
        nir,
@@ -655,7 +657,7 @@ fn lower_and_optimize_nir(

    nir_pass!(nir, nir_lower_convert_alu_types, None);

-    opt_nir(nir, dev);
+    opt_nir(nir, dev, true);

    /* before passing it into drivers, assign locations as drivers might remove nir_variables or
     * other things we depend on
--- a/src/gallium/targets/lavapipe/meson.build
+++ b/src/gallium/targets/lavapipe/meson.build
@@ -50,7 +50,7 @@ _dev_icd = custom_target(
  command : [
    prog_python, '@INPUT0@',
    '--api-version', '1.1', '--xml', '@INPUT1@',
-    '--lib-path', meson.current_build_dir() / 'libvulkan_lvp.so',
+    '--lib-path', meson.current_build_dir() / icd_file_name,
    '--out', '@OUTPUT@',
  ],
  build_by_default : true,
--- a/src/intel/common/intel_aux_map.c
+++ b/src/intel/common/intel_aux_map.c
@@ -728,7 +728,7 @@ intel_aux_map_add_mapping(struct intel_aux_map_context *ctx, uint64_t main_addre
   if (!success && (main_inc_addr - main_address) > 0) {
      /* If the mapping failed, remove the mapped portion. */
      remove_mapping_locked(ctx, main_address,
-                            main_size_B - (main_inc_addr - main_address),
+                            main_inc_addr - main_address,
                            false /* reset_refcount */, &state_changed);
   }
   pthread_mutex_unlock(&ctx->mutex);
--- a/src/intel/compiler/brw_disasm.c
+++ b/src/intel/compiler/brw_disasm.c
@@ -1055,8 +1055,7 @@ static int
 dest_dpas_3src(FILE *file, const struct intel_device_info *devinfo,
               const brw_inst *inst)
 {
-   uint32_t reg_file =
-      reg_file = brw_inst_dpas_3src_dst_reg_file(devinfo, inst);
+   uint32_t reg_file = brw_inst_dpas_3src_dst_reg_file(devinfo, inst);

   if (reg(file, reg_file, brw_inst_dpas_3src_dst_reg_nr(devinfo, inst)) == -1)
      return 0;
@@ -1551,8 +1550,7 @@ static int
 src0_dpas_3src(FILE *file, const struct intel_device_info *devinfo,
               const brw_inst *inst)
 {
-   uint32_t reg_file =
-      reg_file = brw_inst_dpas_3src_src0_reg_file(devinfo, inst);
+   uint32_t reg_file = brw_inst_dpas_3src_src0_reg_file(devinfo, inst);

   if (reg(file, reg_file, brw_inst_dpas_3src_src0_reg_nr(devinfo, inst)) == -1)
      return 0;
@@ -1573,8 +1571,7 @@ static int
 src1_dpas_3src(FILE *file, const struct intel_device_info *devinfo,
               const brw_inst *inst)
 {
-   uint32_t reg_file =
-      reg_file = brw_inst_dpas_3src_src1_reg_file(devinfo, inst);
+   uint32_t reg_file = brw_inst_dpas_3src_src1_reg_file(devinfo, inst);

   if (reg(file, reg_file, brw_inst_dpas_3src_src1_reg_nr(devinfo, inst)) == -1)
      return 0;
@@ -1595,8 +1592,7 @@ static int
 src2_dpas_3src(FILE *file, const struct intel_device_info *devinfo,
               const brw_inst *inst)
 {
-   uint32_t reg_file =
-      reg_file = brw_inst_dpas_3src_src2_reg_file(devinfo, inst);
+   uint32_t reg_file = brw_inst_dpas_3src_src2_reg_file(devinfo, inst);

   if (reg(file, reg_file, brw_inst_dpas_3src_src2_reg_nr(devinfo, inst)) == -1)
      return 0;
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -5116,13 +5116,15 @@ const struct intel_device_info_pat_entry *
 anv_device_get_pat_entry(struct anv_device *device,
                         enum anv_bo_alloc_flags alloc_flags)
 {
+   if (alloc_flags & ANV_BO_ALLOC_IMPORTED)
+      return &device->info->pat.cached_coherent;
+
   /* PAT indexes has no actual effect in DG2 and DG1, smem caches will always
    * be snopped by GPU and lmem will always be WC.
    * This might change in future discrete platforms.
    */
   if (anv_physical_device_has_vram(device->physical)) {
-      if ((alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM) ||
-          (alloc_flags & ANV_BO_ALLOC_IMPORTED))
+      if (alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM)
         return &device->info->pat.cached_coherent;
      return &device->info->pat.writecombining;
   }
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1479,6 +1479,7 @@ struct anv_gfx_dynamic_state {
      bool     RenderingDisable;
      uint32_t RenderStreamSelect;
      uint32_t ReorderMode;
+      uint32_t ForceRendering;
   } so;

   /* 3DSTATE_SAMPLE_MASK */
--- a/src/intel/vulkan/genX_gfx_state.c
+++ b/src/intel/vulkan/genX_gfx_state.c
@@ -496,8 +496,8 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
      SET(STREAMOUT, so.RenderingDisable, dyn->rs.rasterizer_discard_enable);
      SET(STREAMOUT, so.RenderStreamSelect, dyn->rs.rasterization_stream);

-#if INTEL_NEEDS_WA_14017076903
-      /* Wa_14017076903 :
+#if INTEL_NEEDS_WA_18022508906
+      /* Wa_18022508906 :
       *
       * SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage:
       *
@@ -525,8 +525,9 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
       * Here we force rendering to get SOL_INT::Render_Enable when occlusion
       * queries are active.
       */
-      if (!GET(so.RenderingDisable) && gfx->n_occlusion_queries > 0)
-         SET(STREAMOUT, so.ForceRendering, Force_on);
+      SET(STREAMOUT, so.ForceRendering,
+          (!GET(so.RenderingDisable) && gfx->n_occlusion_queries > 0) ?
+          Force_on : 0);
 #endif

      switch (dyn->rs.provoking_vertex) {
@@ -1526,6 +1527,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
         SET(so, so, RenderingDisable);
         SET(so, so, RenderStreamSelect);
         SET(so, so, ReorderMode);
+         SET(so, so, ForceRendering);
      }
   }

--- a/src/intel/vulkan/genX_gpu_memcpy.c
+++ b/src/intel/vulkan/genX_gpu_memcpy.c
@@ -272,7 +272,7 @@ genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state)
 void
 genX(emit_so_memcpy_end)(struct anv_memcpy_state *state)
 {
-   if (intel_device_info_is_dg2(state->device->info))
+   if (intel_needs_workaround(state->device->info, 16013994831))
      genX(batch_set_preemption)(state->batch, state->device->info, _3D, true);

   anv_batch_emit(state->batch, GENX(MI_BATCH_BUFFER_END), end);
--- a/src/mesa/main/texobj.c
+++ b/src/mesa/main/texobj.c
@@ -817,7 +817,8 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx,
            return;
         }
         if (t->Image[face][baseLevel]->InternalFormat !=
-             baseImage->InternalFormat) {
+             baseImage->InternalFormat ||
+             t->Image[face][baseLevel]->TexFormat != baseImage->TexFormat) {
            incomplete(t, BASE, "Cube face format mismatch");
            return;
         }
@@ -876,7 +877,8 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx,
                  incomplete(t, MIPMAP, "TexImage[%d] is missing", i);
                  return;
               }
-               if (img->InternalFormat != baseImage->InternalFormat) {
+               if (img->InternalFormat != baseImage->InternalFormat ||
+                   img->TexFormat != baseImage->TexFormat) {
                  incomplete(t, MIPMAP, "Format[i] != Format[baseLevel]");
                  return;
               }
--- a/src/panfrost/ci/panfrost-g52-fails.txt
+++ b/src/panfrost/ci/panfrost-g52-fails.txt
@@ -446,36 +446,7 @@ dEQP-VK.spirv_assembly.instruction.compute.workgroup_memory.float32,Crash
 dEQP-VK.spirv_assembly.instruction.compute.workgroup_memory.int32,Crash
 dEQP-VK.spirv_assembly.instruction.compute.workgroup_memory.uint32,Crash

-dEQP-VK.api.buffer_view.access.storage_texel_buffer.a2b10g10r10_uint_pack32,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.a2b10g10r10_unorm_pack32,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.a8b8g8r8_sint_pack32,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.a8b8g8r8_uint_pack32,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.a8b8g8r8_unorm_pack32,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.r16g16b16a16_sfloat,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.r16g16b16a16_sint,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.r16g16b16a16_uint,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.r8g8b8a8_sint,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.r8g8b8a8_uint,Fail
-dEQP-VK.api.buffer_view.access.storage_texel_buffer.r8g8b8a8_unorm,Fail
 dEQP-VK.api.command_buffers.record_many_draws_secondary_2,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.mix_1,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.mix_array0,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_image_0,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_image_1,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_image_2,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_image_4,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_image_5,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_image_array0,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_image_array1,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_image_array2,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_0,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_1,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_2,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_4,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_5,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_array0,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_array1,Fail
-dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_array2,Fail
 dEQP-VK.glsl.operator.sequence.no_side_effects.highp_bool_vec2_fragment,Fail
 dEQP-VK.glsl.operator.sequence.no_side_effects.highp_float_uint_fragment,Fail
 dEQP-VK.glsl.operator.sequence.no_side_effects.highp_vec4_ivec4_bvec4_fragment,Fail
@@ -488,27 +459,6 @@ dEQP-VK.glsl.operator.sequence.side_effects.highp_vec4_fragment,Fail
 dEQP-VK.glsl.operator.sequence.side_effects.mediump_bool_vec2_fragment,Fail
 dEQP-VK.glsl.operator.sequence.side_effects.mediump_float_uint_fragment,Fail
 dEQP-VK.glsl.operator.sequence.side_effects.mediump_vec4_fragment,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.a2b10g10r10_unorm_pack32.oob_storage_read.range_1_texel,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.a2b10g10r10_unorm_pack32.oob_storage_read.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.a2b10g10r10_unorm_pack32.oob_uniform_read.range_1_texel,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.a2b10g10r10_unorm_pack32.oob_uniform_read.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.out_of_alloc.oob_storage_read,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.out_of_alloc.oob_uniform_read,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_storage_read.range_1_texel,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_storage_read.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_storage_write.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_uniform_read.range_1_texel,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_uniform_read.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_storage_read.range_1_texel,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_storage_read.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_storage_write.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_uniform_read.range_1_texel,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_uniform_read.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_storage_read.range_1_texel,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_storage_read.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_storage_write.range_3_texels,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_uniform_read.range_1_texel,Fail
-dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_uniform_read.range_3_texels,Fail
 dEQP-VK.texture.explicit_lod.2d.derivatives.linear_linear_mipmap_linear,Fail
 dEQP-VK.texture.explicit_lod.2d.derivatives.linear_linear_mipmap_nearest,Fail
 dEQP-VK.texture.explicit_lod.2d.derivatives.linear_nearest_mipmap_linear,Fail
--- a/src/panfrost/vulkan/panvk_private.h
+++ b/src/panfrost/vulkan/panvk_private.h
@@ -398,7 +398,6 @@ struct panvk_pipeline_layout {
   unsigned num_dyn_ubos;
   unsigned num_dyn_ssbos;
   uint32_t num_imgs;
-   uint32_t num_sets;

   struct {
      uint32_t size;
--- a/src/panfrost/vulkan/panvk_vX_cmd_buffer.c
+++ b/src/panfrost/vulkan/panvk_vX_cmd_buffer.c
@@ -514,7 +514,7 @@ panvk_fill_non_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

-   for (unsigned s = 0; s < pipeline->layout->num_sets; s++) {
+   for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) {
      const struct panvk_descriptor_set *set = desc_state->sets[s];

      if (!set)
--- a/src/panfrost/vulkan/panvk_vX_device.c
+++ b/src/panfrost/vulkan/panvk_vX_device.c
@@ -121,7 +121,7 @@ panvk_queue_submit_batch(struct panvk_queue *queue, struct panvk_batch *batch,
   }

   if (debug & PANVK_DEBUG_TRACE)
-      pandecode_next_frame(0);
+      pandecode_next_frame(pdev->decode_ctx);

   batch->issued = true;
 }
--- a/src/util/00-mesa-defaults.conf
+++ b/src/util/00-mesa-defaults.conf
@@ -1199,6 +1199,9 @@ TODO: document the other workarounds.
        <application name="Baldur's Gate 3" executable="bg3.exe">
            <option name="anv_disable_fcv" value="true" />
        </application>
+        <application name="The Finals" executable="Discovery.exe">
+            <option name="force_vk_vendor" value="-1" />
+        </application>
        <!--
        Disable 16-bit feature on zink and angle so that GLES mediump doesn't
        lower to our inefficent 16-bit shader support.  No need to do so for
--- a/src/util/00-radv-defaults.conf
+++ b/src/util/00-radv-defaults.conf
@@ -106,6 +106,7 @@ Application bugs worked around in this file:
        <application name="DOOM Eternal" application_name_match="DOOMEternal">
            <option name="radv_zero_vram" value="true" />
            <option name="radv_legacy_sparse_binding" value="true" />
+            <option name="radv_force_active_accel_struct_leaves" value="true" />
        </application>

        <application name="No Man's Sky" application_name_match="No Man's Sky">
--- a/src/util/detect_arch.h
+++ b/src/util/detect_arch.h
@@ -97,6 +97,10 @@
 #define DETECT_ARCH_MIPS 1
 #endif

+#if defined(__hppa__)
+#define DETECT_ARCH_HPPA 1
+#endif
+
 #ifndef DETECT_ARCH_X86
 #define DETECT_ARCH_X86 0
 #endif
@@ -137,4 +141,8 @@
 #define DETECT_ARCH_MIPS 0
 #endif

+#ifndef DETECT_ARCH_HPPA
+#define DETECT_ARCH_HPPA 0
+#endif
+
 #endif /* UTIL_DETECT_ARCH_H_ */
--- a/src/util/driconf.h
+++ b/src/util/driconf.h
@@ -716,6 +716,10 @@
 #define DRI_CONF_RADV_CLEAR_LDS(def) \
   DRI_CONF_OPT_B(radv_clear_lds, def, "Clear LDS at the end of shaders. Might decrease performance.")

+#define DRI_CONF_RADV_FORCE_ACTIVE_ACCEL_STRUCT_LEAVES(def) \
+   DRI_CONF_OPT_B(radv_force_active_accel_struct_leaves, def, \
+                  "Force leaf nodes of acceleration structures to be marked active.")
+
 /**
 * \brief ANV specific configuration options
 */
--- a/src/util/tests/half_float_test.cpp
+++ b/src/util/tests/half_float_test.cpp
@@ -46,18 +46,35 @@ static bool issignaling(float x)
 }
 #endif

-/* Sanity test our test values */
-TEST(half_to_float_test, nan_test)
+/* The sign of the bit for signaling is different on some old processors
+ * (PA-RISC, old MIPS without IEEE-754-2008 support).
+ *
+ * Disable the tests on those platforms, because it's not clear how to
+ * correctly handle NaNs when the CPU and GPU differ in their convention.
+ */
+#if DETECT_ARCH_HPPA || ((DETECT_ARCH_MIPS || DETECT_ARCH_MIPS64) && !defined __mips_nan2008)
+#define IEEE754_2008_NAN 0
+#else
+#define IEEE754_2008_NAN 1
+#endif
+
+/* Sanity test our inf test values */
+TEST(half_to_float_test, inf_test)
 {
   EXPECT_TRUE(isinf(TEST_POS_INF));
   EXPECT_TRUE(isinf(TEST_NEG_INF));
+}

+/* Make sure that our 32-bit float nan test value we're using is a
+ * non-signaling NaN.
+ */
+#if IEEE754_2008_NAN
+TEST(half_to_float_test, nan_test)
+#else
+TEST(half_to_float_test, DISABLED_nan_test)
+#endif
+{
   EXPECT_TRUE(isnan(TEST_NAN));
-   /* Make sure that our 32-bit float nan test value we're using is a
-    * non-signaling NaN.  The sign of the bit for signaling was apparently
-    * different on some old processors (PA-RISC, MIPS?).  This test value should
-    * cover Intel, ARM, and PPC, for sure.
-    */
   EXPECT_FALSE(issignaling(TEST_NAN));
 }

@@ -82,12 +99,20 @@ test_half_to_float_limits(float (*func)(uint16_t))
 }

 /* Test the optionally HW instruction-using path. */
+#if IEEE754_2008_NAN
 TEST(half_to_float_test, half_to_float_test)
+#else
+TEST(half_to_float_test, DISABLED_half_to_float_test)
+#endif
 {
   test_half_to_float_limits(_mesa_half_to_float);
 }

+#if IEEE754_2008_NAN
 TEST(half_to_float_test, half_to_float_slow_test)
+#else
+TEST(half_to_float_test, DISABLED_half_to_float_slow_test)
+#endif
 {
   test_half_to_float_limits(_mesa_half_to_float_slow);
 }
--- a/src/virtio/vulkan/vn_ring.c
+++ b/src/virtio/vulkan/vn_ring.c
@@ -633,7 +633,6 @@ vn_ring_submit_command(struct vn_ring *ring,
   vn_cs_encoder_commit(&submit->command);

   size_t reply_offset = 0;
-   submit->reply_shmem = NULL;
   if (submit->reply_size) {
      submit->reply_shmem = vn_instance_reply_shmem_alloc(
         ring->instance, submit->reply_size, &reply_offset);
@@ -653,11 +652,16 @@ vn_ring_submit_command(struct vn_ring *ring,
   mtx_unlock(&ring->mutex);

   if (submit->reply_size) {
-      void *reply_ptr = submit->reply_shmem->mmap_ptr + reply_offset;
-      submit->reply =
-         VN_CS_DECODER_INITIALIZER(reply_ptr, submit->reply_size);
-      if (submit->ring_seqno_valid)
+      if (likely(submit->ring_seqno_valid)) {
+         void *reply_ptr = submit->reply_shmem->mmap_ptr + reply_offset;
+         submit->reply =
+            VN_CS_DECODER_INITIALIZER(reply_ptr, submit->reply_size);
         vn_ring_wait_seqno(ring, submit->ring_seqno);
+      } else {
+         vn_renderer_shmem_unref(ring->instance->renderer,
+                                 submit->reply_shmem);
+         submit->reply_shmem = NULL;
+      }
   }
 }

--- a/src/virtio/vulkan/vn_ring.h
+++ b/src/virtio/vulkan/vn_ring.h
@@ -77,7 +77,7 @@ struct vn_ring_submit_command {
   struct vn_renderer_shmem *reply_shmem;
   struct vn_cs_decoder reply;

-   /* valid when instance ring submission succeeds */
+   /* valid when ring submission succeeds */
   bool ring_seqno_valid;
   uint32_t ring_seqno;
 };
@@ -95,6 +95,8 @@ vn_ring_submit_command_init(struct vn_ring *ring,
   submit->reply_size = reply_size;
   submit->reply_shmem = NULL;

+   submit->ring_seqno_valid = false;
+
   return &submit->command;
 }

--- a/src/vulkan/runtime/vk_command_buffer.c
+++ b/src/vulkan/runtime/vk_command_buffer.c
@@ -200,6 +200,10 @@ VkShaderStageFlags
 vk_shader_stages_from_bind_point(VkPipelineBindPoint pipelineBindPoint)
 {
   switch (pipelineBindPoint) {
+#ifdef VK_ENABLE_BETA_EXTENSIONS
+    case VK_PIPELINE_BIND_POINT_EXECUTION_GRAPH_AMDX:
+      return VK_SHADER_STAGE_COMPUTE_BIT | MESA_VK_SHADER_STAGE_WORKGRAPH_HACK_BIT_FIXME;
+#endif
   case VK_PIPELINE_BIND_POINT_COMPUTE:
      return VK_SHADER_STAGE_COMPUTE_BIT;
   case VK_PIPELINE_BIND_POINT_GRAPHICS:
--- a/src/vulkan/runtime/vk_command_buffer.h
+++ b/src/vulkan/runtime/vk_command_buffer.h
@@ -98,6 +98,8 @@ enum mesa_vk_command_buffer_state {
   MESA_VK_COMMAND_BUFFER_STATE_PENDING,
 };

+/* this needs spec fixes */
+#define MESA_VK_SHADER_STAGE_WORKGRAPH_HACK_BIT_FIXME (1<<30)
 VkShaderStageFlags vk_shader_stages_from_bind_point(VkPipelineBindPoint pipelineBindPoint);

 struct vk_command_buffer {
--- a/src/vulkan/wsi/wsi_common_wayland.c
+++ b/src/vulkan/wsi/wsi_common_wayland.c
@@ -2262,7 +2262,8 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
    */
   if (wsi_wl_surface->chain &&
       wsi_swapchain_to_handle(&wsi_wl_surface->chain->base) != pCreateInfo->oldSwapchain) {
-      return VK_ERROR_NATIVE_WINDOW_IN_USE_KHR;
+      result = VK_ERROR_NATIVE_WINDOW_IN_USE_KHR;
+      goto fail;
   }
   if (pCreateInfo->oldSwapchain) {
      VK_FROM_HANDLE(wsi_wl_swapchain, old_chain, pCreateInfo->oldSwapchain);
@@ -2376,16 +2377,20 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
      uint64_t *drm_modifiers_copy =
         vk_alloc(pAllocator, sizeof(*drm_modifiers) * num_drm_modifiers, 8,
                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-      if (!drm_modifiers_copy)
-         goto fail;
+      if (!drm_modifiers_copy) {
+         result = VK_ERROR_OUT_OF_HOST_MEMORY;
+         goto fail_free_wl_chain;
+      }

      typed_memcpy(drm_modifiers_copy, drm_modifiers, num_drm_modifiers);
      chain->drm_modifiers = drm_modifiers_copy;
   }

   if (chain->wsi_wl_surface->display->wp_presentation_notwrapped) {
-      if (!wsi_init_pthread_cond_monotonic(&chain->present_ids.list_advanced))
-         goto fail;
+      if (!wsi_init_pthread_cond_monotonic(&chain->present_ids.list_advanced)) {
+         result = VK_ERROR_OUT_OF_HOST_MEMORY;
+         goto fail_free_wl_chain;
+      }
      pthread_mutex_init(&chain->present_ids.lock, NULL);

      wl_list_init(&chain->present_ids.outstanding_list);
@@ -2403,7 +2408,7 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
      result = wsi_wl_image_init(chain, &chain->images[i],
                                 pCreateInfo, pAllocator);
      if (result != VK_SUCCESS)
-         goto fail_image_init;
+         goto fail_free_wl_images;
      chain->images[i].busy = false;
   }

@@ -2411,14 +2416,15 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,

   return VK_SUCCESS;

-fail_image_init:
+fail_free_wl_images:
   wsi_wl_swapchain_images_free(chain);
-
+fail_free_wl_chain:
   wsi_wl_swapchain_chain_free(chain, pAllocator);
 fail:
   vk_free(pAllocator, chain);
   wsi_wl_surface->chain = NULL;

+   assert(result != VK_SUCCESS);
   return result;
 }
Author	SHA1	Message	Date
Eric Engestrom	e716b08f86	VERSION: bump for 24.0.0-rc2	2024-01-17 22:28:20 +00:00
Friedrich Vock	9d1a064663	radv/rt: Add workaround to make leaves always active DOOM Eternal builds acceleration structures with inactive primitives and tries to make them active in later AS updates. This is disallowed by the spec and triggers a GPU hang. Fix the hang by working around the bug. Cc: mesa-stable Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27034> (cherry picked from commit `a9831caa14`)	2024-01-17 21:42:02 +00:00
Boris Brezillon	f7f823c787	panvk: Fix access to uninitialized panvk_pipeline_layout::num_sets field Commit `73eecffabd` ("panvk: Use the vk_pipeline_layout base struct") reworked the panvk logic to use vk_pipeline_layout, which contains the number of descriptor set layouts referenced by a pipeline layout, thus deprecating panvk_pipeline_layout::num_sets. Make panvk_fill_non_vs_attribs() use vk_pipeline_layout::set_count instead of panvk_pipeline_layout::num_sets and kill the latter so we can't introduce new users. Fixes: `73eecffabd` ("panvk: Use the vk_pipeline_layout base struct") Cc: mesa-stable Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Constantine Shablya <constantine.shablya@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27107> (cherry picked from commit `b18bfed2c5`)	2024-01-17 21:41:44 +00:00
Boris Brezillon	b65d7520f6	panvk: Fix tracing pandecode_next_frame() takes a decode context. Passing NULL leads to a NULL deref. Fixes: `56be9a55be` ("pan/decode: handle more than one panfrost_device") Cc: mesa-stable Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Constantine Shablya <constantine.shablya@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27107> (cherry picked from commit `35a02560c8`)	2024-01-17 21:39:08 +00:00
Sviatoslav Peleshko	1246e54f1c	nir: Use alu source components count in nir_alu_srcs_negative_equal When we use source from ALU instruction directly, the default swizzle array should be populated with the same amount of components as the src has. Otherwise, if we use nir_ssa_alu_instr_src_components, it can return the destination components count that is lower than component index actually used in that source. This can lead to false equality between 0 (uninitialized) and 0 (.x) in swizzle comparison below. Fixes: `c6ee46a7` ("nir: Add nir_alu_srcs_negative_equal") Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8704 Signed-off-by: Sviatoslav Peleshko <sviatoslav.peleshko@globallogic.com> Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22655> (cherry picked from commit `6b0bfdfa9e`)	2024-01-17 21:39:06 +00:00
Erico Nunes	4175b4d547	Revert "ci: lima farm is down" This reverts commit `601b826a5e`. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26905> (cherry picked from commit `8bd4cae768`)	2024-01-17 21:39:02 +00:00
Yonggang Luo	9732d1bdcd	compiler/spirv: The spirv shader is binary, should write in binary mode Fixes: `53265c8798` ("spirv: Add a mechanism for dumping failing shaders") Signed-off-by: Yonggang Luo <luoyonggang@gmail.com> Reviewed-by: Jesse Natalie <jenatali@microsoft.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26775> (cherry picked from commit `fd11818828`)	2024-01-17 21:39:00 +00:00
Yiwei Zhang	8974222433	vulkan/wsi/wayland: fix returns and avoid leaks for failed swapchain Cc: mesa-stable Signed-off-by: Yiwei Zhang <zzyiwei@chromium.org> Tested-by: Eric Engestrom <eric@engestrom.ch> Reviewed-by: Ryan Neph <ryanneph@google.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27080> (cherry picked from commit `dc5725ee29`)	2024-01-17 21:38:56 +00:00
Eric Engestrom	ce34ec41cd	ci: fix job dependency error in MRs for bin/ci/* scripts 'debian/x86_64_build' job needs 'debian/x86_64_build-base' job, but 'debian/x86_64_build-base' is not in any previous stage Fixes: `f298a0e709` ("ci: make sure we evaluate the python-test rules first") Fixes: `2c9fdaa830` ("ci: fix python-test dependency error on merge requests") Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27042> (cherry picked from commit `2ce0b5ab0a`)	2024-01-17 21:38:53 +00:00
Eric Engestrom	3dabc03b58	.pick_status.json: Update to `10e2dbb63b`	2024-01-17 21:36:44 +00:00
David Rosca	25ae9134dd	radeonsi/vcn: Fix H264 slice header when encoding I frames Cc: mesa-stable Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27029> (cherry picked from commit `865abfde63`)	2024-01-16 18:41:37 +00:00
Patrick Lerda	43a00ad0fa	glsl/nir: fix gl_nir_cross_validate_outputs_to_inputs() memory leak For instance, this issue is triggered with vs-to-fs-overlap.shader_test -auto -fbo: Direct leak of 24 byte(s) in 1 object(s) allocated from: #0 0x7fe64f58e9a7 in calloc (/usr/lib64/libasan.so.6+0xb19a7) #1 0x7fe642ca2839 in _mesa_symbol_table_ctor ../src/mesa/program/symbol_table.c:286 #2 0x7fe642ff003d in gl_nir_cross_validate_outputs_to_inputs ../src/compiler/glsl/gl_nir_link_varyings.c:728 #3 0x7fe642d7c7d8 in gl_nir_link_glsl ../src/compiler/glsl/gl_nir_linker.c:1357 #4 0x7fe642be6931 in st_link_glsl_to_nir ../src/mesa/state_tracker/st_glsl_to_nir.cpp:562 #5 0x7fe642be6931 in st_link_shader ../src/mesa/state_tracker/st_glsl_to_nir.cpp:944 #6 0x7fe642acab55 in link_program ../src/mesa/main/shaderapi.c:1336 #7 0x7fe642acab55 in link_program_error ../src/mesa/main/shaderapi.c:1447 #8 0x7fe6424aa389 in _mesa_unmarshal_LinkProgram src/mapi/glapi/gen/marshal_generated2.c:1911 #9 0x7fe641fd912b in glthread_unmarshal_batch ../src/mesa/main/glthread.c:139 #10 0x7fe641f48d48 in util_queue_thread_func ../src/util/u_queue.c:309 #11 0x7fe641fa442a in impl_thrd_routine ../src/c11/impl/threads_posix.c:67 Fixes: `7d1948e9b5` ("glsl: implement cross_validate_outputs_to_inputs() in nir linker") Signed-off-by: Patrick Lerda <patrick9876@free.fr> Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27071> (cherry picked from commit `bacace8634`)	2024-01-16 18:41:36 +00:00
Karol Herbst	78fd14d938	rusticl/kernel: run opt/lower_memcpy later to fix a crash nir_opt_memcpy requires explicit types to function properly. So run them after lowering vars to explicit types. Cc: mesa-stable Signed-off-by: Karol Herbst <kherbst@redhat.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27068> (cherry picked from commit `f896659894`)	2024-01-16 18:41:35 +00:00
Tatsuyuki Ishi	c5b8590e6d	radv: never set DISABLE_WR_CONFIRM for CP DMA clears and copies This mirrors the changes in `69ff9c16bb` ("radeonsi: never set DISABLE_WR_CONFIRM for CP DMA clears and copies"). Cc: mesa-stable Suggested-by: Vitaliy Triang3l Kuzmin <triang3l@yandex.ru> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27053> (cherry picked from commit `43fb43ba2c`)	2024-01-16 18:41:34 +00:00
Lucas Stach	9888a95130	etnaviv: disable 64bpp render/sampler formats Vivante hardware handles 64bpp render targets and samplers in an odd way by splitting the buffer and using a pair of texture samplers or a pair of MRT outputs to access those resources. This isn't implemented in the driver right now, so we should not advertise support for those formats. CC: mesa-stable Signed-off-by: Lucas Stach <l.stach@pengutronix.de> Reviewed-by: Christian Gmeiner <cgmeiner@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26982> (cherry picked from commit `e481c1269c`)	2024-01-16 18:41:33 +00:00
Eric Engestrom	05ff891088	.pick_status.json: Update to `ff84aef116`	2024-01-16 18:41:30 +00:00
Tapani Pälli	fc4180339c	anv: check for wa 16013994831 in emit_so_memcpy_end We are toggling preemption on/off during streamout, this is also happening on gfx12 platforms, not just dg2. Cc: mesa-stable Signed-off-by: Tapani Pälli <tapani.palli@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27002> (cherry picked from commit `36f428f1de`)	2024-01-15 21:57:32 +00:00
Vinson Lee	b39ee4d766	intel/disasm: Remove duplicate variable reg_file Fix defects reported by Coverity Scan. Evaluation order violation (EVALUATION_ORDER) write_write_typo: In reg_file = reg_file = brw_inst_dpas_3src_dst_reg_file(devinfo, inst), reg_file is written twice with the same value. Fixes: `1c92dad5cb` ("intel/disasm: Disassembly support for DPAS") Signed-off-by: Vinson Lee <vlee@freedesktop.org> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27056> (cherry picked from commit `73835874a8`)	2024-01-15 21:57:31 +00:00
Lionel Landwerlin	5b8984f32f	anv: hide vendor ID for The Finals XeSS workaround. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10436 Cc: mesa-stable Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27057> (cherry picked from commit `a34a113059`)	2024-01-15 21:57:30 +00:00
Lionel Landwerlin	eb3d73073f	intel/aux_map: fix fallback unmapping range on failure Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Fixes: `7c6faa1efe` ("intel/aux_map: introduce ref count of L1 entries") Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27057> (cherry picked from commit `ff6041afdf`)	2024-01-15 21:57:29 +00:00
Jesse Natalie	f19b7d8dfc	mesa: Consider mesa format in addition to internal format for mip/cube completeness Prior to `06b526de`, the mesa format was used for these completeness checks. That was to address the case where a different internal format selected the same mesa format, and the texture shouldn't be considered compatible. But this didn't address the case where the same internal format selected a different mesa format, e.g. because the type passed to the TexImage API was different. An old WGL demo app called TexFilter.exe tries to redefine a mipped RGBA16 texture as RGBA8. This incorrect logic caused Mesa to try to copy the RGBA16 data from the smaller mips into the newly created RGBA8 data, because it thought that the texture was still mip-complete, despite the format changing. Cc: mesa-stable Reviewed-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27023> (cherry picked from commit `4cb9c77e8e`)	2024-01-15 21:57:28 +00:00
José Roberto de Souza	04ffe4771e	anv: Fix PAT entry for userptr in integrated GPUs Fixes: `060439bdf0` ("anv: Add ANV_BO_ALLOC_IMPORTED") Signed-off-by: José Roberto de Souza <jose.souza@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27040> (cherry picked from commit `49fe060b5f`)	2024-01-15 21:57:27 +00:00
Yiwei Zhang	0ebdd39d85	venus: populate oom from ring submit alloc failures ring_seqno_valid indicates a successful ring cmd submission, and can be used to avoid invalid reply decoding due to failed submit alloc. Otherwise, the garbled VkResult will mislead into initialization failure instead of oom. Below cts failure is fixed: dEQP-VK.api.device_init.create_instance_device_intentional_alloc_fail.basic Fixes: `ec131c6e55` ("venus: use instance allocator for ring allocs") Signed-off-by: Yiwei Zhang <zzyiwei@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27026> (cherry picked from commit `ecd50e70d4`)	2024-01-15 21:57:24 +00:00
Matt Turner	fcd78c5281	util/tests: Disable half-float NaN test on hppa/old-mips Bug: https://bugs.gentoo.org/908079 Fixes: `067023dce2` ("util: Add some unit tests of the half-float conversions.") Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26991> (cherry picked from commit `5b7c733902`)	2024-01-15 21:56:38 +00:00
Matt Turner	97ebcff41c	util: Add DETECT_ARCH_HPPA macro Cc: mesa-stable Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26991> (cherry picked from commit `0540c9de44`)	2024-01-15 21:56:37 +00:00
Pierre-Eric Pelloux-Prayer	6febac5c96	Revert "ci/radeonsi: disable VA-API testing on raven" This reverts commit `9017852de4`. Reviewed-by: Marek Olšák <marek.olsak@amd.com> Reviewed-by: David Heidelberg <david.heidelberg@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26947> (cherry picked from commit `e2f39e8aca`)	2024-01-15 21:56:36 +00:00
Pierre-Eric Pelloux-Prayer	ab960ee0bf	radeonsi: compute epitch when modifying surf_pitch In the linear case with no mipmaps addrlib sets epitch to surf_pitch - 1 so let's do the same thing here. The change in si_descriptors.c looks like it's papering over a bug but I couldn't find any other changes that wouldn't break at least one use case. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10375 Fixes: `115b61e51f` ("ac/surface: don't oversize surf_size") Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26947> (cherry picked from commit `4e76c4ecb4`)	2024-01-15 21:56:10 +00:00
Tatsuyuki Ishi	fc11cbb37e	radv: Recompute max_waves after postprocessing RT config The max waves for RT prolog need to be recalculated after merging the resource usage of all shaders invoked from it. Note that there is no need to panic, as the info was only used to calculate maximum scratch size and with the RT prolog being low footprint, this likely only caused overestimation rather than underestimation. Fixes: `533ec9843e` ("radv: Precompute shader max_waves.") Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26998> (cherry picked from commit `63827751e1`)	2024-01-15 21:56:09 +00:00
Mike Blumenkrantz	1f5604ed45	zink: fix separate shader patch variable location adjustment in spirv, these start at location 0, not location 32 fixes #10414 Fixes: `d9942442f2` ("zink: handle patch variable locations for separate shaders better") Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26981> (cherry picked from commit `565ee4fafc`)	2024-01-15 21:56:07 +00:00
Lionel Landwerlin	cc677d7c30	anv: fix disabled Wa_14017076903/18022508906 Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Fixes: `d0669f3ede` ("intel/dev: switch defect identifiers to use lineage numbers") Reviewed-by: José Roberto de Souza <jose.souza@intel.com> Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27008> (cherry picked from commit `695b4a2992`)	2024-01-15 21:56:05 +00:00
Eric Engestrom	f575e2b9f1	ci: make sure we evaluate the python-test rules first Fixes: `2c9fdaa830` ("ci: fix python-test dependency error on merge requests") Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26984> (cherry picked from commit `f298a0e709`)	2024-01-15 21:56:02 +00:00
Timur Kristóf	3753919715	radv: Correctly select SDMA support for PRIME blit. Cc: mesa-stable Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10317 Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27015> (cherry picked from commit `436b89e838`)	2024-01-15 21:56:00 +00:00
Pavel Ondračka	757192b046	r300: fix reusing of color varying slots for generic ones This was broken when I added texcoord support, the problem is that we failed to properly count the number of used fs inputs and thus we failed to make the proper decision when to reuse the color varying slot Also fix the error messages, they were incorrect after the rewrite as well. This fixes a bunch of piglits. Fixes: `d4b8e8a481` Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com> Reviewed-by: Filip Gawin <filip.gawin@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27003> (cherry picked from commit `53c17d85ab`)	2024-01-15 21:55:56 +00:00
Mike Blumenkrantz	02b5a2348d	lavapipe: fix devenv icd filename fixes #10408 cc: mesa-stable Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26985> (cherry picked from commit `465e26dd98`)	2024-01-15 21:55:52 +00:00
Mike Blumenkrantz	3c36933195	lavapipe: use pushconstants2 for dgc Fixes: `ec656e1984` ("lavapipe: maint6") Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26977> (cherry picked from commit `bf729063c3`)	2024-01-15 21:55:51 +00:00
Mike Blumenkrantz	ae5c0e6600	vk/cmdbuf: add back deleted maint6 workgraph bits this otherwise breaks workgraph support in lavapipe Fixes: `ec656e1984` ("lavapipe: maint6") Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26977> (cherry picked from commit `b6bfa73dc7`)	2024-01-15 21:22:38 +00:00
Eric Engestrom	f1064107e9	.pick_status.json: Mark `0557f0d59c` as denominated	2024-01-15 09:44:39 +00:00
Eric Engestrom	6b4f639474	.pick_status.json: Update to `4fe5f06d40`	2024-01-15 09:43:41 +00:00
Eric Engestrom	26a96af808	VERSION: bump for 24.0.0-rc1	2024-01-11 14:19:21 +00:00