bump version for 20.2.0-rc2

aco: execute branch instructions in WQM if necessary
It could happen that only the branch condition was computed in WQM and not the branch instruction. There is now some redundancy which should be cleaned up. Fixes: 3817fa7a4d ('aco: fix WQM handling in nested loops') Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6260> (cherry picked from commit fdb97d3d29)
2020-08-12 10:06:51 -07:00 · 2020-08-11 09:43:35 -07:00 · 2020-08-11 09:43:35 -07:00 · 2020-08-11 09:43:34 -07:00 · 2020-08-11 09:43:34 -07:00 · 2020-08-11 09:43:33 -07:00
38 changed files with 1448 additions and 356 deletions
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -222,7 +222,7 @@ x86_build:
  extends:
    - .use-x86_build-base
  variables:
-    FDO_DISTRIBUTION_TAG: &x86_build "2020-07-28-x86-2"
+    FDO_DISTRIBUTION_TAG: &x86_build "2020-08-08-glvnd"

 .use-x86_build:
  variables:
--- a/.gitlab-ci/container/x86_build.sh
+++ b/.gitlab-ci/container/x86_build.sh
@@ -92,7 +92,7 @@ rm -rf $WAYLAND_PROTOCOLS_VERSION
 # The version of libglvnd-dev in debian is too old
 # Check this page to see when this local compilation can be dropped in favour of the package:
 # https://packages.debian.org/libglvnd-dev
-GLVND_VERSION=1.2.0
+GLVND_VERSION=1.3.2
 wget https://gitlab.freedesktop.org/glvnd/libglvnd/-/archive/v$GLVND_VERSION/libglvnd-v$GLVND_VERSION.tar.gz
 tar -xvf libglvnd-v$GLVND_VERSION.tar.gz && rm libglvnd-v$GLVND_VERSION.tar.gz
 pushd libglvnd-v$GLVND_VERSION; ./autogen.sh; ./configure; make install; popd
--- a/.pick_status.json
+++ b/.pick_status.json
--- a/2
+++ b/2
@@ -1 +1 @@
-20.2.0-devel
+20.2.0-rc2
--- a/bin/pick/ui.py
+++ b/bin/pick/ui.py
@@ -240,8 +240,8 @@ class UI:

            {err}

-            You can either cancel, or resolve the conflicts, commit the
-            changes and select ok."""))
+            You can either cancel, or resolve the conflicts (`git mergetool`), finish the
+            cherry-pick (`git cherry-pick --continue`) and select ok."""))

        can_btn = urwid.Button('Cancel')
        urwid.connect_signal(can_btn, 'click', reset_cb)
--- a/meson.build
+++ b/meson.build
@@ -1553,7 +1553,7 @@ endif

 dep_glvnd = null_dep
 if with_glvnd
-  dep_glvnd = dependency('libglvnd', version : '>= 1.2.0')
+  dep_glvnd = dependency('libglvnd', version : '>= 1.3.2')
  pre_args += '-DUSE_LIBGLVND=1'
 endif

--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -165,12 +165,6 @@ void mark_block_wqm(wqm_ctx &ctx, unsigned block_idx)

   ctx.branch_wqm[block_idx] = true;
   Block& block = ctx.program->blocks[block_idx];
-   aco_ptr<Instruction>& branch = block.instructions.back();
-
-   if (branch->opcode != aco_opcode::p_branch) {
-      assert(!branch->operands.empty() && branch->operands[0].isTemp());
-      set_needs_wqm(ctx, branch->operands[0].getTemp());
-   }

   /* TODO: this sets more branch conditions to WQM than it needs to
    * it should be enough to stop at the "exec mask top level" */
@@ -233,6 +227,11 @@ void get_block_needs(wqm_ctx &ctx, exec_ctx &exec_ctx, Block* block)
         }
      }

+      if (instr->format == Format::PSEUDO_BRANCH && ctx.branch_wqm[block->index]) {
+         needs = WQM;
+         propagate_wqm = true;
+      }
+
      if (propagate_wqm) {
         for (const Operand& op : instr->operands) {
            if (op.isTemp()) {
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -936,6 +936,14 @@ void init_context(isel_context *ctx, nir_shader *shader)

   ctx->allocated.reset(allocated.release());
   ctx->cf_info.nir_to_aco.reset(nir_to_aco.release());
+
+   /* align and copy constant data */
+   while (ctx->program->constant_data.size() % 4u)
+      ctx->program->constant_data.push_back(0);
+   ctx->constant_data_offset = ctx->program->constant_data.size();
+   ctx->program->constant_data.insert(ctx->program->constant_data.end(),
+                                      (uint8_t*)shader->constant_data,
+                                      (uint8_t*)shader->constant_data + shader->constant_data_size);
 }

 Pseudo_instruction *add_startpgm(struct isel_context *ctx)
@@ -1304,16 +1312,6 @@ lower_bit_size_callback(const nir_alu_instr *alu, void *_)
 void
 setup_nir(isel_context *ctx, nir_shader *nir)
 {
-   Program *program = ctx->program;
-
-   /* align and copy constant data */
-   while (program->constant_data.size() % 4u)
-      program->constant_data.push_back(0);
-   ctx->constant_data_offset = program->constant_data.size();
-   program->constant_data.insert(program->constant_data.end(),
-                                 (uint8_t*)nir->constant_data,
-                                 (uint8_t*)nir->constant_data + nir->constant_data_size);
-
   /* the variable setup has to be done before lower_io / CSE */
   setup_variables(ctx, nir);

--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -174,7 +174,7 @@ struct memory_sync_info {
      return (!storage || (semantics & semantic_can_reorder)) && !(semantics & semantic_volatile);
   }
 };
-static_assert(sizeof(memory_sync_info) == 3);
+static_assert(sizeof(memory_sync_info) == 3, "Unexpected padding");

 enum fp_round {
   fp_round_ne = 0,
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -495,6 +495,12 @@ HazardResult perform_hazard_query(hazard_query *query, Instruction *instr, bool
   if (first->bar_classes && second->bar_classes)
      return hazard_fail_barrier;

+   /* Don't move memory accesses to before control barriers. I don't think
+    * this is necessary for the Vulkan memory model, but it might be for GLSL450. */
+   unsigned control_classes = storage_buffer | storage_atomic_counter | storage_image | storage_shared;
+   if (first->has_control_barrier && ((second->access_atomic | second->access_relaxed) & control_classes))
+      return hazard_fail_barrier;
+
   /* don't move memory loads/stores past potentially aliasing loads/stores */
   unsigned aliasing_storage = instr->format == Format::SMEM ?
                               query->aliasing_storage_smem :
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -1233,13 +1233,13 @@ static void
 radv_image_alloc_single_sample_cmask(const struct radv_image *image,
                                     struct radeon_surf *surf)
 {
-	assert(image->info.storage_samples == 1 || surf->cmask_offset);
-
 	if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 ||
 	    image->info.levels > 1 || image->info.depth > 1 ||
 	    radv_image_has_dcc(image) || !radv_image_use_fast_clear_for_image(image))
 		return;

+	assert(image->info.storage_samples == 1);
+
 	surf->cmask_offset = align64(surf->total_size, surf->cmask_alignment);
 	surf->total_size = surf->cmask_offset + surf->cmask_size;
 	surf->alignment = MAX2(surf->alignment, surf->cmask_alignment);
@@ -1702,7 +1702,8 @@ bool radv_layout_can_fast_clear(const struct radv_image *image,
 				bool in_render_loop,
 			        unsigned queue_mask)
 {
-	return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+	return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
+	       queue_mask == (1u << RADV_QUEUE_GENERAL);
 }

 bool radv_layout_dcc_compressed(const struct radv_device *device,
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -102,8 +102,19 @@ si_emit_compute(struct radv_physical_device *physical_device,
 			    S_00B858_SH1_CU_EN(0xffff));
 	}

-	if (physical_device->rad_info.chip_class >= GFX10)
+	if (physical_device->rad_info.chip_class >= GFX9) {
+		radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY,
+				       physical_device->rad_info.chip_class >= GFX10 ? 0x20 : 0);
+	}
+
+	if (physical_device->rad_info.chip_class >= GFX10) {
+		radeon_set_sh_reg(cs, R_00B890_COMPUTE_USER_ACCUM_0, 0);
+		radeon_set_sh_reg(cs, R_00B894_COMPUTE_USER_ACCUM_1, 0);
+		radeon_set_sh_reg(cs, R_00B898_COMPUTE_USER_ACCUM_2, 0);
+		radeon_set_sh_reg(cs, R_00B89C_COMPUTE_USER_ACCUM_3, 0);
 		radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, 0);
+		radeon_set_sh_reg(cs, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);
+	}

 	/* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
 	 * and is now per pipe, so it should be handled in the
@@ -325,6 +336,10 @@ si_emit_graphics(struct radv_device *device,
 				late_alloc_wave64_gs = 0;
 				cu_mask_gs = 0xffff;
 			}
+
+			/* Limit LATE_ALLOC_GS to prevent a hang (hw bug). */
+			if (physical_device->rad_info.chip_class == GFX10)
+				late_alloc_wave64_gs = MIN2(late_alloc_wave64_gs, 64);
 		} else {
 			if (!physical_device->rad_info.use_late_alloc) {
 				late_alloc_wave64 = 0;
@@ -413,6 +428,23 @@ si_emit_graphics(struct radv_device *device,
 				       S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_RD));
 		radeon_set_context_reg(cs, R_028428_CB_COVERAGE_OUT_CONTROL, 0);

+		radeon_set_sh_reg(cs, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);
+		radeon_set_sh_reg(cs, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);
+		radeon_set_sh_reg(cs, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0);
+		radeon_set_sh_reg(cs, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0);
+		radeon_set_sh_reg(cs, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0);
+		radeon_set_sh_reg(cs, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0);
+		radeon_set_sh_reg(cs, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0);
+		radeon_set_sh_reg(cs, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0);
+		radeon_set_sh_reg(cs, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0);
+		radeon_set_sh_reg(cs, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0);
+		radeon_set_sh_reg(cs, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0);
+		radeon_set_sh_reg(cs, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0);
+		radeon_set_sh_reg(cs, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0);
+		radeon_set_sh_reg(cs, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0);
+		radeon_set_sh_reg(cs, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0);
+		radeon_set_sh_reg(cs, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0);
+
 		radeon_set_sh_reg(cs, R_00B0C0_SPI_SHADER_REQ_CTRL_PS,
 				  S_00B0C0_SOFT_GROUPING_EN(1) |
 				  S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
--- a/src/egl/egl-entrypoint-check.py
+++ b/src/egl/egl-entrypoint-check.py
@@ -1,10 +1,63 @@
 #!/usr/bin/env python

 import argparse
+from generate.eglFunctionList import EGL_FUNCTIONS as GLVND_ENTRYPOINTS
+

 PREFIX = 'EGL_ENTRYPOINT('
 SUFFIX = ')'

+
+# These entrypoints should *not* be in the GLVND entrypoints
+GLVND_EXCLUDED_ENTRYPOINTS = [
+        # EGL_KHR_debug
+        'eglDebugMessageControlKHR',
+        'eglQueryDebugKHR',
+        'eglLabelObjectKHR',
+    ]
+
+
+def check_entrypoint_sorted(entrypoints):
+    print('Checking that EGL API entrypoints are sorted...')
+
+    for i, _ in enumerate(entrypoints):
+        # Can't compare the first one with the previous
+        if i == 0:
+            continue
+        if entrypoints[i - 1] > entrypoints[i]:
+            print('ERROR: ' + entrypoints[i] + ' should come before ' + entrypoints[i - 1])
+            exit(1)
+
+    print('All good :)')
+
+
+def check_glvnd_entrypoints(egl_entrypoints, glvnd_entrypoints):
+    print('Checking the GLVND entrypoints against the plain EGL ones...')
+    success = True
+
+    for egl_entrypoint in egl_entrypoints:
+        if egl_entrypoint in GLVND_EXCLUDED_ENTRYPOINTS:
+            continue
+        if egl_entrypoint not in glvnd_entrypoints:
+            print('ERROR: ' + egl_entrypoint + ' is missing from the GLVND entrypoints (src/egl/generate/eglFunctionList.py)')
+            success = False
+
+    for glvnd_entrypoint in glvnd_entrypoints:
+        if glvnd_entrypoint not in egl_entrypoints:
+            print('ERROR: ' + glvnd_entrypoint + ' is missing from the plain EGL entrypoints (src/egl/main/eglentrypoint.h)')
+            success = False
+
+    for glvnd_entrypoint in GLVND_EXCLUDED_ENTRYPOINTS:
+        if glvnd_entrypoint in glvnd_entrypoints:
+            print('ERROR: ' + glvnd_entrypoint + ' is should *not* be in the GLVND entrypoints (src/egl/generate/eglFunctionList.py)')
+            success = False
+
+    if success:
+        print('All good :)')
+    else:
+        exit(1)
+
+
 def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('header')
@@ -20,17 +73,11 @@ def main():
            assert line.endswith(SUFFIX)
            entrypoints.append(line[len(PREFIX):-len(SUFFIX)])

-    print('Checking EGL API entrypoints are sorted')
+    check_entrypoint_sorted(entrypoints)

-    for i, _ in enumerate(entrypoints):
-        # Can't compare the first one with the previous
-        if i == 0:
-            continue
-        if entrypoints[i - 1] > entrypoints[i]:
-            print('ERROR: ' + entrypoints[i] + ' should come before ' + entrypoints[i - 1])
-            exit(1)
+    glvnd_entrypoints = [x[0] for x in GLVND_ENTRYPOINTS]

-    print('All good :)')
+    check_glvnd_entrypoints(entrypoints, glvnd_entrypoints)

 if __name__ == '__main__':
    main()
--- a/src/freedreno/.gitlab-ci/reference/crash.log
+++ b/src/freedreno/.gitlab-ci/reference/crash.log
@@ -2452,7 +2452,7 @@ registers-gmu:
 	00000000	0x12b: 00000000
 	00000000	0x140: 00000000
 indexed-registers:
-  - regs-name: CP_SEQ_STAT
+  - regs-name: CP_SQE_STAT
    dwords: 51
 	 PC: 00c5
 	$01: deadd00d		$11: 00000000
--- a/src/freedreno/decode/crashdec.c
+++ b/src/freedreno/decode/crashdec.c
@@ -101,6 +101,23 @@ regval(const char *name)
 * Line reading and string helpers:
 */

+static char *
+replacestr(char *line, const char *find, const char *replace)
+{
+	char *tail, *s;
+
+	if (!(s = strstr(line, find)))
+		return line;
+
+	tail = s + strlen(find);
+
+	char *newline;
+	asprintf(&newline, "%.*s%s%s", (int)(s - line), line, replace, tail);
+	free(line);
+
+	return newline;
+}
+
 static char *lastline;
 static char *pushedline;

@@ -120,6 +137,10 @@ popline(void)
 	if (getline(&r, &n, in) < 0)
 		exit(0);

+	/* Handle section name typos from earlier kernels: */
+	r = replacestr(r, "CP_MEMPOOOL", "CP_MEMPOOL");
+	r = replacestr(r, "CP_SEQ_STAT", "CP_SQE_STAT");
+
 	lastline = r;
 	return r;
 }
@@ -471,7 +492,7 @@ decode_clusters(void)
 */

 static void
-dump_cp_seq_stat(uint32_t *stat)
+dump_cp_sqe_stat(uint32_t *stat)
 {
 	printf("\t PC: %04x\n", stat[0]);
 	stat++;
@@ -850,23 +871,23 @@ decode_indexed_registers(void)
 			 * so far) not useful, so skip them if not in verbose mode:
 			 */
 			bool dump = verbose ||
-				!strcmp(name, "CP_SEQ_STAT") ||
+				!strcmp(name, "CP_SQE_STAT") ||
 				!strcmp(name, "CP_DRAW_STATE") ||
 				!strcmp(name, "CP_ROQ") ||
 				0;

-			if (!strcmp(name, "CP_SEQ_STAT"))
-				dump_cp_seq_stat(buf);
+			if (!strcmp(name, "CP_SQE_STAT"))
+				dump_cp_sqe_stat(buf);

 			if (!strcmp(name, "CP_UCODE_DBG_DATA"))
 				dump_cp_ucode_dbg(buf);

-			/* note that name was typo'd in earlier kernels: */
-			if (!strcmp(name, "CP_MEMPOOL") || !strcmp(name, "CP_MEMPOOOL"))
+			if (!strcmp(name, "CP_MEMPOOL"))
 				dump_cp_mem_pool(buf);

 			if (dump)
 				dump_hex_ascii(buf, 4 * sizedwords, 1);
+
 			free(buf);

 			continue;
--- a/src/freedreno/meson.build
+++ b/src/freedreno/meson.build
@@ -30,7 +30,7 @@ if not dep_lua.found()
  dep_lua = dependency('lua52', required: false)
 endif
 if not dep_lua.found()
-  dep_lua = dependency('lua', required: false)
+  dep_lua = dependency('lua', required: false, version : '>=5.2')
 endif

 dep_libarchive = dependency('libarchive', required: false)
--- a/src/freedreno/registers/adreno/meson.build
+++ b/src/freedreno/registers/adreno/meson.build
@@ -24,6 +24,10 @@ xml_files = [
  'a4xx.xml',
  'a5xx.xml',
  'a6xx.xml',
+  'a6xx_gmu.xml',
+  'ocmem.xml',
+  'adreno_control_regs.xml',
+  'adreno_pipe_regs.xml',
  'adreno_common.xml',
  'adreno_pm4.xml',
 ]
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
@@ -1138,6 +1138,7 @@ etna_compile_shader_nir(struct etna_shader_variant *v)

   NIR_PASS_V(s, nir_opt_dce);

+   NIR_PASS_V(s, nir_lower_bool_to_bitsize);
   NIR_PASS_V(s, etna_lower_alu, c->specs->has_new_transcendentals);

   if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS))
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -466,16 +466,9 @@ gpu_supports_render_format(struct etna_screen *screen, enum pipe_format format,
   if (fmt == ETNA_NO_MATCH)
      return false;

-   /* Validate MSAA; number of samples must be allowed, and render target
-    * must have MSAA'able format. */
-   if (sample_count > 1) {
-      if (!VIV_FEATURE(screen, chipFeatures, MSAA))
+   /* MSAA is broken */
+   if (sample_count > 1)
         return false;
-      if (!translate_samples_to_xyscale(sample_count, NULL, NULL))
-         return false;
-      if (translate_ts_format(format) == ETNA_NO_MATCH)
-         return false;
-   }

   if (format == PIPE_FORMAT_R8_UNORM)
      return VIV_FEATURE(screen, chipMinorFeatures5, HALTI5);
--- a/src/gallium/drivers/iris/iris_monitor.c
+++ b/src/gallium/drivers/iris/iris_monitor.c
@@ -283,7 +283,7 @@ iris_get_monitor_result(struct pipe_context *ctx,
   assert(gen_perf_is_query_ready(perf_ctx, monitor->query, batch));

   unsigned bytes_written;
-   gen_perf_get_query_data(perf_ctx, monitor->query,
+   gen_perf_get_query_data(perf_ctx, monitor->query, batch,
                           monitor->result_size,
                           (unsigned*) monitor->result_buffer,
                           &bytes_written);
--- a/src/gallium/drivers/iris/iris_performance_query.c
+++ b/src/gallium/drivers/iris/iris_performance_query.c
@@ -214,7 +214,8 @@ iris_get_perf_query_data(struct pipe_context *pipe,
   struct gen_perf_query_object *obj = perf_query->query;
   struct gen_perf_context *perf_ctx = ice->perf_ctx;

-   gen_perf_get_query_data(perf_ctx, obj, data_size, data, bytes_written);
+   gen_perf_get_query_data(perf_ctx, obj, &ice->batches[IRIS_BATCH_RENDER],
+         data_size, data, bytes_written);
 }

 void
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -1887,6 +1887,16 @@ static void clamp_gsprims_to_esverts(unsigned *max_gsprims, unsigned max_esverts
   *max_gsprims = MIN2(*max_gsprims, 1 + max_reuse);
 }

+unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader)
+{
+   const struct si_shader_selector *sel = shader->selector;
+
+   if (sel->type == PIPE_SHADER_GEOMETRY && sel->so.num_outputs)
+      return 44;
+
+   return 8;
+}
+
 /**
 * Determine subgroup information like maximum number of vertices and prims.
 *
@@ -1907,19 +1917,15 @@ bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
   const unsigned min_verts_per_prim = gs_type == PIPE_SHADER_GEOMETRY ? max_verts_per_prim : 1;

   /* All these are in dwords: */
-   /* We can't allow using the whole LDS, because GS waves compete with
-    * other shader stages for LDS space.
-    *
-    * TODO: We should really take the shader's internal LDS use into
-    *       account. The linker will fail if the size is greater than
-    *       8K dwords.
+   /* GE can only use 8K dwords (32KB) of LDS per workgroup.
    */
-   const unsigned max_lds_size = 8 * 1024 - 768;
+   const unsigned max_lds_size = 8 * 1024 - gfx10_ngg_get_scratch_dw_size(shader);
   const unsigned target_lds_size = max_lds_size;
   unsigned esvert_lds_size = 0;
   unsigned gsprim_lds_size = 0;

   /* All these are per subgroup: */
+   const unsigned min_esverts = gs_sel->screen->info.chip_class >= GFX10_3 ? 29 : 24;
   bool max_vert_out_per_gs_instance = false;
   unsigned max_gsprims_base = 128; /* default prim group size clamp */
   unsigned max_esverts_base = 128;
@@ -2008,7 +2014,7 @@ retry_select_mode:

   /* Round up towards full wave sizes for better ALU utilization. */
   if (!max_vert_out_per_gs_instance) {
-      const unsigned wavesize = gs_sel->screen->ge_wave_size;
+      const unsigned wavesize = si_get_shader_wave_size(shader);
      unsigned orig_max_esverts;
      unsigned orig_max_gsprims;
      do {
@@ -2021,19 +2027,30 @@ retry_select_mode:
            max_esverts =
               MIN2(max_esverts, (max_lds_size - max_gsprims * gsprim_lds_size) / esvert_lds_size);
         max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
+         /* Hardware restriction: minimum value of max_esverts */
+         max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);

         max_gsprims = align(max_gsprims, wavesize);
         max_gsprims = MIN2(max_gsprims, max_gsprims_base);
-         if (gsprim_lds_size)
+         if (gsprim_lds_size) {
+            /* Don't count unusable vertices to the LDS size. Those are vertices above
+             * the maximum number of vertices that can occur in the workgroup,
+             * which is e.g. max_gsprims * 3 for triangles.
+             */
+            unsigned usable_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
            max_gsprims =
-               MIN2(max_gsprims, (max_lds_size - max_esverts * esvert_lds_size) / gsprim_lds_size);
+               MIN2(max_gsprims, (max_lds_size - usable_esverts * esvert_lds_size) / gsprim_lds_size);
+         }
         clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, use_adjacency);
         assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
      } while (orig_max_esverts != max_esverts || orig_max_gsprims != max_gsprims);
-   }

-   /* Hardware restriction: minimum value of max_esverts */
-   max_esverts = MAX2(max_esverts, 23 + max_verts_per_prim);
+      /* Verify the restriction. */
+      assert(max_esverts >= min_esverts - 1 + max_verts_per_prim);
+   } else {
+      /* Hardware restriction: minimum value of max_esverts */
+      max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
+   }

   unsigned max_out_vertices =
      max_vert_out_per_gs_instance
@@ -2061,13 +2078,15 @@ retry_select_mode:
   shader->ngg.prim_amp_factor = prim_amp_factor;
   shader->ngg.max_vert_out_per_gs_instance = max_vert_out_per_gs_instance;

-   shader->gs_info.esgs_ring_size = 4 * max_esverts * esvert_lds_size;
+   /* Don't count unusable vertices. */
+   shader->gs_info.esgs_ring_size = MIN2(max_esverts, max_gsprims * max_verts_per_prim) *
+                                    esvert_lds_size;
   shader->ngg.ngg_emit_size = max_gsprims * gsprim_lds_size;

-   assert(shader->ngg.hw_max_esverts >= 24); /* HW limitation */
+   assert(shader->ngg.hw_max_esverts >= min_esverts); /* HW limitation */

   /* If asserts are disabled, we use the same conditions to return false */
   return max_esverts >= max_verts_per_prim && max_gsprims >= 1 &&
          max_out_vertices <= 256 &&
-          shader->ngg.hw_max_esverts >= 24;
+          shader->ngg.hw_max_esverts >= min_esverts;
 }
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -816,7 +816,7 @@ static bool si_shader_binary_open(struct si_screen *screen, struct si_shader *sh
       */
      struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
      sym->name = "esgs_ring";
-      sym->size = shader->gs_info.esgs_ring_size;
+      sym->size = shader->gs_info.esgs_ring_size * 4;
      sym->align = 64 * 1024;
   }

@@ -1392,12 +1392,8 @@ static bool si_build_main_function(struct si_shader_context *ctx, struct si_shad
            ctx->gs_generated_prims[i] = ac_build_alloca(&ctx->ac, ctx->ac.i32, "");
         }

-         unsigned scratch_size = 8;
-         if (sel->so.num_outputs)
-            scratch_size = 44;
-
         assert(!ctx->gs_ngg_scratch);
-         LLVMTypeRef ai32 = LLVMArrayType(ctx->ac.i32, scratch_size);
+         LLVMTypeRef ai32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader));
         ctx->gs_ngg_scratch =
            LLVMAddGlobalInAddressSpace(ctx->ac.module, ai32, "ngg_scratch", AC_ADDR_SPACE_LDS);
         LLVMSetInitializer(ctx->gs_ngg_scratch, LLVMGetUndef(ai32));
@@ -1425,7 +1421,7 @@ static bool si_build_main_function(struct si_shader_context *ctx, struct si_shad
       * compaction is enabled.
       */
      if (!ctx->gs_ngg_scratch && (sel->so.num_outputs || shader->key.opt.ngg_culling)) {
-         LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, 8);
+         LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader));
         ctx->gs_ngg_scratch =
            LLVMAddGlobalInAddressSpace(ctx->ac.module, asi32, "ngg_scratch", AC_ADDR_SPACE_LDS);
         LLVMSetInitializer(ctx->gs_ngg_scratch, LLVMGetUndef(asi32));
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -219,6 +219,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LL
 void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, LLVMValueRef *addrs);
 void gfx10_ngg_gs_emit_prologue(struct si_shader_context *ctx);
 void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx);
+unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader);
 bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader);

 /* si_shader_llvm.c */
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -763,6 +763,7 @@ static void si_emit_clip_regs(struct si_context *sctx)
   unsigned initial_cdw = sctx->gfx_cs->current.cdw;
   unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
                         S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
+                         S_02881C_BYPASS_VTX_RATE_COMBINER_GFX103(sctx->chip_class >= GFX10_3) |
                         S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(sctx->chip_class >= GFX10_3) |
                         clipdist_mask | (culldist_mask << 8);

@@ -3747,26 +3748,12 @@ static void gfx10_make_texture_descriptor(
      S_00A00C_BASE_LEVEL(res->nr_samples > 1 ? 0 : first_level) |
      S_00A00C_LAST_LEVEL(res->nr_samples > 1 ? util_logbase2(res->nr_samples) : last_level) |
      S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc->swizzle)) | S_00A00C_TYPE(type);
-
-   if (res->target == PIPE_TEXTURE_1D ||
-       res->target == PIPE_TEXTURE_2D) {
-      /* 1D, 2D, and 2D_MSAA can set a custom pitch for shader resources
-       * starting with gfx10.3 (ignored if pitch <= width). Other texture
-       * targets can't. CB and DB can't set a custom pitch for any target.
-       */
-      if (screen->info.chip_class >= GFX10_3)
-         state[4] = S_00A010_DEPTH(tex->surface.u.gfx9.surf_pitch - 1);
-      else
-         state[4] = 0;
-   } else {
-      /* Depth is the last accessible layer on gfx9+. The hw doesn't need
-       * to know the total number of layers.
-       */
-      state[4] = S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ?
-                                   depth - 1 : last_layer) |
-                 S_00A010_BASE_ARRAY(first_layer);
-   }
-
+   /* Depth is the last accessible layer on gfx9+. The hw doesn't need
+    * to know the total number of layers.
+    */
+   state[4] =
+      S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ? depth - 1 : last_layer) |
+      S_00A010_BASE_ARRAY(first_layer);
   state[5] = S_00A014_ARRAY_PITCH(!!(type == V_008F1C_SQ_RSRC_IMG_3D && !sampler)) |
              S_00A014_MAX_MIP(res->nr_samples > 1 ? util_logbase2(res->nr_samples)
                                                   : tex->buffer.b.b.last_level) |
@@ -5367,6 +5354,7 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)

   if (sctx->chip_class >= GFX10_3) {
      si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL_GFX103, 0xff);
+      si_pm4_set_reg(pm4, 0x28848, 1 << 9); /* This fixes sample shading. */
   }

   sctx->cs_preamble_state = pm4;
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -703,7 +703,7 @@ void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *
   out->gs_prims_per_subgroup = gs_prims;
   out->gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations;
   out->max_prims_per_subgroup = out->gs_inst_prims_in_subgroup * gs->gs_max_out_vertices;
-   out->esgs_ring_size = 4 * esgs_lds_size;
+   out->esgs_ring_size = esgs_lds_size;

   assert(out->max_prims_per_subgroup <= max_out_prims);
 }
--- a/src/intel/perf/gen_perf_query.c
+++ b/src/intel/perf/gen_perf_query.c
@@ -1061,17 +1061,6 @@ gen_perf_wait_query(struct gen_perf_context *perf_ctx,
      perf_cfg->vtbl.batchbuffer_flush(perf_ctx->ctx, __FILE__, __LINE__);

   perf_cfg->vtbl.bo_wait_rendering(bo);
-
-   /* Due to a race condition between the OA unit signaling report
-    * availability and the report actually being written into memory,
-    * we need to wait for all the reports to come in before we can
-    * read them.
-    */
-   if (query->queryinfo->kind == GEN_PERF_QUERY_TYPE_OA ||
-       query->queryinfo->kind == GEN_PERF_QUERY_TYPE_RAW) {
-      while (!read_oa_samples_for_query(perf_ctx, query, current_batch))
-         ;
-   }
 }

 bool
@@ -1087,8 +1076,8 @@ gen_perf_is_query_ready(struct gen_perf_context *perf_ctx,
      return (query->oa.results_accumulated ||
              (query->oa.bo &&
               !perf_cfg->vtbl.batch_references(current_batch, query->oa.bo) &&
-               !perf_cfg->vtbl.bo_busy(query->oa.bo) &&
-               read_oa_samples_for_query(perf_ctx, query, current_batch)));
+               !perf_cfg->vtbl.bo_busy(query->oa.bo)));
+
   case GEN_PERF_QUERY_TYPE_PIPELINE:
      return (query->pipeline_stats.bo &&
              !perf_cfg->vtbl.batch_references(current_batch, query->pipeline_stats.bo) &&
@@ -1513,6 +1502,7 @@ get_pipeline_stats_data(struct gen_perf_context *perf_ctx,
 void
 gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
                        struct gen_perf_query_object *query,
+                        void *current_batch,
                        int data_size,
                        unsigned *data,
                        unsigned *bytes_written)
@@ -1524,6 +1514,17 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
   case GEN_PERF_QUERY_TYPE_OA:
   case GEN_PERF_QUERY_TYPE_RAW:
      if (!query->oa.results_accumulated) {
+         /* Due to the sampling frequency of the OA buffer by the i915-perf
+          * driver, there can be a 5ms delay between the Mesa seeing the query
+          * complete and i915 making all the OA buffer reports available to us.
+          * We need to wait for all the reports to come in before we can do
+          * the post processing removing unrelated deltas.
+          * There is a i915-perf series to address this issue, but it's
+          * not been merged upstream yet.
+          */
+         while (!read_oa_samples_for_query(perf_ctx, query, current_batch))
+            ;
+
         read_gt_frequency(perf_ctx, query);
         uint32_t *begin_report = query->oa.map;
         uint32_t *end_report = query->oa.map + MI_RPC_BO_END_OFFSET_BYTES;
--- a/src/intel/perf/gen_perf_query.h
+++ b/src/intel/perf/gen_perf_query.h
@@ -76,6 +76,7 @@ void gen_perf_delete_query(struct gen_perf_context *perf_ctx,
                           struct gen_perf_query_object *query);
 void gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
                             struct gen_perf_query_object *query,
+                             void *current_batch,
                             int data_size,
                             unsigned *data,
                             unsigned *bytes_written);
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -28,7 +28,7 @@
 /*
 * Gen-specific function declarations.  This header must *not* be included
 * directly.  Instead, it is included multiple times by anv_private.h.
- * 
+ *
 * In this header file, the usual genx() macro is available.
 */

@@ -36,6 +36,16 @@
 #error This file is included by means other than anv_private.h
 #endif

+extern const uint32_t genX(vk_to_gen_cullmode)[];
+
+extern const uint32_t genX(vk_to_gen_front_face)[];
+
+extern const uint32_t genX(vk_to_gen_primitive_type)[];
+
+extern const uint32_t genX(vk_to_gen_compare_op)[];
+
+extern const uint32_t genX(vk_to_gen_stencil_op)[];
+
 VkResult genX(init_device_state)(struct anv_device *device);

 void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer);
@@ -62,8 +72,6 @@ void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer);
 void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
                                const struct gen_l3_config *cfg);

-void genX(cmd_buffer_emit_clip)(struct anv_cmd_buffer *cmd_buffer);
-
 void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer);
 void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer);

--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -1883,46 +1883,6 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
         pCreateInfo->pRasterizationState->frontFace;
   }

-   if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE &&
-       subpass->depth_stencil_attachment) {
-      dynamic->depth_test_enable =
-         pCreateInfo->pDepthStencilState->depthTestEnable;
-   }
-
-   if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE &&
-       subpass->depth_stencil_attachment) {
-      dynamic->depth_write_enable =
-         pCreateInfo->pDepthStencilState->depthWriteEnable;
-   }
-
-   if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP &&
-       subpass->depth_stencil_attachment) {
-      dynamic->depth_compare_op =
-         pCreateInfo->pDepthStencilState->depthCompareOp;
-   }
-
-   if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE &&
-       subpass->depth_stencil_attachment) {
-      dynamic->depth_bounds_test_enable =
-         pCreateInfo->pDepthStencilState->depthBoundsTestEnable;
-   }
-
-   if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE &&
-       subpass->depth_stencil_attachment) {
-      dynamic->stencil_test_enable =
-         pCreateInfo->pDepthStencilState->stencilTestEnable;
-   }
-
-   if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP &&
-       subpass->depth_stencil_attachment) {
-      const VkPipelineDepthStencilStateCreateInfo *info =
-         pCreateInfo->pDepthStencilState;
-      memcpy(&dynamic->stencil_op.front, &info->front,
-             sizeof(dynamic->stencil_op.front));
-      memcpy(&dynamic->stencil_op.back, &info->back,
-             sizeof(dynamic->stencil_op.back));
-   }
-
   if (states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) {
      assert(pCreateInfo->pInputAssemblyState);
      bool has_tess = false;
@@ -2007,6 +1967,40 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
         dynamic->stencil_reference.back =
            pCreateInfo->pDepthStencilState->back.reference;
      }
+
+      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE) {
+         dynamic->depth_test_enable =
+            pCreateInfo->pDepthStencilState->depthTestEnable;
+      }
+
+      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE) {
+         dynamic->depth_write_enable =
+            pCreateInfo->pDepthStencilState->depthWriteEnable;
+      }
+
+      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP) {
+         dynamic->depth_compare_op =
+            pCreateInfo->pDepthStencilState->depthCompareOp;
+      }
+
+      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
+         dynamic->depth_bounds_test_enable =
+            pCreateInfo->pDepthStencilState->depthBoundsTestEnable;
+      }
+
+      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE) {
+         dynamic->stencil_test_enable =
+            pCreateInfo->pDepthStencilState->stencilTestEnable;
+      }
+
+      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP) {
+         const VkPipelineDepthStencilStateCreateInfo *info =
+            pCreateInfo->pDepthStencilState;
+         memcpy(&dynamic->stencil_op.front, &info->front,
+                sizeof(dynamic->stencil_op.front));
+         memcpy(&dynamic->stencil_op.back, &info->back,
+                sizeof(dynamic->stencil_op.back));
+      }
   }

   const VkPipelineRasterizationLineStateCreateInfoEXT *line_state =
--- a/src/intel/vulkan/anv_queue.c
+++ b/src/intel/vulkan/anv_queue.c
@@ -405,14 +405,14 @@ anv_queue_submit_add_fence_bo(struct anv_queue_submit *submit,
 {
   if (submit->fence_bo_count >= submit->fence_bo_array_length) {
      uint32_t new_len = MAX2(submit->fence_bo_array_length * 2, 64);
-
-      submit->fence_bos =
+      uintptr_t *new_fence_bos =
         vk_realloc(submit->alloc,
                    submit->fence_bos, new_len * sizeof(*submit->fence_bos),
                    8, submit->alloc_scope);
-      if (submit->fence_bos == NULL)
+      if (new_fence_bos == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

+      submit->fence_bos = new_fence_bos;
      submit->fence_bo_array_length = new_len;
   }

@@ -433,14 +433,14 @@ anv_queue_submit_add_syncobj(struct anv_queue_submit* submit,

   if (submit->fence_count >= submit->fence_array_length) {
      uint32_t new_len = MAX2(submit->fence_array_length * 2, 64);
-
-      submit->fences =
+      struct drm_i915_gem_exec_fence *new_fences =
         vk_realloc(submit->alloc,
                    submit->fences, new_len * sizeof(*submit->fences),
                    8, submit->alloc_scope);
-      if (submit->fences == NULL)
+      if (new_fences == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

+      submit->fences = new_fences;
      submit->fence_array_length = new_len;
   }

@@ -483,21 +483,24 @@ anv_queue_submit_add_timeline_wait(struct anv_queue_submit* submit,
 {
   if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
      uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);
-
-      submit->wait_timelines =
+      struct anv_timeline **new_wait_timelines =
         vk_realloc(submit->alloc,
                    submit->wait_timelines, new_len * sizeof(*submit->wait_timelines),
                    8, submit->alloc_scope);
-      if (submit->wait_timelines == NULL)
+      if (new_wait_timelines == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

-      submit->wait_timeline_values =
+      submit->wait_timelines = new_wait_timelines;
+
+      uint64_t *new_wait_timeline_values =
         vk_realloc(submit->alloc,
                    submit->wait_timeline_values, new_len * sizeof(*submit->wait_timeline_values),
                    8, submit->alloc_scope);
-      if (submit->wait_timeline_values == NULL)
+      if (new_wait_timeline_values == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

+      submit->wait_timeline_values = new_wait_timeline_values;
+
      submit->wait_timeline_array_length = new_len;
   }

@@ -519,21 +522,24 @@ anv_queue_submit_add_timeline_signal(struct anv_queue_submit* submit,

   if (submit->signal_timeline_count >= submit->signal_timeline_array_length) {
      uint32_t new_len = MAX2(submit->signal_timeline_array_length * 2, 64);
-
-      submit->signal_timelines =
+      struct anv_timeline **new_signal_timelines =
         vk_realloc(submit->alloc,
                    submit->signal_timelines, new_len * sizeof(*submit->signal_timelines),
                    8, submit->alloc_scope);
-      if (submit->signal_timelines == NULL)
+      if (new_signal_timelines == NULL)
            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

-      submit->signal_timeline_values =
+      submit->signal_timelines = new_signal_timelines;
+
+      uint64_t *new_signal_timeline_values =
         vk_realloc(submit->alloc,
                    submit->signal_timeline_values, new_len * sizeof(*submit->signal_timeline_values),
                    8, submit->alloc_scope);
-      if (submit->signal_timeline_values == NULL)
+      if (new_signal_timeline_values == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

+      submit->signal_timeline_values = new_signal_timeline_values;
+
      submit->signal_timeline_array_length = new_len;
   }

--- a/src/intel/vulkan/gen7_cmd_buffer.c
+++ b/src/intel/vulkan/gen7_cmd_buffer.c
@@ -198,39 +198,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   struct anv_dynamic_state *d = &cmd_buffer->state.gfx.dynamic;

-   static const uint32_t vk_to_gen_cullmode[] = {
-      [VK_CULL_MODE_NONE]                       = CULLMODE_NONE,
-      [VK_CULL_MODE_FRONT_BIT]                  = CULLMODE_FRONT,
-      [VK_CULL_MODE_BACK_BIT]                   = CULLMODE_BACK,
-      [VK_CULL_MODE_FRONT_AND_BACK]             = CULLMODE_BOTH
-   };
-   static const uint32_t vk_to_gen_front_face[] = {
-      [VK_FRONT_FACE_COUNTER_CLOCKWISE]         = 1,
-      [VK_FRONT_FACE_CLOCKWISE]                 = 0
-   };
-
-   static const uint32_t vk_to_gen_compare_op[] = {
-      [VK_COMPARE_OP_NEVER]                        = PREFILTEROPNEVER,
-      [VK_COMPARE_OP_LESS]                         = PREFILTEROPLESS,
-      [VK_COMPARE_OP_EQUAL]                        = PREFILTEROPEQUAL,
-      [VK_COMPARE_OP_LESS_OR_EQUAL]                = PREFILTEROPLEQUAL,
-      [VK_COMPARE_OP_GREATER]                      = PREFILTEROPGREATER,
-      [VK_COMPARE_OP_NOT_EQUAL]                    = PREFILTEROPNOTEQUAL,
-      [VK_COMPARE_OP_GREATER_OR_EQUAL]             = PREFILTEROPGEQUAL,
-      [VK_COMPARE_OP_ALWAYS]                       = PREFILTEROPALWAYS,
-   };
-
-   static const uint32_t vk_to_gen_stencil_op[] = {
-      [VK_STENCIL_OP_KEEP]                         = STENCILOP_KEEP,
-      [VK_STENCIL_OP_ZERO]                         = STENCILOP_ZERO,
-      [VK_STENCIL_OP_REPLACE]                      = STENCILOP_REPLACE,
-      [VK_STENCIL_OP_INCREMENT_AND_CLAMP]          = STENCILOP_INCRSAT,
-      [VK_STENCIL_OP_DECREMENT_AND_CLAMP]          = STENCILOP_DECRSAT,
-      [VK_STENCIL_OP_INVERT]                       = STENCILOP_INVERT,
-      [VK_STENCIL_OP_INCREMENT_AND_WRAP]           = STENCILOP_INCR,
-      [VK_STENCIL_OP_DECREMENT_AND_WRAP]           = STENCILOP_DECR,
-   };
-
   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                      ANV_CMD_DIRTY_RENDER_TARGETS |
                                      ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH |
@@ -245,8 +212,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
         .GlobalDepthOffsetConstant = d->depth_bias.bias,
         .GlobalDepthOffsetScale = d->depth_bias.slope,
         .GlobalDepthOffsetClamp = d->depth_bias.clamp,
-         .FrontWinding            = vk_to_gen_front_face[d->front_face],
-         .CullMode                = vk_to_gen_cullmode[d->cull_mode],
+         .FrontWinding            = genX(vk_to_gen_front_face)[d->front_face],
+         .CullMode                = genX(vk_to_gen_cullmode)[d->cull_mode],
      };
      GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);

@@ -307,16 +274,16 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)

         .DepthTestEnable = d->depth_test_enable,
         .DepthBufferWriteEnable = d->depth_test_enable && d->depth_write_enable,
-         .DepthTestFunction = vk_to_gen_compare_op[d->depth_compare_op],
+         .DepthTestFunction = genX(vk_to_gen_compare_op)[d->depth_compare_op],
         .StencilTestEnable = d->stencil_test_enable,
-         .StencilFailOp = vk_to_gen_stencil_op[d->stencil_op.front.fail_op],
-         .StencilPassDepthPassOp = vk_to_gen_stencil_op[d->stencil_op.front.pass_op],
-         .StencilPassDepthFailOp = vk_to_gen_stencil_op[d->stencil_op.front.depth_fail_op],
-         .StencilTestFunction = vk_to_gen_compare_op[d->stencil_op.front.compare_op],
-         .BackfaceStencilFailOp = vk_to_gen_stencil_op[d->stencil_op.back.fail_op],
-         .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[d->stencil_op.back.pass_op],
-         .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[d->stencil_op.back.depth_fail_op],
-         .BackfaceStencilTestFunction = vk_to_gen_compare_op[d->stencil_op.back.compare_op],
+         .StencilFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.fail_op],
+         .StencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.pass_op],
+         .StencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.depth_fail_op],
+         .StencilTestFunction = genX(vk_to_gen_compare_op)[d->stencil_op.front.compare_op],
+         .BackfaceStencilFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.fail_op],
+         .BackfaceStencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.pass_op],
+         .BackfaceStencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.depth_fail_op],
+         .BackfaceStencilTestFunction = genX(vk_to_gen_compare_op)[d->stencil_op.back.compare_op],
      };
      GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil);

@@ -359,26 +326,13 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
      }
   }

-   static const uint32_t vk_to_gen_primitive_type[] = {
-      [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
-      [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
-      [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
-      [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
-      [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
-      [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
-      [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
-      [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
-      [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
-      [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
-   };
-
   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                      ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)) {
      uint32_t topology;
      if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
         topology = d->primitive_topology;
      else
-         topology = vk_to_gen_primitive_type[d->primitive_topology];
+         topology = genX(vk_to_gen_primitive_type)[d->primitive_topology];

      cmd_buffer->state.gfx.primitive_topology = topology;
   }
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -439,51 +439,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
      anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen8.sf);
   }

-   static const uint32_t vk_to_gen_cullmode[] = {
-      [VK_CULL_MODE_NONE]                       = CULLMODE_NONE,
-      [VK_CULL_MODE_FRONT_BIT]                  = CULLMODE_FRONT,
-      [VK_CULL_MODE_BACK_BIT]                   = CULLMODE_BACK,
-      [VK_CULL_MODE_FRONT_AND_BACK]             = CULLMODE_BOTH
-   };
-   static const uint32_t vk_to_gen_front_face[] = {
-      [VK_FRONT_FACE_COUNTER_CLOCKWISE]         = 1,
-      [VK_FRONT_FACE_CLOCKWISE]                 = 0
-   };
-   static const uint32_t vk_to_gen_primitive_type[] = {
-      [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
-      [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
-      [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
-      [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
-      [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
-      [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
-      [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
-      [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
-      [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
-      [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
-   };
-
-   static const uint32_t vk_to_gen_compare_op[] = {
-      [VK_COMPARE_OP_NEVER]                        = PREFILTEROPNEVER,
-      [VK_COMPARE_OP_LESS]                         = PREFILTEROPLESS,
-      [VK_COMPARE_OP_EQUAL]                        = PREFILTEROPEQUAL,
-      [VK_COMPARE_OP_LESS_OR_EQUAL]                = PREFILTEROPLEQUAL,
-      [VK_COMPARE_OP_GREATER]                      = PREFILTEROPGREATER,
-      [VK_COMPARE_OP_NOT_EQUAL]                    = PREFILTEROPNOTEQUAL,
-      [VK_COMPARE_OP_GREATER_OR_EQUAL]             = PREFILTEROPGEQUAL,
-      [VK_COMPARE_OP_ALWAYS]                       = PREFILTEROPALWAYS,
-   };
-
-   static const uint32_t vk_to_gen_stencil_op[] = {
-      [VK_STENCIL_OP_KEEP]                         = STENCILOP_KEEP,
-      [VK_STENCIL_OP_ZERO]                         = STENCILOP_ZERO,
-      [VK_STENCIL_OP_REPLACE]                      = STENCILOP_REPLACE,
-      [VK_STENCIL_OP_INCREMENT_AND_CLAMP]          = STENCILOP_INCRSAT,
-      [VK_STENCIL_OP_DECREMENT_AND_CLAMP]          = STENCILOP_DECRSAT,
-      [VK_STENCIL_OP_INVERT]                       = STENCILOP_INVERT,
-      [VK_STENCIL_OP_INCREMENT_AND_WRAP]           = STENCILOP_INCR,
-      [VK_STENCIL_OP_DECREMENT_AND_WRAP]           = STENCILOP_DECR,
-   };
-
   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                      ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS |
                                      ANV_CMD_DIRTY_DYNAMIC_CULL_MODE |
@@ -494,8 +449,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
         .GlobalDepthOffsetConstant = d->depth_bias.bias,
         .GlobalDepthOffsetScale = d->depth_bias.slope,
         .GlobalDepthOffsetClamp = d->depth_bias.clamp,
-         .CullMode = vk_to_gen_cullmode[d->cull_mode],
-         .FrontWinding = vk_to_gen_front_face[d->front_face],
+         .CullMode = genX(vk_to_gen_cullmode)[d->cull_mode],
+         .FrontWinding = genX(vk_to_gen_front_face)[d->front_face],
      };
      GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster);
      anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
@@ -556,16 +511,16 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)

         .DepthTestEnable = d->depth_test_enable,
         .DepthBufferWriteEnable = d->depth_test_enable && d->depth_write_enable,
-         .DepthTestFunction = vk_to_gen_compare_op[d->depth_compare_op],
+         .DepthTestFunction = genX(vk_to_gen_compare_op)[d->depth_compare_op],
         .StencilTestEnable = d->stencil_test_enable,
-         .StencilFailOp = vk_to_gen_stencil_op[d->stencil_op.front.fail_op],
-         .StencilPassDepthPassOp = vk_to_gen_stencil_op[d->stencil_op.front.pass_op],
-         .StencilPassDepthFailOp = vk_to_gen_stencil_op[d->stencil_op.front.depth_fail_op],
-         .StencilTestFunction = vk_to_gen_compare_op[d->stencil_op.front.compare_op],
-         .BackfaceStencilFailOp = vk_to_gen_stencil_op[d->stencil_op.back.fail_op],
-         .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[d->stencil_op.back.pass_op],
-         .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[d->stencil_op.back.depth_fail_op],
-         .BackfaceStencilTestFunction = vk_to_gen_compare_op[d->stencil_op.back.compare_op],
+         .StencilFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.fail_op],
+         .StencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.pass_op],
+         .StencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.depth_fail_op],
+         .StencilTestFunction = genX(vk_to_gen_compare_op)[d->stencil_op.front.compare_op],
+         .BackfaceStencilFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.fail_op],
+         .BackfaceStencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.pass_op],
+         .BackfaceStencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.depth_fail_op],
+         .BackfaceStencilTestFunction = genX(vk_to_gen_compare_op)[d->stencil_op.back.compare_op],
      };
      GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, wm_depth_stencil_dw,
                                          &wm_depth_stencil);
@@ -625,16 +580,16 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)

         .DepthTestEnable = d->depth_test_enable,
         .DepthBufferWriteEnable = d->depth_test_enable && d->depth_write_enable,
-         .DepthTestFunction = vk_to_gen_compare_op[d->depth_compare_op],
+         .DepthTestFunction = genX(vk_to_gen_compare_op)[d->depth_compare_op],
         .StencilTestEnable = d->stencil_test_enable,
-         .StencilFailOp = vk_to_gen_stencil_op[d->stencil_op.front.fail_op],
-         .StencilPassDepthPassOp = vk_to_gen_stencil_op[d->stencil_op.front.pass_op],
-         .StencilPassDepthFailOp = vk_to_gen_stencil_op[d->stencil_op.front.depth_fail_op],
-         .StencilTestFunction = vk_to_gen_compare_op[d->stencil_op.front.compare_op],
-         .BackfaceStencilFailOp = vk_to_gen_stencil_op[d->stencil_op.back.fail_op],
-         .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[d->stencil_op.back.pass_op],
-         .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[d->stencil_op.back.depth_fail_op],
-         .BackfaceStencilTestFunction = vk_to_gen_compare_op[d->stencil_op.back.compare_op],
+         .StencilFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.fail_op],
+         .StencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.pass_op],
+         .StencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.depth_fail_op],
+         .StencilTestFunction = genX(vk_to_gen_compare_op)[d->stencil_op.front.compare_op],
+         .BackfaceStencilFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.fail_op],
+         .BackfaceStencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.pass_op],
+         .BackfaceStencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.depth_fail_op],
+         .BackfaceStencilTestFunction = genX(vk_to_gen_compare_op)[d->stencil_op.back.compare_op],

      };
      GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dwords, &wm_depth_stencil);
@@ -684,7 +639,7 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
      if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
         topology = d->primitive_topology;
      else
-         topology = vk_to_gen_primitive_type[d->primitive_topology];
+         topology = genX(vk_to_gen_primitive_type)[d->primitive_topology];

      cmd_buffer->state.gfx.primitive_topology = topology;

--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -3263,26 +3263,44 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
   cmd_buffer->state.push_constants_dirty &= ~flushed;
 }

-void
-genX(cmd_buffer_emit_clip)(struct anv_cmd_buffer *cmd_buffer)
+static void
+cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
 {
+   const uint32_t clip_states =
+#if GEN_GEN <= 7
+      ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE |
+      ANV_CMD_DIRTY_DYNAMIC_CULL_MODE |
+#endif
+      ANV_CMD_DIRTY_DYNAMIC_VIEWPORT |
+      ANV_CMD_DIRTY_PIPELINE;
+
+   if ((cmd_buffer->state.gfx.dirty & clip_states) == 0)
+      return;
+
+#if GEN_GEN <= 7
+   const struct anv_dynamic_state *d = &cmd_buffer->state.gfx.dynamic;
+#endif
+   struct GENX(3DSTATE_CLIP) clip = {
+      GENX(3DSTATE_CLIP_header),
+#if GEN_GEN <= 7
+      .FrontWinding = genX(vk_to_gen_front_face)[d->front_face],
+      .CullMode     = genX(vk_to_gen_cullmode)[d->cull_mode],
+#endif
+   };
+   uint32_t dwords[GENX(3DSTATE_CLIP_length)];
+
   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
-
-   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
-                                      ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)) {
-      uint32_t dwords[GENX(3DSTATE_CLIP_length)];
-      int32_t count =
+   const struct brw_vue_prog_data *last =
+      anv_pipeline_get_last_vue_prog_data(pipeline);
+   if (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
+      clip.MaximumVPIndex =
         cmd_buffer->state.gfx.dynamic.viewport.count > 0 ?
-            cmd_buffer->state.gfx.dynamic.viewport.count - 1 : 0;
-
-      struct GENX(3DSTATE_CLIP) clip = {
-         GENX(3DSTATE_CLIP_header),
-         .MaximumVPIndex = count,
-      };
-      GENX(3DSTATE_CLIP_pack)(NULL, dwords, &clip);
-      anv_batch_emit_merge(&cmd_buffer->batch, dwords,
-                           pipeline->gen7.clip);
+         cmd_buffer->state.gfx.dynamic.viewport.count - 1 : 0;
   }
+
+   GENX(3DSTATE_CLIP_pack)(NULL, dwords, &clip);
+   anv_batch_emit_merge(&cmd_buffer->batch, dwords,
+                        pipeline->gen7.clip);
 }

 void
@@ -3469,10 +3487,10 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
   if (dirty)
      cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty);

-   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
-      genX(cmd_buffer_emit_clip)(cmd_buffer);
+   cmd_buffer_emit_clip(cmd_buffer);
+
+   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)
      gen8_cmd_buffer_emit_viewport(cmd_buffer);
-   }

   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT |
                                  ANV_CMD_DIRTY_PIPELINE)) {
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -438,24 +438,6 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
 #endif
 }

-static const uint32_t vk_to_gen_cullmode[] = {
-   [VK_CULL_MODE_NONE]                       = CULLMODE_NONE,
-   [VK_CULL_MODE_FRONT_BIT]                  = CULLMODE_FRONT,
-   [VK_CULL_MODE_BACK_BIT]                   = CULLMODE_BACK,
-   [VK_CULL_MODE_FRONT_AND_BACK]             = CULLMODE_BOTH
-};
-
-static const uint32_t vk_to_gen_fillmode[] = {
-   [VK_POLYGON_MODE_FILL]                    = FILL_MODE_SOLID,
-   [VK_POLYGON_MODE_LINE]                    = FILL_MODE_WIREFRAME,
-   [VK_POLYGON_MODE_POINT]                   = FILL_MODE_POINT,
-};
-
-static const uint32_t vk_to_gen_front_face[] = {
-   [VK_FRONT_FACE_COUNTER_CLOCKWISE]         = 1,
-   [VK_FRONT_FACE_CLOCKWISE]                 = 0
-};
-
 static VkLineRasterizationModeEXT
 vk_line_rasterization_mode(const VkPipelineRasterizationLineStateCreateInfoEXT *line_info,
                           const VkPipelineMultisampleStateCreateInfo *ms_info)
@@ -574,6 +556,24 @@ gen7_ms_rast_mode(struct anv_graphics_pipeline *pipeline,
 }
 #endif

+const uint32_t genX(vk_to_gen_cullmode)[] = {
+   [VK_CULL_MODE_NONE]                       = CULLMODE_NONE,
+   [VK_CULL_MODE_FRONT_BIT]                  = CULLMODE_FRONT,
+   [VK_CULL_MODE_BACK_BIT]                   = CULLMODE_BACK,
+   [VK_CULL_MODE_FRONT_AND_BACK]             = CULLMODE_BOTH
+};
+
+const uint32_t genX(vk_to_gen_fillmode)[] = {
+   [VK_POLYGON_MODE_FILL]                    = FILL_MODE_SOLID,
+   [VK_POLYGON_MODE_LINE]                    = FILL_MODE_WIREFRAME,
+   [VK_POLYGON_MODE_POINT]                   = FILL_MODE_POINT,
+};
+
+const uint32_t genX(vk_to_gen_front_face)[] = {
+   [VK_FRONT_FACE_COUNTER_CLOCKWISE]         = 1,
+   [VK_FRONT_FACE_CLOCKWISE]                 = 0
+};
+
 static void
 emit_rs_state(struct anv_graphics_pipeline *pipeline,
              const VkPipelineInputAssemblyStateCreateInfo *ia_info,
@@ -681,13 +681,13 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,

   raster.FrontWinding =
      dynamic_states & ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE ?
-         0 : vk_to_gen_front_face[rs_info->frontFace];
+         0 : genX(vk_to_gen_front_face)[rs_info->frontFace];
   raster.CullMode =
      dynamic_states & ANV_CMD_DIRTY_DYNAMIC_CULL_MODE ?
-         0 : vk_to_gen_cullmode[rs_info->cullMode];
+         0 : genX(vk_to_gen_cullmode)[rs_info->cullMode];

-   raster.FrontFaceFillMode = vk_to_gen_fillmode[rs_info->polygonMode];
-   raster.BackFaceFillMode = vk_to_gen_fillmode[rs_info->polygonMode];
+   raster.FrontFaceFillMode = genX(vk_to_gen_fillmode)[rs_info->polygonMode];
+   raster.BackFaceFillMode = genX(vk_to_gen_fillmode)[rs_info->polygonMode];
   raster.ScissorRectangleEnable = true;

 #if GEN_GEN >= 9
@@ -843,7 +843,7 @@ static const uint32_t vk_to_gen_blend_op[] = {
   [VK_BLEND_OP_MAX]                         = BLENDFUNCTION_MAX,
 };

-static const uint32_t vk_to_gen_compare_op[] = {
+const uint32_t genX(vk_to_gen_compare_op)[] = {
   [VK_COMPARE_OP_NEVER]                        = PREFILTEROPNEVER,
   [VK_COMPARE_OP_LESS]                         = PREFILTEROPLESS,
   [VK_COMPARE_OP_EQUAL]                        = PREFILTEROPEQUAL,
@@ -854,7 +854,7 @@ static const uint32_t vk_to_gen_compare_op[] = {
   [VK_COMPARE_OP_ALWAYS]                       = PREFILTEROPALWAYS,
 };

-static const uint32_t vk_to_gen_stencil_op[] = {
+const uint32_t genX(vk_to_gen_stencil_op)[] = {
   [VK_STENCIL_OP_KEEP]                         = STENCILOP_KEEP,
   [VK_STENCIL_OP_ZERO]                         = STENCILOP_ZERO,
   [VK_STENCIL_OP_REPLACE]                      = STENCILOP_REPLACE,
@@ -865,6 +865,19 @@ static const uint32_t vk_to_gen_stencil_op[] = {
   [VK_STENCIL_OP_DECREMENT_AND_WRAP]           = STENCILOP_DECR,
 };

+const uint32_t genX(vk_to_gen_primitive_type)[] = {
+   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
+   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
+   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
+   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
+   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
+   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
+   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
+   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
+   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
+   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
+};
+
 /* This function sanitizes the VkStencilOpState by looking at the compare ops
 * and trying to determine whether or not a given stencil op can ever actually
 * occur.  Stencil ops which can never occur are set to VK_STENCIL_OP_KEEP.
@@ -1056,7 +1069,7 @@ emit_ds_state(struct anv_graphics_pipeline *pipeline,

      .DepthTestFunction =
         dynamic_states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP ?
-            0 : vk_to_gen_compare_op[info.depthCompareOp],
+            0 : genX(vk_to_gen_compare_op)[info.depthCompareOp],

      .DoubleSidedStencilEnable = true,

@@ -1064,14 +1077,14 @@ emit_ds_state(struct anv_graphics_pipeline *pipeline,
         dynamic_states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE ?
            0 : info.stencilTestEnable,

-      .StencilFailOp = vk_to_gen_stencil_op[info.front.failOp],
-      .StencilPassDepthPassOp = vk_to_gen_stencil_op[info.front.passOp],
-      .StencilPassDepthFailOp = vk_to_gen_stencil_op[info.front.depthFailOp],
-      .StencilTestFunction = vk_to_gen_compare_op[info.front.compareOp],
-      .BackfaceStencilFailOp = vk_to_gen_stencil_op[info.back.failOp],
-      .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info.back.passOp],
-      .BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info.back.depthFailOp],
-      .BackfaceStencilTestFunction = vk_to_gen_compare_op[info.back.compareOp],
+      .StencilFailOp = genX(vk_to_gen_stencil_op)[info.front.failOp],
+      .StencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[info.front.passOp],
+      .StencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[info.front.depthFailOp],
+      .StencilTestFunction = genX(vk_to_gen_compare_op)[info.front.compareOp],
+      .BackfaceStencilFailOp = genX(vk_to_gen_stencil_op)[info.back.failOp],
+      .BackfaceStencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[info.back.passOp],
+      .BackfaceStencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[info.back.depthFailOp],
+      .BackfaceStencilTestFunction = genX(vk_to_gen_compare_op)[info.back.compareOp],
   };

   if (dynamic_stencil_op) {
@@ -1339,8 +1352,8 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
      !(last->vue_map.slots_valid & VARYING_BIT_LAYER);

 #if GEN_GEN == 7
-   clip.FrontWinding            = vk_to_gen_front_face[rs_info->frontFace];
-   clip.CullMode                = vk_to_gen_cullmode[rs_info->cullMode];
+   clip.FrontWinding            = genX(vk_to_gen_front_face)[rs_info->frontFace];
+   clip.CullMode                = genX(vk_to_gen_cullmode)[rs_info->cullMode];
   clip.ViewportZClipTestEnable = pipeline->depth_clip_enable;
   clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask;
   clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask;
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -323,7 +323,7 @@ brw_get_perf_query_data(struct gl_context *ctx,
    */
   assert(o->Ready);

-   gen_perf_get_query_data(brw->perf_ctx, obj,
+   gen_perf_get_query_data(brw->perf_ctx, obj, &brw->batch,
                           data_size, data, bytes_written);
 }

--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -97,9 +97,12 @@ dst_register(struct st_translate *t, gl_register_file file, GLuint index)
      else
         assert(index < VARYING_SLOT_MAX);

-      assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
-
-      return t->outputs[t->outputMapping[index]];
+      if (t->outputMapping[index] < ARRAY_SIZE(t->outputs))
+         return t->outputs[t->outputMapping[index]];
+      else {
+         assert(t->procType == PIPE_SHADER_VERTEX);
+         return ureg_dst(ureg_DECL_constant(t->ureg, 0));
+      }

   case PROGRAM_ADDRESS:
      return t->address[index];
@@ -149,8 +152,12 @@ src_register(struct st_translate *t,
      }

   case PROGRAM_OUTPUT:
-      assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
-      return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
+      if (t->outputMapping[index] < ARRAY_SIZE(t->outputs))
+         return ureg_src(t->outputs[t->outputMapping[index]]);
+      else {
+         assert(t->procType == PIPE_SHADER_VERTEX);
+         return ureg_DECL_constant(t->ureg, 0);
+      }

   case PROGRAM_ADDRESS:
      return ureg_src(t->address[index]);
--- a/src/util/driconf.h
+++ b/src/util/driconf.h
@@ -214,7 +214,7 @@ DRI_CONF_OPT_END

 #define DRI_CONF_FORCE_GL_VENDOR(def) \
 DRI_CONF_OPT_BEGIN(force_gl_vendor, string, def) \
-        DRI_CONF_DESC("Allow GPU vendor to be overridden.") \
+        DRI_CONF_DESC("Override GPU vendor string.") \
 DRI_CONF_OPT_END

 #define DRI_CONF_FORCE_COMPAT_PROFILE(def) \
Author	SHA1	Message	Date
Dylan Baker	e60a1d5bc0	bump version for 20.2.0-rc2	2020-08-12 10:06:51 -07:00
Daniel Schürmann	b454e44579	aco: execute branch instructions in WQM if necessary It could happen that only the branch condition was computed in WQM and not the branch instruction. There is now some redundancy which should be cleaned up. Fixes: `3817fa7a4d` ('aco: fix WQM handling in nested loops') Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6260> (cherry picked from commit `fdb97d3d29`)	2020-08-11 09:43:35 -07:00
Rhys Perry	ee63146f37	aco: don't move memory accesses to before control barriers Fixes random failures of dEQP-VK.image.qualifiers.volatile.cube_array.r32i and similar tests on Vega. fossil-db (Navi): Totals from 6 (0.00% of 135946) affected shaders: VMEM: 1218 -> 1110 (-8.87%); split: +2.46%, -11.33% SMEM: 174 -> 189 (+8.62%) Copies: 84 -> 87 (+3.57%) Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Fixes: `cd392a10d0` ('radv/aco,aco: use scoped barriers') Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6174> (cherry picked from commit `7b4c24eb67`)	2020-08-11 09:43:35 -07:00
Samuel Pitoiset	6b6a38a8be	radv: limit LATE_ALLOC_GS to prevent a GPU hang on GFX10 Found by inspection, doesn't fix anything known. Cc: mesa-stable Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6279> (cherry picked from commit `e4c6204d65`)	2020-08-11 09:43:34 -07:00
Samuel Pitoiset	2d2ba264e9	radv/gfx10: add missing initialization of registers Found by inspection. Cc: mesa-stable Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6277> (cherry picked from commit `0256250547`)	2020-08-11 09:43:34 -07:00
Christian Gmeiner	d584839b9b	etnaviv: completely turn off MSAA MSAA worked before etnaviv landed in upstream mesa but got broken over time. Disable MSAA completely until it is fixed again. Fixes problems/crashes with applications that want to make use of MSAA. Cc: <mesa-stable@lists.freedesktop.org> Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com> Reviewed-by: Lucas Stach <l.stach@pengutronix.de> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5608> (cherry picked from commit `044b238507`)	2020-08-11 09:43:33 -07:00
Tapani Pälli	6e5874416d	anv: add a check for depthStencilState before using it v2: move the code under existing correct check! Fixes: `e4590c0750` ("anv: depth/stencil dynamic state support") Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3375 Signed-off-by: Tapani Pälli <tapani.palli@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Tested-by: Brian Paul <brianp@vmware.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6240> (cherry picked from commit `2d010d3dc5`)	2020-08-11 09:41:11 -07:00
Lionel Landwerlin	0ba14f96fc	anv: fix up dynamic clip emission There were 2 issues : * We were not emitting the clip state when the pipeline changed * On Gen7 we did not program the front facing & cull mode dynamic values in the clip state Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Fixes: `c34d8ac26e` ("anv: handle dynamic viewport count") Closes https://gitlab.freedesktop.org/mesa/mesa/-/issues/3379 Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6265> (cherry picked from commit `ffc8f2ba4c`)	2020-08-11 09:41:11 -07:00
Lionel Landwerlin	8acf2493a9	anv: centralize vk to gen arrays Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6265> (cherry picked from commit `240c0746d1`)	2020-08-11 09:41:09 -07:00
Rob Clark	e196f50d76	freedreno/crashdec: handle section name typos The fixes tag isn't so much because it was incorrect before, but because I'm going to send a kernel patch to fix the typo, and that will break old crashdec. Fixes: `1ea4ef0d3b` ("freedreno: slurp in decode tools") Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6242> (cherry picked from commit `cbfce486f2`)	2020-08-11 09:35:52 -07:00
Rhys Perry	6b823dfc01	aco: set constant_data_offset correctly in the case of merged shaders setup_nir() is done for all shaders before any of them are selected, so constant_data_offset could be incorrect for the first shader. Fixes incorrect geometry in Mafia III and Max Payne 3. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Cc: mesa-stable Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2768 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6205> (cherry picked from commit `6e70508151`)	2020-08-11 09:35:52 -07:00
Bas Nieuwenhuizen	c612c06cf1	radv: Do not consider layouts fast-clearable on compute queue. We cannot decompress from the compute queue. While I'm pretty sure VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL is only useful on the graphics queue, I cannot find a VU that prevents the transition from happening on another queue, so we need to be careful here. This patch ensures we do the decompression on the barrier that changes the queue ownership. Another problem was that DCC images were considered fast-clearable when not DCC compressed, which resulted in a mess with concurrent queue ownership. Cc: <mesa-stable@lists.freedesktop.org> Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3387 Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6252> (cherry picked from commit `e362ccb20c`)	2020-08-11 09:35:51 -07:00
Marcin Ślusarz	16da4a483c	intel/perf: fix performance counters availability after glFinish Currently Linux kernel gathers performance counters at fixed intervals (~5-10ms), so if application uses AMD_performance_monitor extension and immediately after glFinish() asks GL driver for HW performance counter values it might not get any data (values == 0). Fix this by moving the "read counters from kernel" code from "is query ready" to "get counter values" callback with a loop around it. Unfortunately it means that the "read counters from kernel" code can spin for up to 10ms. Ideally kernel should gather performance counters whenever we ask it for counter values, but for now we have to deal with what we have. Signed-off-by: Marcin Ślusarz <marcin.slusarz@intel.com> Cc: <mesa-stable@lists.freedesktop.org> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5788> (cherry picked from commit `2fbab5a1b3`)	2020-08-11 09:35:51 -07:00
Bas Nieuwenhuizen	5b25e9de0c	radv: Fix assert that is too strict. The added assert fails on MSAA images if we disable FMASK .... Reordered things. Fixes: `c6aadbae71` "radv: Don't use both DCC and CMASK for single sample images." Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3385 Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6232> (cherry picked from commit `ff4f6202be`)	2020-08-11 09:35:50 -07:00
Eric Engestrom	c09c88fbac	meson: bump required glvnd version https://github.com/KhronosGroup/EGL-Registry/pull/95 has moved a couple of extensions defines and functions to the upstream `eglext.h`, but when `9a74746bd1` sync'ed these files we broke compilation of apps that require these symbols on systems that don't have the updated Khronos headers. On non-GLVND builds, we still provide these headers, so everything's fine, but on GLVND builds the Khronos headers are external so we need to make sure we have a libglvnd version that's recent enough. Fixes: `9a74746bd1` ("EGL: sync headers with Khronos") Signed-off-by: Eric Engestrom <eric@engestrom.ch> Acked-by: Daniel Stone <daniels@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6069> (cherry picked from commit `dd003abd2f`)	2020-08-11 09:35:50 -07:00
Eric Engestrom	f3c3a1ceff	driconf: fix force_gl_vendor description The option is not a toggle to "allow GPU vendor to be overridden", it is the override. Fixes: `dca119f12c` ("mesa/gallium: add dric option to allow overriding GL vendor string") Signed-off-by: Eric Engestrom <eric@engestrom.ch> Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6207> (cherry picked from commit `7fbadfc385`)	2020-08-11 09:35:49 -07:00
Eric Engestrom	9f5c75a90f	egl/entrypoint-check: add check that GLVND and plain EGL have the same entrypoints Cc: mesa-stable Signed-off-by: Eric Engestrom <eric@engestrom.ch> Reviewed-by: Emil Velikov <emil.velikov@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4448> (cherry picked from commit `800816d70b`)	2020-08-11 09:35:48 -07:00
Eric Engestrom	53f5f43627	egl/entrypoint-check: split sort-check into a function Cc: mesa-stable Signed-off-by: Eric Engestrom <eric@engestrom.ch> Reviewed-by: Emil Velikov <emil.velikov@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4448> (cherry picked from commit `351d513e30`)	2020-08-11 09:35:47 -07:00
Eric Engestrom	7665280b28	pick-ui: specify git commands in "resolve cherry pick" message Cc: mesa-stable Signed-off-by: Eric Engestrom <eric@engestrom.ch> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6134> (cherry picked from commit `e3069c4257`)	2020-08-11 09:35:47 -07:00
Rob Clark	a6ccd24636	freedreno/registers: add some missing regs to build Needed for installed version of crashdec/cffdump. Fixes: `9c33c53898` ("freedreno/registers: install gzip'd register database") Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6234> (cherry picked from commit `3b6e8670f8`)	2020-08-11 09:35:46 -07:00
Dylan Baker	3f0a10b7da	.pick_status.json: Update to `fdb97d3d29`	2020-08-11 09:35:45 -07:00
Dylan Baker	d4d36010a8	meson/freedreno: Fix lua requirement Freedreno needs at least Lua 5.2, but the current code will report found for 5.1, which doesn't actually work. Fixes: `caa107cb8d` ("freedreno/decode: move dependencies up a level") Reviewed-by: Rob Clark <robclark@freedesktop.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6229> (cherry picked from commit `1e28745bc0`)	2020-08-07 10:44:41 -07:00
Marek Olšák	1edc9549d2	radeonsi: various fixes for gfx10.3 The magic numbers fix sample shading. The bypass flag is optional. Fixes: `a23802bcb9` - ac,radeonsi: start adding support for gfx10.3 Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6137> (cherry picked from commit `0cdd411b6d`)	2020-08-07 10:44:40 -07:00
Marek Olšák	ee4ccf0031	radeonsi: remove the NGG hack decreasing LDS usage to deal with overflows The LDS size can't overflow anymore, so we can use the correct max LDS size. Fixes: `a23802bcb9` - ac,radeonsi: start adding support for gfx10.3 Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6137> (cherry picked from commit `e2e700f605`)	2020-08-07 10:44:40 -07:00
Marek Olšák	9c719ad7c9	radeonsi: add a common function for getting the size of gs_ngg_scratch The next commit will use it. Fixes: `a23802bcb9` - ac,radeonsi: start adding support for gfx10.3 Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6137> (cherry picked from commit `97456e847e`)	2020-08-07 10:44:39 -07:00
Marek Olšák	3bf0368f9e	radeonsi: don't count unusable vertices to the NGG LDS size Now we get optimal LDS usage. Fixes: `a23802bcb9` - ac,radeonsi: start adding support for gfx10.3 Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6137> (cherry picked from commit `68b3e92fef`)	2020-08-07 10:44:39 -07:00
Marek Olšák	b0b55fa939	radeonsi: fix applying the NGG minimum vertex count requirement The code applied the restriction too late, which could overflow LDS size, which started happening more often after the minimum vertex count was increased for Sienna. Incorporate the clamping into the previous code for rounding up the counts. Now the LDS size can never overflow, but it may use vector lanes less efficiently (max_gsprims can be decreased more), which will be addressed in the next commit. Fixes: `4ecc39e1aa` ("radeonsi/gfx10: NGG geometry shader PM4 and upload") Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6137> (cherry picked from commit `64c741ffb7`)	2020-08-07 10:44:38 -07:00
Marek Olšák	3183610228	radeonsi: increase minimum NGG vertex count requirement per workgroup on gfx 10.3 Fixes: `a23802bcb9` - ac,radeonsi: start adding support for gfx10.3 Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6137> (cherry picked from commit `7a468fc0f6`)	2020-08-07 10:44:38 -07:00
Marek Olšák	6eadb68e98	radeonsi: use the same units for esgs_ring_size and ngg_emit_size for consistency Fixes: `a23802bcb9` - ac,radeonsi: start adding support for gfx10.3 Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6137> (cherry picked from commit `633d2aa915`)	2020-08-07 10:44:37 -07:00
Marek Olšák	81df3a4a4c	radeonsi: use correct wave size in gfx10_ngg_calculate_subgroup_info Fixes: `88efb63caf` ("radeonsi/gfx10: implement Wave32") Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6137> (cherry picked from commit `b6fb09fd84`)	2020-08-07 10:44:36 -07:00
Marek Olšák	d0b0165808	Revert "radeonsi: honor a user-specified pitch on gfx10.3" This reverts commit `c4b5fd9ab0`. It breaks mipmapping. This is only meant to be used by OpenCL, which allows setting a user pitch for linear images. In all other cases, don't support a custom pitch. Fixes: `c4b5fd9ab0` "radeonsi: honor a user-specified pitch on gfx10.3" Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6137> (cherry picked from commit `61c671c97e`)	2020-08-07 10:44:36 -07:00
Dylan Baker	9a5b5cdf9c	.pick_status.json: Update to `1e28745bc0`	2020-08-07 10:44:33 -07:00
Christian Gmeiner	1ed360d24b	etnaviv: call nir_lower_bool_to_bitsize Starting with commit `6f394343b1` ("nir/algebraic: i2f(f2i()) -> trunc()") dEQP-GLES2.functional.shaders.operator.binary_operator.div.lowp_int_vertex ends with an unsupported flt instruction. Use nir_lower_bool_to_bitsize to convert this flt to a flt32 which is supported. This fixes the introduced regression. Cc: 20.2 <mesa-stable@lists.freedesktop.org> Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6182> (cherry picked from commit `e63a7882a0`)	2020-08-07 08:52:49 -07:00
Lionel Landwerlin	761c979ae0	anv: fix incorrect realloc failure handling We don't want to leak in case of failure. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reported-by: Ivan Briano <ivan.briano@intel.com> Fixes: `246261f0ad` ("anv: prepare the driver for delayed submissions") Fixes: `34f32a6d66` ("anv: implement VK_KHR_timeline_semaphore") Reviewed-by: Ivan Briano <ivan.briano@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6200> (cherry picked from commit `b43bcb05a1`)	2020-08-07 08:52:48 -07:00
Rhys Perry	20663891c1	aco: fix C++11/C++14 compilation static_assert without a message is only available since C++17. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Fixes: `d1f992f3c2` ('aco: rework barriers and replace can_reorder') Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3374 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6216> (cherry picked from commit `21b47cbd99`)	2020-08-07 08:52:48 -07:00
Danylo Piliaiev	c2f85ade7d	st/mesa: Treat vertex outputs absent in outputMapping as zero in mesa_to_tgsi After updating vertex outputs being written based on optimized NIR, they may go out of sync with outputs in mesa IR. Which is translated to TGSI and used together with NIR if draw doesn't have llvm. It's much easier to treat such outputs as zero because there is no pass to entirely get rid of them. Similar to `eeab9c93db` but now for outputs. Fixes: `d684fb37bf` Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3365 Signed-off-by: Danylo Piliaiev <danylo.piliaiev@globallogic.com> Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6187> (cherry picked from commit `782ba8d3ae`)	2020-08-07 08:52:47 -07:00
Dylan Baker	cb341c7f86	.pick_status.json: Update to `9333a8570d`	2020-08-07 08:52:45 -07:00
Dylan Baker	0b8f4381b1	VERSION: bump for 20.2.0-rc1	2020-08-06 09:36:17 -07:00