Version: update to 19.0-rc5

mesa: return NULL if we exceed MaxColorAttachments in get_fb_attachment
This fixes invalid access to Attachment array which would occur if caller would exceed MaxColorAttachments. In practice this should not ever happen because DiscardFramebufferEXT specifies only GL_COLOR_ATTACHMENT0 to be valid and InvalidateFramebuffer will error out before but this should make coverity happy. v2: const, remove _EXT (Ian) CID: 1442559 Fixes: 0c42b5f3cb "mesa: wire up InvalidateFramebuffer" Signed-off-by: Tapani Pälli <tapani.palli@intel.com> Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> (cherry picked from commit 9762a9f893)
2019-02-19 11:15:18 -08:00 · 2019-02-19 07:08:42 -08:00 · 2019-02-19 07:08:23 -08:00 · 2019-02-19 07:08:11 -08:00 · 2019-02-19 07:07:54 -08:00 · 2019-02-19 07:07:38 -08:00
20 changed files with 164 additions and 52 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-19.0.0-rc4
+19.0.0-rc5
--- a/bin/get-pick-list.sh
+++ b/bin/get-pick-list.sh
@@ -13,12 +13,12 @@

 is_stable_nomination()
 {
-	git show --summary "$1" | grep -q -i -o "CC:.*mesa-stable"
+	git show --pretty=medium --summary "$1" | grep -q -i -o "CC:.*mesa-stable"
 }

 is_typod_nomination()
 {
-	git show --summary "$1" | grep -q -i -o "CC:.*mesa-dev"
+	git show --pretty=medium --summary "$1" | grep -q -i -o "CC:.*mesa-dev"
 }

 fixes=
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2365,7 +2365,7 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
 			if (is_16bit) {
 				for (unsigned chan = 0; chan < 4; chan++)
 					values[chan] = LLVMBuildZExt(ctx->ac.builder,
-								      values[chan],
+								      ac_to_integer(&ctx->ac, values[chan]),
 								      ctx->ac.i32, "");
 			}
 			break;
@@ -2376,7 +2376,7 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
 			if (is_16bit) {
 				for (unsigned chan = 0; chan < 4; chan++)
 					values[chan] = LLVMBuildSExt(ctx->ac.builder,
-								      values[chan],
+								      ac_to_integer(&ctx->ac, values[chan]),
 								      ctx->ac.i32, "");
 			}
 			break;
@@ -2429,12 +2429,8 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
 	} else
 		memcpy(&args->out[0], values, sizeof(values[0]) * 4);

-	for (unsigned i = 0; i < 4; ++i) {
-		if (!(args->enabled_channels & (1 << i)))
-			continue;
-
+	for (unsigned i = 0; i < 4; ++i)
 		args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
-	}
 }

 static void
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -524,6 +524,14 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
 		col_format |= cf << (4 * i);
 	}

+	if (!col_format && blend->need_src_alpha & (1 << 0)) {
+		/* When a subpass doesn't have any color attachments, write the
+		 * alpha channel of MRT0 when alpha coverage is enabled because
+		 * the depth attachment needs it.
+		 */
+		col_format |= V_028714_SPI_SHADER_32_ABGR;
+	}
+
 	/* If the i-th target format is set, all previous target formats must
 	 * be non-zero to avoid hangs.
 	 */
@@ -689,6 +697,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,

 	if (vkms && vkms->alphaToCoverageEnable) {
 		blend.db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1);
+		blend.need_src_alpha |= 0x1;
 	}

 	blend.cb_target_mask = 0;
@@ -3192,11 +3201,11 @@ radv_compute_db_shader_control(const struct radv_device *device,
 	bool disable_rbplus = device->physical_device->has_rbplus &&
 	                      !device->physical_device->rbplus_allowed;

-	/* Do not enable the gl_SampleMask fragment shader output if MSAA is
-	 * disabled.
+	/* It shouldn't be needed to export gl_SampleMask when MSAA is disabled
+	 * but this appears to break Project Cars (DXVK). See
+	 * https://bugs.freedesktop.org/show_bug.cgi?id=109401
 	 */
-	bool mask_export_enable = ms->num_samples > 1 &&
-				  ps->info.info.ps.writes_sample_mask;
+	bool mask_export_enable = ps->info.info.ps.writes_sample_mask;

 	return  S_02880C_Z_EXPORT_ENABLE(ps->info.info.ps.writes_z) |
 		S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.info.ps.writes_stencil) |
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -665,6 +665,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
 			assert(num < ws->num_buffers);
 			handles[num].bo_handle = bo->bo_handle;
 			handles[num].bo_priority = bo->priority;
+			num++;
 		}

 		r = amdgpu_bo_list_create_raw(ws->dev, ws->num_buffers,
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -121,7 +121,7 @@ vir_emit_thrsw(struct v3d_compile *c)
         */
        c->last_thrsw = vir_NOP(c);
        c->last_thrsw->qpu.sig.thrsw = true;
-        c->last_thrsw_at_top_level = (c->execute.file == QFILE_NULL);
+        c->last_thrsw_at_top_level = !c->in_control_flow;
 }

 static uint32_t
@@ -1158,7 +1158,9 @@ emit_frag_end(struct v3d_compile *c)

                inst->src[vir_get_implicit_uniform_src(inst)] =
                        vir_uniform_ui(c, tlb_specifier | 0xffffff00);
+                c->writes_z = true;
        } else if (c->s->info.fs.uses_discard ||
+                   !c->s->info.fs.early_fragment_tests ||
                   c->fs_key->sample_alpha_to_coverage ||
                   !has_any_tlb_color_write) {
                /* Emit passthrough Z if it needed to be delayed until shader
@@ -1188,6 +1190,7 @@ emit_frag_end(struct v3d_compile *c)

                inst->src[vir_get_implicit_uniform_src(inst)] =
                        vir_uniform_ui(c, tlb_specifier | 0xffffff00);
+                c->writes_z = true;
        }

        /* XXX: Performance improvement: Merge Z write and color writes TLB
@@ -2103,10 +2106,10 @@ ntq_emit_nonuniform_if(struct v3d_compile *c, nir_if *if_stmt)
        else
                else_block = vir_new_block(c);

-        bool was_top_level = false;
+        bool was_uniform_control_flow = false;
        if (c->execute.file == QFILE_NULL) {
                c->execute = vir_MOV(c, vir_uniform_ui(c, 0));
-                was_top_level = true;
+                was_uniform_control_flow = true;
        }

        /* Set up the flags for the IF condition (taking the THEN branch). */
@@ -2122,7 +2125,7 @@ ntq_emit_nonuniform_if(struct v3d_compile *c, nir_if *if_stmt)
        /* Update the flags+cond to mean "Taking the ELSE branch (!cond) and
         * was previously active (execute Z) for updating the exec flags.
         */
-        if (was_top_level) {
+        if (was_uniform_control_flow) {
                cond = v3d_qpu_cond_invert(cond);
        } else {
                struct qinst *inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0),
@@ -2176,7 +2179,7 @@ ntq_emit_nonuniform_if(struct v3d_compile *c, nir_if *if_stmt)
        vir_link_blocks(c->cur_block, after_block);

        vir_set_emit_block(c, after_block);
-        if (was_top_level)
+        if (was_uniform_control_flow)
                c->execute = c->undef;
        else
                ntq_activate_execute_for_block(c);
@@ -2185,12 +2188,15 @@ ntq_emit_nonuniform_if(struct v3d_compile *c, nir_if *if_stmt)
 static void
 ntq_emit_if(struct v3d_compile *c, nir_if *nif)
 {
+        bool was_in_control_flow = c->in_control_flow;
+        c->in_control_flow = true;
        if (c->execute.file == QFILE_NULL &&
            nir_src_is_dynamically_uniform(nif->condition)) {
                ntq_emit_uniform_if(c, nif);
        } else {
                ntq_emit_nonuniform_if(c, nif);
        }
+        c->in_control_flow = was_in_control_flow;
 }

 static void
@@ -2267,10 +2273,13 @@ static void ntq_emit_cf_list(struct v3d_compile *c, struct exec_list *list);
 static void
 ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
 {
-        bool was_top_level = false;
+        bool was_in_control_flow = c->in_control_flow;
+        c->in_control_flow = true;
+
+        bool was_uniform_control_flow = false;
        if (c->execute.file == QFILE_NULL) {
                c->execute = vir_MOV(c, vir_uniform_ui(c, 0));
-                was_top_level = true;
+                was_uniform_control_flow = true;
        }

        struct qblock *save_loop_cont_block = c->loop_cont_block;
@@ -2307,7 +2316,7 @@ ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
        vir_link_blocks(c->cur_block, c->loop_break_block);

        vir_set_emit_block(c, c->loop_break_block);
-        if (was_top_level)
+        if (was_uniform_control_flow)
                c->execute = c->undef;
        else
                ntq_activate_execute_for_block(c);
@@ -2316,6 +2325,8 @@ ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
        c->loop_cont_block = save_loop_cont_block;

        c->loops++;
+
+        c->in_control_flow = was_in_control_flow;
 }

 static void
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -519,6 +519,7 @@ struct v3d_compile {
        uint32_t centroid_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)];

        bool uses_center_w;
+        bool writes_z;

        struct v3d_ubo_range *ubo_ranges;
        bool *ubo_range_used;
@@ -531,6 +532,7 @@ struct v3d_compile {
         * yes, otherwise a block number + 1 that the channel jumped to.
         */
        struct qreg execute;
+        bool in_control_flow;

        struct qreg line_x, point_x, point_y;

@@ -716,7 +718,7 @@ struct v3d_fs_prog_data {
        uint32_t centroid_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1];

        bool writes_z;
-        bool discard;
+        bool disable_ez;
        bool uses_center_w;
 };

--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -777,21 +777,9 @@ v3d_fs_set_prog_data(struct v3d_compile *c,
                     struct v3d_fs_prog_data *prog_data)
 {
        v3d_set_fs_prog_data_inputs(c, prog_data);
-        prog_data->writes_z = (c->s->info.outputs_written &
-                               (1 << FRAG_RESULT_DEPTH));
-        prog_data->discard = (c->s->info.fs.uses_discard ||
-                              c->fs_key->sample_alpha_to_coverage);
+        prog_data->writes_z = c->writes_z;
+        prog_data->disable_ez = !c->s->info.fs.early_fragment_tests;
        prog_data->uses_center_w = c->uses_center_w;
-
-        /* If the shader has some side effects and hasn't allowed early
-         * fragment tests, disable them.
-         */
-        if (!c->s->info.fs.early_fragment_tests &&
-            (c->s->info.num_images ||
-             c->s->info.num_ssbos ||
-             c->s->info.num_abos)) {
-                prog_data->discard = true;
-        }
 }

 static void
@@ -888,6 +876,15 @@ v3d_nir_lower_fs_early(struct v3d_compile *c)
 {
        if (c->fs_key->int_color_rb || c->fs_key->uint_color_rb)
                v3d_fixup_fs_output_types(c);
+
+        /* If the shader has no non-TLB side effects, we can promote it to
+         * enabling early_fragment_tests even if the user didn't.
+         */
+        if (!(c->s->info.num_images ||
+              c->s->info.num_ssbos ||
+              c->s->info.num_abos)) {
+                c->s->info.fs.early_fragment_tests = true;
+        }
 }

 static void
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -618,11 +618,11 @@ optimizations = [
   # Reassociate constants in add/mul chains so they can be folded together.
   # For now, we mostly only handle cases where the constants are separated by
   # a single non-constant.  We could do better eventually.
-   (('~fmul', '#a', ('fmul', b, '#c')), ('fmul', ('fmul', a, c), b)),
-   (('imul', '#a', ('imul', b, '#c')), ('imul', ('imul', a, c), b)),
-   (('~fadd', '#a',          ('fadd', b, '#c')),  ('fadd', ('fadd', a,          c),           b)),
-   (('~fadd', '#a', ('fneg', ('fadd', b, '#c'))), ('fadd', ('fadd', a, ('fneg', c)), ('fneg', b))),
-   (('iadd', '#a', ('iadd', b, '#c')), ('iadd', ('iadd', a, c), b)),
+   (('~fmul', '#a', ('fmul', 'b(is_not_const)', '#c')), ('fmul', ('fmul', a, c), b)),
+   (('imul', '#a', ('imul', 'b(is_not_const)', '#c')), ('imul', ('imul', a, c), b)),
+   (('~fadd', '#a',          ('fadd', 'b(is_not_const)', '#c')),  ('fadd', ('fadd', a,          c),           b)),
+   (('~fadd', '#a', ('fneg', ('fadd', 'b(is_not_const)', '#c'))), ('fadd', ('fadd', a, ('fneg', c)), ('fneg', b))),
+   (('iadd', '#a', ('iadd', 'b(is_not_const)', '#c')), ('iadd', ('iadd', a, c), b)),

   # By definition...
   (('bcsel', ('ige', ('find_lsb', a), 0), ('find_lsb', a), -1), ('find_lsb', a)),
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -313,6 +313,13 @@ opt_if_loop_last_continue(nir_loop *loop)
   if (!then_ends_in_continue && !else_ends_in_continue)
      return false;

+   /* if the block after the if/else is empty we bail, otherwise we might end
+    * up looping forever
+    */
+   if (&nif->cf_node == nir_cf_node_prev(&last_block->cf_node) &&
+       exec_list_is_empty(&last_block->instr_list))
+      return false;
+
   /* Move the last block of the loop inside the last if-statement */
   nir_cf_list tmp;
   nir_cf_extract(&tmp, nir_after_cf_node(if_node),
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -3595,6 +3595,7 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,

      case SpvCapabilityInt64Atomics:
         spv_check_supported(int64_atomics, cap);
+         break;

      case SpvCapabilityInt8:
         spv_check_supported(int8, cap);
--- a/src/gallium/drivers/radeonsi/si_state_viewport.c
+++ b/src/gallium/drivers/radeonsi/si_state_viewport.c
@@ -185,6 +185,16 @@ static void si_emit_guardband(struct si_context *ctx)
 	const unsigned hw_screen_offset_alignment =
 		ctx->chip_class >= VI ? 16 : MAX2(ctx->screen->se_tile_repeat, 16);

+	/* Indexed by quantization modes */
+	static unsigned max_viewport_size[] = {65535, 16383, 4095};
+
+	/* Ensure that the whole viewport stays representable in
+	 * absolute coordinates.
+	 * See comment in si_set_viewport_states.
+	 */
+	assert(vp_as_scissor.maxx <= max_viewport_size[vp_as_scissor.quant_mode] &&
+	       vp_as_scissor.maxy <= max_viewport_size[vp_as_scissor.quant_mode]);
+
 	hw_screen_offset_x = CLAMP(hw_screen_offset_x, 0, MAX_PA_SU_HARDWARE_SCREEN_OFFSET);
 	hw_screen_offset_y = CLAMP(hw_screen_offset_y, 0, MAX_PA_SU_HARDWARE_SCREEN_OFFSET);

@@ -219,7 +229,6 @@ static void si_emit_guardband(struct si_context *ctx)
 	 *
 	 * The viewport range is [-max_viewport_size/2, max_viewport_size/2].
 	 */
-	static unsigned max_viewport_size[] = {65535, 16383, 4095};
 	assert(vp_as_scissor.quant_mode < ARRAY_SIZE(max_viewport_size));
 	max_range = max_viewport_size[vp_as_scissor.quant_mode] / 2;
 	left   = (-max_range - vp.translate[0]) / vp.scale[0];
@@ -333,6 +342,8 @@ static void si_set_viewport_states(struct pipe_context *pctx,
 		unsigned h = scissor->maxy - scissor->miny;
 		unsigned max_extent = MAX2(w, h);

+		int max_corner = MAX2(scissor->maxx, scissor->maxy);
+
 		unsigned center_x = (scissor->maxx + scissor->minx) / 2;
 		unsigned center_y = (scissor->maxy + scissor->miny) / 2;
 		unsigned max_center = MAX2(center_x, center_y);
@@ -358,7 +369,22 @@ static void si_set_viewport_states(struct pipe_context *pctx,
 		if (ctx->family == CHIP_RAVEN)
 			max_extent = 16384; /* Use QUANT_MODE == 16_8. */

-		if (max_extent <= 1024) /* 4K scanline area for guardband */
+		/* Another constraint is that all coordinates in the viewport
+		 * are representable in fixed point with respect to the
+		 * surface origin.
+		 *
+		 * It means that PA_SU_HARDWARE_SCREEN_OFFSET can't be given
+		 * an offset that would make the upper corner of the viewport
+		 * greater than the maximum representable number post
+		 * quantization, ie 2^quant_bits.
+		 *
+		 * This does not matter for 14.10 and 16.8 formats since the
+		 * offset is already limited at 8k, but it means we can't use
+		 * 12.12 if we are drawing to some pixels outside the lower
+		 * 4k x 4k of the render target.
+		 */
+
+		if (max_extent <= 1024 && max_corner < 4096) /* 4K scanline area for guardband */
 			scissor->quant_mode = SI_QUANT_MODE_12_12_FIXED_POINT_1_4096TH;
 		else if (max_extent <= 4096) /* 16K scanline area for guardband */
 			scissor->quant_mode = SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH;
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -369,6 +369,8 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
      return 32;
   case PIPE_CAP_MAX_SHADER_BUFFER_SIZE:
      return 1 << 27;
+   case PIPE_CAP_MAX_VARYINGS:
+      return 32;

   case PIPE_CAP_VENDOR_ID:
      return 0xFFFFFFFF;
--- a/src/gallium/drivers/v3d/v3dx_draw.c
+++ b/src/gallium/drivers/v3d/v3dx_draw.c
@@ -203,8 +203,13 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
                 * shader needs to write the Z value (even just discards).
                 */
                shader.fragment_shader_does_z_writes =
-                        (v3d->prog.fs->prog_data.fs->writes_z ||
-                         v3d->prog.fs->prog_data.fs->discard);
+                        v3d->prog.fs->prog_data.fs->writes_z;
+                /* Set if the EZ test must be disabled (due to shader side
+                 * effects and the early_z flag not being present in the
+                 * shader).
+                 */
+                shader.turn_off_early_z_test =
+                        v3d->prog.fs->prog_data.fs->disable_ez;

                shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
                        v3d->prog.fs->prog_data.fs->uses_center_w;
--- a/src/intel/compiler/brw_fs_cmod_propagation.cpp
+++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp
@@ -255,6 +255,13 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block)
            if (inst->opcode == BRW_OPCODE_AND)
               break;

+            /* Not safe to use inequality operators if the types are different
+             */
+            if (scan_inst->dst.type != inst->src[0].type &&
+                inst->conditional_mod != BRW_CONDITIONAL_Z &&
+                inst->conditional_mod != BRW_CONDITIONAL_NZ)
+               break;
+
            /* Comparisons operate differently for ints and floats */
            if (scan_inst->dst.type != inst->dst.type &&
                (scan_inst->dst.type == BRW_REGISTER_TYPE_F ||
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -512,6 +512,15 @@ fs_visitor::optimize_extract_to_float(nir_alu_instr *instr,
       src0->op != nir_op_extract_i8 && src0->op != nir_op_extract_i16)
      return false;

+   /* If either opcode has source modifiers, bail.
+    *
+    * TODO: We can potentially handle source modifiers if both of the opcodes
+    * we're combining are signed integers.
+    */
+   if (instr->src[0].abs || instr->src[0].negate ||
+       src0->src[0].abs || src0->src[0].negate)
+      return false;
+
   unsigned element = nir_src_as_uint(src0->src[1].src);

   /* Element type to extract.*/
--- a/src/intel/compiler/test_fs_cmod_propagation.cpp
+++ b/src/intel/compiler/test_fs_cmod_propagation.cpp
@@ -889,3 +889,35 @@ TEST_F(cmod_propagation_test, subtract_delete_compare_derp)
   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 1)->opcode);
   EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
 }
+
+TEST_F(cmod_propagation_test, signed_unsigned_comparison_mismatch)
+{
+   const fs_builder &bld = v->bld;
+   fs_reg dest0 = v->vgrf(glsl_type::int_type);
+   fs_reg src0 = v->vgrf(glsl_type::int_type);
+   src0.type = BRW_REGISTER_TYPE_W;
+
+   bld.ASR(dest0, negate(src0), brw_imm_d(15));
+   bld.CMP(bld.null_reg_ud(), retype(dest0, BRW_REGISTER_TYPE_UD),
+           brw_imm_ud(0u), BRW_CONDITIONAL_LE);
+
+   /* = Before =
+    * 0: asr(8)          dest:D   -src0:W 15D
+    * 1: cmp.le.f0(8)    null:UD  dest:UD 0UD
+    *
+    * = After =
+    * (no changes)
+    */
+   v->calculate_cfg();
+   bblock_t *block0 = v->cfg->blocks[0];
+
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+
+   EXPECT_FALSE(cmod_propagation(v));
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+   EXPECT_EQ(BRW_OPCODE_ASR, instruction(block0, 0)->opcode);
+   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_LE, instruction(block0, 1)->conditional_mod);
+}
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1449,10 +1449,10 @@ _anv_combine_address(struct anv_batch *batch, void *location,
 */

 /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
-#define GEN9_MOCS 2
+#define GEN9_MOCS (2 << 1)

 /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
-#define GEN9_EXTERNAL_MOCS 1
+#define GEN9_EXTERNAL_MOCS (1 << 1)

 /* Cannonlake MOCS defines are duplicates of Skylake MOCS defines. */
 #define GEN10_MOCS GEN9_MOCS
--- a/src/intel/vulkan/meson.build
+++ b/src/intel/vulkan/meson.build
@@ -1,4 +1,4 @@
-# Copyright © 2017-2018 Intel Corporation
+# Copyright © 2017-2019 Intel Corporation

 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -178,7 +178,10 @@ endif

 libanv_common = static_library(
  'anv_common',
-  [libanv_files, anv_entrypoints, anv_extensions_c, anv_extensions_h, sha1_h],
+  [
+    libanv_files, anv_entrypoints, anv_extensions_c, anv_extensions_h, sha1_h,
+    gen_xml_pack,
+  ],
  include_directories : [
    inc_common, inc_intel, inc_compiler, inc_drm_uapi, inc_vulkan_util,
    inc_vulkan_wsi,
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -4663,8 +4663,12 @@ get_fb_attachment(struct gl_context *ctx, struct gl_framebuffer *fb,
   case GL_COLOR_ATTACHMENT12:
   case GL_COLOR_ATTACHMENT13:
   case GL_COLOR_ATTACHMENT14:
-   case GL_COLOR_ATTACHMENT15:
-      return &fb->Attachment[BUFFER_COLOR0 + attachment - GL_COLOR_ATTACHMENT0];
+   case GL_COLOR_ATTACHMENT15: {
+      const unsigned i = attachment - GL_COLOR_ATTACHMENT0;
+      if (i >= ctx->Const.MaxColorAttachments)
+         return NULL;
+      return &fb->Attachment[BUFFER_COLOR0 + i];
+   }
   case GL_DEPTH:
   case GL_DEPTH_ATTACHMENT:
   case GL_DEPTH_STENCIL_ATTACHMENT:
Author	SHA1	Message	Date
Dylan Baker	2e7833ad91	Version: update to 19.0-rc5	2019-02-19 11:15:18 -08:00
Tapani Pälli	0a2e4b02ca	mesa: return NULL if we exceed MaxColorAttachments in get_fb_attachment This fixes invalid access to Attachment array which would occur if caller would exceed MaxColorAttachments. In practice this should not ever happen because DiscardFramebufferEXT specifies only GL_COLOR_ATTACHMENT0 to be valid and InvalidateFramebuffer will error out before but this should make coverity happy. v2: const, remove _EXT (Ian) CID: 1442559 Fixes: `0c42b5f3cb` "mesa: wire up InvalidateFramebuffer" Signed-off-by: Tapani Pälli <tapani.palli@intel.com> Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> (cherry picked from commit `9762a9f893`)	2019-02-19 07:08:42 -08:00
Rhys Perry	c7fc61d15b	radv: ensure export arguments are always float So that the signature is correct and consistent, the inputs to a export intrinsic should always be 32-bit floats. This and the previous commit fixes a large amount crashes from dEQP-VK.spirv_assembly.instruction.graphics.16bit_storage.input_output_int_* tests Fixes: `b722b29f10` ('radv: add support for 16bit input/output') Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> (cherry picked from commit `0ca550e01a`)	2019-02-19 07:08:23 -08:00
Rhys Perry	1b093b567f	radv: bitcast 16-bit outputs to integers 16-bit outputs are stored as 16-bit floats in the outputs array, so they have to be bitcast. Fixes: `b722b29f10` ('radv: add support for 16bit input/output') Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> (cherry picked from commit `64065aa504`)	2019-02-19 07:08:11 -08:00
Eric Anholt	d73e48b63f	v3d: Fix the check for "is the last thrsw inside control flow" The execute.file check used to be good enough, until I stopped setting up the execute mask for uniform ifs. No known tests fixed, noticed while doing a refactor. Fixes: `0805060573` ("v3d: Handle dynamically uniform IF statements with uniform control flow.") (cherry picked from commit `441294962c`)	2019-02-19 07:07:54 -08:00
Eric Anholt	ba24ca67f6	v3d: Use the early_fragment_tests flag for the shader's disable-EZ field. Apparently we need disable-EZ flagged, not just "does Z writes". Fixes dEQP-GLES31.functional.image_load_store.early_fragment_tests.no_early_fragment_tests_depth_fbo on 7278, even though it passed in simulation. Signed-off-by: Eric Anholt <eric@anholt.net> Fixes: `051a41d3d5` ("v3d: Add support for the early_fragment_tests flag.") (cherry picked from commit `cd5e0b2729`)	2019-02-19 07:07:38 -08:00
Samuel Pitoiset	110500cc8a	radv: fix writing the alpha channel of MRT0 when alpha coverage is enabled This version is better and safer. Cc: 18.3 19.0 <mesa-stable@lists.freedesktop.org> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> (cherry picked from commit `47616810ed`)	2019-02-19 07:07:22 -08:00
Samuel Pitoiset	0b9f6ebfbb	radv: write the alpha channel of MRT0 when alpha coverage is enabled Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109597 Cc: 18.3 19.0 <mesa-stable@lists.freedesktop.org> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> (cherry picked from commit `0d8f096293`)	2019-02-19 07:07:13 -08:00
Kenneth Graunke	69ebf4569a	nir: Don't reassociate add/mul chains containing only constants The idea here is to reassociate a * (b * c) into (a * c) * b, when b is a non-constant value, but a and c are constants, allowing them to be combined. But nothing was enforcing that 'b' must be non-constant, which meant that running opt_algebraic in a loop would never terminate if the IR contained non-folded constant expressions like 256 * 0.5 * 2. Normally, we call constant folding in such a loop too, but IMO it's better for nir_opt_algebraic to be robust and not rely on that. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109581 Fixes: `32e266a9a5` i965: Compile fp64 funcs only if we do not have 64-bit hardware support Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> (cherry picked from commit `535251487b`)	2019-02-19 07:07:04 -08:00
Matt Turner	385b736238	intel/compiler/test: Add unit test for mismatched signedness comparison v2 (idr): Move adding the test to after adding the fix. Reordering the two commits prevents possible headaches for git-bisect with scripts that always do 'ninja check'. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109404 Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> (cherry picked from commit `ac21dd4aee`)	2019-02-15 12:03:53 -08:00
Matt Turner	4cf1a40f9a	intel/compiler: Avoid propagating inequality cmods if types are different v2: Fix silly bug in logic. s/\|\|/&&/ All but one of the affected shaders is in an Unreal4 demo. The other is in Tomb Raider. All of the cases that Ian investigated appear to be sequences like the following if (int(uint(some_float)) < 0) /* other relations too */ ... At least in Tomb Raider, it's not obvious that this sequence came from the original shader. In some of the Unreal demos, the shader contains code like if (int(uint(textureLod(...))) > 0) ... which explicitly generates the offending sequence. All Gen6+ platforms had similar results (Skylake shown): total instructions in shared programs: 15437170 -> 15437187 (<.01%) instructions in affected programs: 4492 -> 4509 (0.38%) helped: 0 HURT: 17 HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 0.05% max: 0.73% x̄: 0.66% x̃: 0.73% 95% mean confidence interval for instructions value: 1.00 1.00 95% mean confidence interval for instructions %-change: 0.57% 0.75% Instructions are HURT. total cycles in shared programs: 383007996 -> 383007992 (<.01%) cycles in affected programs: 20542 -> 20538 (-0.02%) helped: 6 HURT: 7 helped stats (abs) min: 2 max: 6 x̄: 5.33 x̃: 6 helped stats (rel) min: 0.11% max: 0.36% x̄: 0.32% x̃: 0.36% HURT stats (abs) min: 4 max: 4 x̄: 4.00 x̃: 4 HURT stats (rel) min: 0.27% max: 0.27% x̄: 0.27% x̃: 0.27% 95% mean confidence interval for cycles value: -3.30 2.69 95% mean confidence interval for cycles %-change: -0.19% 0.19% Inconclusive result (value mean confidence interval includes 0). No changes on Iron Lake or GM45. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109404 Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Tested-by: nagrigoriadis@gmail.com Tested-by: Danylo Piliaiev <danylo.piliaiev@gmail.com> (cherry picked from commit `2dff9a66b6`)	2019-02-15 12:03:44 -08:00
Jason Ekstrand	81e053b757	intel/fs: Bail in optimize_extract_to_float if we have modifiers This fixes a bug in runscape where we were optimizing x >> 16 to an extract and then negating and converting to float. The NIR to fs pass was dropping the negate on the floor breaking a geometry shader and causing it to render nothing. Fixes: `1f862e923c` "i965/fs: Optimize float conversions of byte/word..." Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109601 Tested-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Matt Turner <mattst88@gmail.com> (cherry picked from commit `367b0ede4d`)	2019-02-15 10:03:26 -08:00
Ilia Mirkin	f30fb27665	swr: set PIPE_CAP_MAX_VARYINGS correctly Unfortunately swr was missed in the original commit. The number of varyings should generally match up to what's reported as the shader caps for fragment inputs. Fixes: `6010d7b8e8` (gallium: add PIPE_CAP_MAX_VARYINGS) Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu> Reviewed-by: Alok Hota <alok.hota@intel.com> Cc: 19.0 <mesa-stable@lists.freedesktop.org> (cherry picked from commit `8c859367df`)	2019-02-15 10:03:20 -08:00
Kenneth Graunke	1039285288	anv: Put MOCS in the correct location My patch to switch from struct-based MOCS to numeric MOCS accidentally divided all MOCS entries by 2 in the Vulkan driver. MOCS on Gen9+ is just an array index into a table. But in the hardware packets, the index starts at bit 1. So we need to shift it. Fixes: `0b44644ca6` (genxml: Consistently use a numeric "MOCS" field) Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> (cherry picked from commit `39aee57523`)	2019-02-15 10:03:08 -08:00
Ian Romanick	59812ac38d	spirv: Add missing break Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Fixes: `c6465fec0c` ("spirv: add SpvCapabilityInt64Atomics") CID: 1442555 (cherry picked from commit `9a918050e0`)	2019-02-14 09:30:54 -08:00
Dylan Baker	c19ce6e5e2	meson: Add dependency on genxml to anvil Currently the Intel "anvil" driver races with the generation of genxml files, while i965 has an explicit dependency. This patch adds the same dependency to anvil. Fixes: `d1992255bb` ("meson: Add build Intel "anv" vulkan driver") Acked-by: Jason Ekstrand <jason@jlekstrand.net> Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Eric Engestrom <eric.engestrom@intel.com> (cherry picked from commit `279060cd32`)	2019-02-14 09:30:44 -08:00
Samuel Pitoiset	eba57c29b0	radv: always export gl_SampleMask when the fragment shader uses it For some reasons, this breaks trees rendering in Project Cars. Fixes: `85010585cd` ("radv: only enable gl_SampleMask if MSAA is enabled too") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109401 Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> (cherry picked from commit `334da034d8`)	2019-02-14 09:30:38 -08:00
Samuel Pitoiset	e304007d87	radv/winsys: fix BO list creation when RADV_DEBUG=allbos is set Fixes: `50fd253bd6` ("radv/winsys: Add priority handling during submit.") Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> (cherry picked from commit `5e18000d1b`)	2019-02-14 09:30:33 -08:00
Dylan Baker	b4419fdba5	get-pick-list: Add --pretty=medium to the arguments for Cc patches Because none of them have been picked up for 19.0 due to this bug being reintroduced. v2: - Fix fixes tags Fixes: `e6b3a3b201` ("bin/get-pick-list.sh: handle "typod" usecase.") Fixes: `fac10169bb` ("bin/get-pick-list.sh: prefix output with "[stable] "") Reviewed-by: Andres Gomez <agomez@igalia.com> Reviewed-by: Emil Velikov <emil.velikov@collabora.com> (cherry picked from commit `aff52dd2c6`)	2019-02-13 14:14:35 -08:00
Karol Herbst	7ac15d9e42	nir/opt_if: don't mark progress if nothing changes if we have something like this: loop { ... if x { break; } else { continue; } } opt_if_loop_last_continue returns true marking progress allthough nothing changes. Fixes: `5921a19d4b` "nir: add if opt opt_if_loop_last_continue()" Signed-off-by: Karol Herbst <kherbst@redhat.com> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> (cherry picked from commit `7e08f22a72`)	2019-02-13 14:14:35 -08:00
Oscar Blumberg	6b48451110	radeonsi: Fix guardband computation for large render targets Stop using 12.12 quantization for viewports that are not contained in the lower 4k corner of the render target as the hardware needs to keep both absolute and relative coordinates representable. Signed-off-by: Marek Olšák <marek.olsak@amd.com> Cc: 18.3 19.0 <mesa-stable@lists.freedesktop.org> (cherry picked from commit `3c540e0a74`)	2019-02-13 14:14:35 -08:00