Compare commits

...

36 Commits

Author SHA1 Message Date
Dylan Baker
56a47e3421 Bump version for 19.0-rc3 2019-02-12 12:39:36 -08:00
Dylan Baker
ca36eb12fd Revert "intel/compiler: More peephole select"
This reverts commit 8fb8ebfbb0.
2019-02-12 09:42:59 -08:00
Dylan Baker
9dd433dfa7 Revert "nir/opt_peephole_select: Don't peephole_select expensive math instructions"
This reverts commit 378f996771.

This also removes the default true argument from the a2xx nir backend,
which was introduced after this commit. There should be no change in
functionality.
2019-02-12 09:42:16 -08:00
Dylan Baker
f59c77ef8c Revert "intel/compiler: More peephole_select for pre-Gen6"
This reverts commit af07141b33.
2019-02-11 16:26:01 -08:00
Dylan Baker
61c22ba94b cherry-ignore: Add some patches 2019-02-11 16:24:42 -08:00
Jason Ekstrand
ad2b712a56 nir/deref: Rematerialize parents in rematerialize_derefs_in_use_blocks
When nir_rematerialize_derefs_in_use_blocks_impl was first written, I
attempted to optimize things a bit by not bothering to re-materialize
the sources of deref instructions figuring that the final caller would
take care of that.  However, in the case of more complex deref chains
where the first link or two lives in block A and then another link and
the load/store_deref intrinsic live in block B, it doesn't work.  The
code in rematerialize_deref_in_block looks at the tail of the chain,
sees that it's already in block B and skips it, not realizing that part
of the chain also lives in block A.

The easy solution here is to just rematerialize deref sources of deref
instructions as well.  This may potentially lead to a few more deref
instructions being created, but the conditions required for that to
actually happen are fairly unlikely and, thanks to the caching, it's all
linear time regardless.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109603
Fixes: 7d1d1208c2 "nir: Add a small pass to rematerialize derefs per-block"
Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
(cherry picked from commit 9e6a6ef0d4)
2019-02-11 16:24:42 -08:00
Ian Romanick
07e299a0a0 nir: Silence zillions of unused parameter warnings in release builds
Fixes: cd56d79b59 "nir: check NIR_SKIP to skip passes by name"
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
(cherry picked from commit 78169870e4)
2019-02-11 09:07:09 -08:00
Ilia Mirkin
36d99d9ad0 nvc0/ir: fix second tex argument after levelZero optimization
We used to pre-set a bunch of extra arguments to a texture instruction
in order to force the RA to allocate a register at the boundary of 4.
However with the levelZero optimization, which removes a LOD argument
when it's uniformly equal to zero, we undid that logic by removing an
extra argument. As a result, we could end up with insufficient alignment
on the second wide texture argument.

Instead we switch to a different method of achieving the same result.
The logic runs during the constraint analysis of the RA, and adds unset
sources as necessary right before being merged into a wide argument.

Fixes MISALIGNED_REG errors in Hitman when run with bindless textures
enabled on a GK208.

Fixes: 9145873b15 ("nvc0/ir: use levelZero flag when the lod is set to 0")
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: 19.0 <mesa-stable@lists.freedesktop.org>
(cherry picked from commit 5de5beedf2)
2019-02-07 09:51:39 -08:00
Kristian H. Kristensen
94f0908216 freedreno/a6xx: Emit blitter dst with OUT_RELOCW
We're writing to the bo and the kernel needs to know for
fd_bo_cpu_prep() to work.

Fixes: f93e431272 ("freedreno/a6xx: Enable blitter")
Reviewed-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Kristian H. Kristensen <hoegsberg@chromium.org>
(cherry picked from commit 357ea7da51)
2019-02-07 09:51:39 -08:00
Bas Nieuwenhuizen
f880c74717 amd/common: handle nir_deref_cast for shared memory from integers.
Can happen e.g. after a phi.

Fixes: a2b5cc3c39 "radv: enable variable pointers"
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
(cherry picked from commit 8d1718590b)
2019-02-07 09:51:39 -08:00
Bas Nieuwenhuizen
6f36d3bbc0 amd/common: Handle nir_deref_type_ptr_as_array for shared memory.
Fixes: a2b5cc3c39 "radv: enable variable pointers"
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
(cherry picked from commit 830fd0efc1)
2019-02-07 09:51:39 -08:00
Bas Nieuwenhuizen
b4e8a3294c amd/common: Add gep helper for pointer increment.
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
(cherry picked from commit e00d9a9a72)
2019-02-07 09:51:39 -08:00
Bas Nieuwenhuizen
ef6809ba88 amd/common: Fix stores to derefs with unknown variable.
Fixes: a2b5cc3c39 "radv: enable variable pointers"
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
(cherry picked from commit dbdb44d575)
2019-02-07 09:38:23 -08:00
Bas Nieuwenhuizen
7254d2f4a3 radv: Fix the shader info pass for not having the variable.
For example with VK_EXT_buffer_device_address or VK_KHR_variable_pointers.

Fixes: a2b5cc3c39 "radv: enable variable pointers"
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
(cherry picked from commit 00253ab2c4)
2019-02-07 09:37:37 -08:00
Eric Engestrom
dbc43e3897 xvmc: fix string comparison
Fixes: 6fca18696d "g3dvl: Update XvMC unit tests."
Cc: Younes Manton <younes.m@gmail.com>
Signed-off-by: Eric Engestrom <eric.engestrom@intel.com>
(cherry picked from commit 40b53a7203)
2019-02-07 09:37:17 -08:00
Eric Engestrom
262fd16b99 xvmc: fix string comparison
Fixes: c7b65dcaff "xvmc: Define some Xv attribs to allow users
                             to specify color standard and procamp"
Cc: Christian König <christian.koenig@amd.com>
Signed-off-by: Eric Engestrom <eric.engestrom@intel.com>
(cherry picked from commit 110a6e1839)
2019-02-07 09:37:07 -08:00
Jonathan Marek
452f9b9984 freedreno: a2xx: fix fast clear
Fixes: 912a9c8d

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Cc: 19.0 <mesa-stable@lists.freedesktop.org>
(cherry picked from commit 3361305f57)
2019-02-06 09:54:31 -08:00
Dylan Baker
131f12d49f Version: Bump for rc2 2019-02-05 11:49:03 -08:00
Emil Velikov
f8f68c41a1 anv: wire up the state_pool_padding test
Cc: Jason Ekstrand <jason@jlekstrand.net>
Fixes: 927ba12b53 ("anv/tests: Adding test for the state_pool padding.")
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Reviewed-by: Eric Engestrom <eric.engestrom@intel.com>
Reviewed-by: Rafael Antognolli <rafael.antognolli@intel.com>
Reviewed-by: Dylan Baker <dylan@pnwbakers.com>
(cherry picked from commit 8943eb8f03)
2019-02-05 11:41:54 -08:00
Michel Dänzer
15e2fc16e9 loader/dri3: Use strlen instead of sizeof for creating VRR property atom
sizeof counts the terminating null character as well, so that also
contributed to the ID computed for the X11 atom. But the convention is
for only the non-null characters to contribute to the atom ID.

Fixes: 2e12fe425f "loader/dri3: Enable adaptive_sync via
                     _VARIABLE_REFRESH property"
Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
(cherry picked from commit c0a540f320)
2019-02-05 11:41:48 -08:00
Marek Olšák
3f5099180d radeonsi: fix crashing performance counters (division by zero)
Fixes: e2b9329f17 "radeonsi: move remaining perfcounter code into si_perfcounter.c"
(cherry picked from commit 742d6cdb42)
2019-02-05 09:05:51 -08:00
Danylo Piliaiev
9667d89fe6 anv: Fix VK_EXT_transform_feedback working with varyings packed in PSIZ
Transform feedback did not set correct SO_DECL.ComponentMask for
varyings packed in VARYING_SLOT_PSIZ:
 gl_Layer         - VARYING_SLOT_LAYER    in VARYING_SLOT_PSIZ.y
 gl_ViewportIndex - VARYING_SLOT_VIEWPORT in VARYING_SLOT_PSIZ.z
 gl_PointSize     - VARYING_SLOT_PSIZ     in VARYING_SLOT_PSIZ.w

Fixes: 36ee2fd61c "anv: Implement the basic form of VK_EXT_transform_feedback"

Signed-off-by: Danylo Piliaiev <danylo.piliaiev@globallogic.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
(cherry picked from commit 64d3b148fe)
2019-02-04 09:16:37 -08:00
Jason Ekstrand
c6649ca94d intel/fs: Do the grf127 hack on SIMD8 instructions in SIMD16 mode
Previously, we only applied the fix to shaders with a dispatch mode of
SIMD8 but the code it relies on for SIMD16 mode only applies to SIMD16
instructions.  If you have a SIMD8 instruction in a SIMD16 shader,
neither would trigger and the restriction could still be hit.

Fixes: 232ed89802 "i965/fs: Register allocator shoudn't use grf127..."
Reviewed-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
(cherry picked from commit b4f0d062cd)
2019-02-04 09:16:21 -08:00
Neha Bhende
89f84f98e0 st/mesa: Fix topogun-1.06-orc-84k-resize.trace crash
We need to initialize all fields in rs->prim explicitly while
creating a new rastpos stage.

Fixes: bac8534267 ("st/mesa: allow glDrawElements to work with GL_SELECT
feedback")

v2: Initializing all fields in rs->prim as per Ilia.

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
(cherry picked from commit 69d736b17a)
2019-02-01 09:19:29 -08:00
Ernestas Kulik
c824f8031c v3d: Fix leak in resource setup error path
Reported by Coverity: in the case of unsupported modifier request, the
code does not jump to the “fail” label to destroy the acquired resource.

CID: 1435704
Signed-off-by: Ernestas Kulik <ernestas.kulik@gmail.com>
Fixes: 45bb8f2957 ("broadcom: Add V3D 3.3 gallium driver called "vc5", for BCM7268.")
(cherry picked from commit 90458bef54)
2019-01-31 11:12:29 -08:00
Eric Anholt
7fdb08375f v3d: Fix image_load_store clamping of signed integer stores.
This was copy-and-paste fail that oddly showed up in the CTS's
reinterprets of r32f, rgba8, and srgba8 to rgba8i, but not r32ui and r32i
to rgba8i or reinterprets to other signed int formats.

Fixes: 6281f26f06 ("v3d: Add support for shader_image_load_store.")
(cherry picked from commit ab4d5775b0)
2019-01-31 11:09:28 -08:00
Eric Anholt
535cc4f1d5 mesa: Skip partial InvalidateFramebuffer of packed depth/stencil.
One of the CTS cases tries to invalidate just stencil of packed
depth/stencil, and we incorrectly lost the depth contents.

Fixes dEQP-GLES3.functional.fbo.invalidate.whole.unbind_read_stencil
Fixes: 0c42b5f3cb ("mesa: wire up InvalidateFramebuffer")
Reviewed-by: Marek Olšák <marek.olsak@amd.com>

(cherry picked from commit db2ae51121)
2019-01-31 11:09:05 -08:00
Rob Clark
7f91ae20b9 freedreno: more fixing release tarball
Fixes: aa0fed10d3 freedreno: move ir3 to common location
Signed-off-by: Rob Clark <robdclark@gmail.com>
(cherry picked from commit 39cfdf9930)
2019-01-31 11:08:53 -08:00
Rob Clark
0a72505a9e freedreno: fix release tarball
Fixes: b4476138d5 freedreno: move drm to common location
Reviewed-by: Eric Engestrom <eric.engestrom@intel.com>
Signed-off-by: Rob Clark <robdclark@gmail.com>
(cherry picked from commit e252656d14)
2019-01-31 11:08:11 -08:00
Samuel Pitoiset
31d0079a20 radv/winsys: fix hash when adding internal buffers
This fixes serious stuttering in Shadow Of The Tomb Raider.

Fixes: 50fd253bd6 ("radv/winsys: Add priority handling during submit.")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
(cherry picked from commit 9c762c01c8)
2019-01-31 11:07:40 -08:00
Ernestas Kulik
4d1dd3b0cd vc4: Fix leak in HW queries error path
Reported by Coverity: in the case where there exist hardware and
non-hardware queries, the code does not jump to err_free_query and leaks
the query.

CID: 1430194
Signed-off-by: Ernestas Kulik <ernestas.kulik@gmail.com>
Fixes: 9ea90ffb98 ("broadcom/vc4: Add support for HW perfmon")
(cherry picked from commit f6e49d5ad0)
2019-01-31 11:07:26 -08:00
Emil Velikov
45d1aa2f6c vc4: Declare the last cpu pointer as being modified in NEON asm.
An earlier commit addressed 7 of the 8 instances available.

v2: Rebase patch back to master (by anholt)

Cc: Carsten Haitzler (Rasterman) <raster@rasterman.com>
Cc: Eric Anholt <eric@anholt.net>
Fixes: 300d3ae8b1 ("vc4: Declare the cpu pointers as being modified in NEON asm.")
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
(cherry picked from commit 385843ac3c)
2019-01-31 10:59:58 -08:00
Dylan Baker
2fddad9e3f VERSION: bump to 19.0.0-rc1 2019-01-30 14:10:12 -08:00
Dylan Baker
2b603ee4f1 android,autotools,i965: Fix location of float64_glsl.h
Android.mk and autotools disagree about where generated files should
go, which wasn't a problem until we wanted to build a dist
tarball. This corrects the problem by changing the output and include
paths to be the same on Android and autotools (meson already has the
correct include path).

Fixes: 7d7b30835c
       ("automake: Fix path to generated source")
2019-01-30 14:10:12 -08:00
Dylan Baker
e7f6a5d17f automake: Add --enable-autotools to distcheck flags
Fixes: e68777c87c
       ("autotools: Deprecate the use of autotools")
2019-01-30 09:45:14 -08:00
Dylan Baker
1f5f12687f configure: Bump SWR LLVM requirement to 7
It is currently impossible to build a dist tarball that works when SWR
requires LLVM 6. To generate the tarball we'd need to configure with
LLVM 6, which is fine. But to build the dist check we need LLVM 7, as
RadeonSI and RadV require that version. Unfortunately the headers
generated with LLVM 6 don't compile with LLVM 7; the API has changed
between the two versions.

I weighed a couple of options here. One would be to ship an
unbootstrapped tarball generated with meson. This would fix the issue
by not bootstrapping, so whatever version of LLVM was used would work
because the SWR headers would be generated at compile
time. Unfortunately this would involve some heavy modifications to the
infrastructure used to upload the tarballs, and I've decided not to
pursue this.
2019-01-30 09:27:14 -08:00
42 changed files with 255 additions and 202 deletions


@@ -22,6 +22,7 @@
SUBDIRS = src
AM_DISTCHECK_CONFIGURE_FLAGS = \
--enable-autotools \
--enable-dri \
--enable-dri3 \
--enable-egl \


@@ -1 +1 @@
19.0.0-devel
19.0.0-rc3

bin/.cherry-ignore Normal file

@@ -0,0 +1,3 @@
# Both of these were already merged with different shas
da48cba61ef6fefb799bf96e6364b70dbf4ec712
c812c740e60c14060eb89db66039111881a0f42f


@@ -122,7 +122,7 @@ LLVM_REQUIRED_OPENCL=3.9.0
LLVM_REQUIRED_R600=3.9.0
LLVM_REQUIRED_RADEONSI=7.0.0
LLVM_REQUIRED_RADV=7.0.0
LLVM_REQUIRED_SWR=6.0.0
LLVM_REQUIRED_SWR=7.0.0
dnl Check for progs
AC_PROG_CPP
@@ -2845,8 +2845,8 @@ if test -n "$with_gallium_drivers"; then
fi
# XXX: Keep in sync with LLVM_REQUIRED_SWR
AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x6.0.0 -a \
"x$LLVM_VERSION" != x6.0.1)
AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x7.0.0 -a \
"x$LLVM_VERSION" != x7.0.1)
if test "x$enable_llvm" = "xyes" -a "$with_gallium_drivers"; then
llvm_require_version $LLVM_REQUIRED_GALLIUM "gallium"


@@ -923,6 +923,14 @@ ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
}
LLVMValueRef
ac_build_gep_ptr(struct ac_llvm_context *ctx,
LLVMValueRef base_ptr,
LLVMValueRef index)
{
return LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, "");
}
LLVMValueRef
ac_build_gep0(struct ac_llvm_context *ctx,
LLVMValueRef base_ptr,


@@ -223,6 +223,11 @@ ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
LLVMValueRef attr_number,
LLVMValueRef params);
LLVMValueRef
ac_build_gep_ptr(struct ac_llvm_context *ctx,
LLVMValueRef base_ptr,
LLVMValueRef index);
LLVMValueRef
ac_build_gep0(struct ac_llvm_context *ctx,
LLVMValueRef base_ptr,


@@ -2006,18 +2006,23 @@ static void
visit_store_var(struct ac_nir_context *ctx,
nir_intrinsic_instr *instr)
{
nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
nir_variable *var = nir_deref_instr_get_variable(deref);
LLVMValueRef temp_ptr, value;
int idx = var->data.driver_location;
unsigned comp = var->data.location_frac;
int idx = 0;
unsigned comp = 0;
LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[1]));
int writemask = instr->const_index[0];
LLVMValueRef indir_index;
unsigned const_index;
get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), false,
NULL, NULL, &const_index, &indir_index);
if (var) {
get_deref_offset(ctx, deref, false,
NULL, NULL, &const_index, &indir_index);
idx = var->data.driver_location;
comp = var->data.location_frac;
}
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
@@ -2030,7 +2035,7 @@ visit_store_var(struct ac_nir_context *ctx,
writemask = writemask << comp;
switch (var->data.mode) {
switch (deref->mode) {
case nir_var_shader_out:
if (ctx->stage == MESA_SHADER_TESS_CTRL) {
@@ -2039,8 +2044,8 @@ visit_store_var(struct ac_nir_context *ctx,
unsigned const_index = 0;
const bool is_patch = var->data.patch;
get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
false, NULL, is_patch ? NULL : &vertex_index,
get_deref_offset(ctx, deref, false, NULL,
is_patch ? NULL : &vertex_index,
&const_index, &indir_index);
ctx->abi->store_tcs_outputs(ctx->abi, var,
@@ -3818,6 +3823,73 @@ static void visit_jump(struct ac_llvm_context *ctx,
}
}
static LLVMTypeRef
glsl_base_to_llvm_type(struct ac_llvm_context *ac,
enum glsl_base_type type)
{
switch (type) {
case GLSL_TYPE_INT:
case GLSL_TYPE_UINT:
case GLSL_TYPE_BOOL:
case GLSL_TYPE_SUBROUTINE:
return ac->i32;
case GLSL_TYPE_INT16:
case GLSL_TYPE_UINT16:
return ac->i16;
case GLSL_TYPE_FLOAT:
return ac->f32;
case GLSL_TYPE_FLOAT16:
return ac->f16;
case GLSL_TYPE_INT64:
case GLSL_TYPE_UINT64:
return ac->i64;
case GLSL_TYPE_DOUBLE:
return ac->f64;
default:
unreachable("unknown GLSL type");
}
}
static LLVMTypeRef
glsl_to_llvm_type(struct ac_llvm_context *ac,
const struct glsl_type *type)
{
if (glsl_type_is_scalar(type)) {
return glsl_base_to_llvm_type(ac, glsl_get_base_type(type));
}
if (glsl_type_is_vector(type)) {
return LLVMVectorType(
glsl_base_to_llvm_type(ac, glsl_get_base_type(type)),
glsl_get_vector_elements(type));
}
if (glsl_type_is_matrix(type)) {
return LLVMArrayType(
glsl_to_llvm_type(ac, glsl_get_column_type(type)),
glsl_get_matrix_columns(type));
}
if (glsl_type_is_array(type)) {
return LLVMArrayType(
glsl_to_llvm_type(ac, glsl_get_array_element(type)),
glsl_get_length(type));
}
assert(glsl_type_is_struct(type));
LLVMTypeRef member_types[glsl_get_length(type)];
for (unsigned i = 0; i < glsl_get_length(type); i++) {
member_types[i] =
glsl_to_llvm_type(ac,
glsl_get_struct_field(type, i));
}
return LLVMStructTypeInContext(ac->context, member_types,
glsl_get_length(type), false);
}
static void visit_deref(struct ac_nir_context *ctx,
nir_deref_instr *instr)
{
@@ -3839,9 +3911,27 @@ static void visit_deref(struct ac_nir_context *ctx,
result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
get_src(ctx, instr->arr.index));
break;
case nir_deref_type_cast:
result = get_src(ctx, instr->parent);
case nir_deref_type_ptr_as_array:
result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
get_src(ctx, instr->arr.index));
break;
case nir_deref_type_cast: {
result = get_src(ctx, instr->parent);
LLVMTypeRef pointee_type = glsl_to_llvm_type(&ctx->ac, instr->type);
LLVMTypeRef type = LLVMPointerType(pointee_type, AC_ADDR_SPACE_LDS);
if (LLVMTypeOf(result) != type) {
if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
result = LLVMBuildBitCast(ctx->ac.builder, result,
type, "");
} else {
result = LLVMBuildIntToPtr(ctx->ac.builder, result,
type, "");
}
}
break;
}
default:
unreachable("Unhandled deref_instr deref type");
}
@@ -3990,73 +4080,6 @@ ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
}
}
static LLVMTypeRef
glsl_base_to_llvm_type(struct ac_llvm_context *ac,
enum glsl_base_type type)
{
switch (type) {
case GLSL_TYPE_INT:
case GLSL_TYPE_UINT:
case GLSL_TYPE_BOOL:
case GLSL_TYPE_SUBROUTINE:
return ac->i32;
case GLSL_TYPE_INT16:
case GLSL_TYPE_UINT16:
return ac->i16;
case GLSL_TYPE_FLOAT:
return ac->f32;
case GLSL_TYPE_FLOAT16:
return ac->f16;
case GLSL_TYPE_INT64:
case GLSL_TYPE_UINT64:
return ac->i64;
case GLSL_TYPE_DOUBLE:
return ac->f64;
default:
unreachable("unknown GLSL type");
}
}
static LLVMTypeRef
glsl_to_llvm_type(struct ac_llvm_context *ac,
const struct glsl_type *type)
{
if (glsl_type_is_scalar(type)) {
return glsl_base_to_llvm_type(ac, glsl_get_base_type(type));
}
if (glsl_type_is_vector(type)) {
return LLVMVectorType(
glsl_base_to_llvm_type(ac, glsl_get_base_type(type)),
glsl_get_vector_elements(type));
}
if (glsl_type_is_matrix(type)) {
return LLVMArrayType(
glsl_to_llvm_type(ac, glsl_get_column_type(type)),
glsl_get_matrix_columns(type));
}
if (glsl_type_is_array(type)) {
return LLVMArrayType(
glsl_to_llvm_type(ac, glsl_get_array_element(type)),
glsl_get_length(type));
}
assert(glsl_type_is_struct(type));
LLVMTypeRef member_types[glsl_get_length(type)];
for (unsigned i = 0; i < glsl_get_length(type); i++) {
member_types[i] =
glsl_to_llvm_type(ac,
glsl_get_struct_field(type, i));
}
return LLVMStructTypeInContext(ac->context, member_types,
glsl_get_length(type), false);
}
static void
setup_locals(struct ac_nir_context *ctx,
struct nir_function *func)


@@ -159,7 +159,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively,
NIR_PASS(progress, shader, nir_opt_if);
NIR_PASS(progress, shader, nir_opt_dead_cf);
NIR_PASS(progress, shader, nir_opt_cse);
NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true);
NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true);
NIR_PASS(progress, shader, nir_opt_algebraic);
NIR_PASS(progress, shader, nir_opt_constant_folding);
NIR_PASS(progress, shader, nir_opt_undef);


@@ -101,7 +101,7 @@ gather_intrinsic_load_deref_info(const nir_shader *nir,
case MESA_SHADER_VERTEX: {
nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
if (var->data.mode == nir_var_shader_in) {
if (var && var->data.mode == nir_var_shader_in) {
unsigned idx = var->data.location;
uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa);
@@ -150,7 +150,7 @@ gather_intrinsic_store_deref_info(const nir_shader *nir,
{
nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
if (var->data.mode == nir_var_shader_out) {
if (var && var->data.mode == nir_var_shader_out) {
unsigned idx = var->data.location;
switch (nir->info.stage) {


@@ -543,7 +543,7 @@ static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs,
cs->handles[cs->num_buffers].bo_handle = bo;
cs->handles[cs->num_buffers].bo_priority = priority;
hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
hash = bo & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
cs->buffer_hash_table[hash] = cs->num_buffers;
++cs->num_buffers;


@@ -159,9 +159,8 @@ v3d_store_utile(void *gpu, uint32_t gpu_stride,
* d0-d7.
*/
"vstm %[gpu], {q0, q1, q2, q3}\n"
:
: [cpu] "+r"(cpu)
: [gpu] "r"(gpu),
[cpu] "r"(cpu),
[cpu_stride] "r"(cpu_stride)
: "q0", "q1", "q2", "q3");
return;


@@ -1455,7 +1455,7 @@ v3d_optimize_nir(struct nir_shader *s)
NIR_PASS(progress, s, nir_opt_dce);
NIR_PASS(progress, s, nir_opt_dead_cf);
NIR_PASS(progress, s, nir_opt_cse);
NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
NIR_PASS(progress, s, nir_opt_peephole_select, 8, true);
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, nir_opt_constant_folding);
NIR_PASS(progress, s, nir_opt_undef);


@@ -156,7 +156,7 @@ pack_sint(nir_builder *b, nir_ssa_def *color, const unsigned *bits,
int num_components)
{
color = nir_channels(b, color, (1 << num_components) - 1);
color = nir_format_clamp_uint(b, color, bits);
color = nir_format_clamp_sint(b, color, bits);
return pack_bits(b, color, bits, num_components, true);
}


@@ -104,6 +104,6 @@ $(intermediates)/glsl/ir_expression_operation_strings.h: $(LOCAL_PATH)/glsl/ir_e
@mkdir -p $(dir $@)
$(hide) $(MESA_PYTHON2) $< strings > $@
$(intermediates)/compiler/glsl/float64_glsl.h: $(LOCAL_PATH)/glsl/xxd.py
$(intermediates)/glsl/float64_glsl.h: $(LOCAL_PATH)/glsl/xxd.py
@mkdir -p $(dir $@)
$(hide) $(MESA_PYTHON2) $< $(MESA_TOP)/src/compiler/glsl/float64.glsl $@ -n float64_source > $@


@@ -2825,7 +2825,7 @@ should_print_nir(void)
static inline void nir_validate_shader(nir_shader *shader, const char *when) { (void) shader; (void)when; }
static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; }
static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; }
static inline bool should_skip_nir(const char *pass_name) { return false; }
static inline bool should_skip_nir(UNUSED const char *pass_name) { return false; }
static inline bool should_clone_nir(void) { return false; }
static inline bool should_serialize_deserialize_nir(void) { return false; }
static inline bool should_print_nir(void) { return false; }
@@ -3316,7 +3316,7 @@ bool nir_opt_move_comparisons(nir_shader *shader);
bool nir_opt_move_load_ubo(nir_shader *shader);
bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
bool indirect_load_ok, bool expensive_alu_ok);
bool indirect_load_ok);
bool nir_opt_remove_phis(nir_shader *shader);


@@ -574,10 +574,9 @@ nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl)
_mesa_hash_table_clear(state.cache, NULL);
nir_foreach_instr_safe(instr, block) {
if (instr->type == nir_instr_type_deref) {
nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr));
if (instr->type == nir_instr_type_deref &&
nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
continue;
}
state.builder.cursor = nir_before_instr(instr);
nir_foreach_src(instr, rematerialize_deref_src, &state);


@@ -59,8 +59,7 @@
static bool
block_check_for_allowed_instrs(nir_block *block, unsigned *count,
bool alu_ok, bool indirect_load_ok,
bool expensive_alu_ok)
bool alu_ok, bool indirect_load_ok)
{
nir_foreach_instr(instr, block) {
switch (instr->type) {
@@ -118,25 +117,6 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count,
case nir_op_vec3:
case nir_op_vec4:
break;
case nir_op_fcos:
case nir_op_fdiv:
case nir_op_fexp2:
case nir_op_flog2:
case nir_op_fmod:
case nir_op_fpow:
case nir_op_frcp:
case nir_op_frem:
case nir_op_frsq:
case nir_op_fsin:
case nir_op_idiv:
case nir_op_irem:
case nir_op_udiv:
if (!alu_ok || !expensive_alu_ok)
return false;
break;
default:
if (!alu_ok) {
/* It must be a move-like operation. */
@@ -180,8 +160,7 @@ block_check_for_allowed_instrs(nir_block *block, unsigned *count,
static bool
nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
unsigned limit, bool indirect_load_ok,
bool expensive_alu_ok)
unsigned limit, bool indirect_load_ok)
{
if (nir_cf_node_is_first(&block->cf_node))
return false;
@@ -202,9 +181,9 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
/* ... and those blocks must only contain "allowed" instructions. */
unsigned count = 0;
if (!block_check_for_allowed_instrs(then_block, &count, limit != 0,
indirect_load_ok, expensive_alu_ok) ||
indirect_load_ok) ||
!block_check_for_allowed_instrs(else_block, &count, limit != 0,
indirect_load_ok, expensive_alu_ok))
indirect_load_ok))
return false;
if (count > limit)
@@ -271,15 +250,14 @@ nir_opt_peephole_select_block(nir_block *block, nir_shader *shader,
static bool
nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit,
bool indirect_load_ok, bool expensive_alu_ok)
bool indirect_load_ok)
{
nir_shader *shader = impl->function->shader;
bool progress = false;
nir_foreach_block_safe(block, impl) {
progress |= nir_opt_peephole_select_block(block, shader, limit,
indirect_load_ok,
expensive_alu_ok);
indirect_load_ok);
}
if (progress) {
@@ -295,15 +273,14 @@ nir_opt_peephole_select_impl(nir_function_impl *impl, unsigned limit,
bool
nir_opt_peephole_select(nir_shader *shader, unsigned limit,
bool indirect_load_ok, bool expensive_alu_ok)
bool indirect_load_ok)
{
bool progress = false;
nir_foreach_function(function, shader) {
if (function->impl)
progress |= nir_opt_peephole_select_impl(function->impl, limit,
indirect_load_ok,
expensive_alu_ok);
indirect_load_ok);
}
return progress;


@@ -45,6 +45,7 @@ TESTS =
BUILT_SOURCES =
CLEANFILES =
EXTRA_DIST = \
meson.build \
drm/meson.build \
ir3/ir3_nir_trig.py \
ir3/meson.build


@@ -97,7 +97,7 @@ ir3_optimize_loop(nir_shader *s)
progress |= OPT(s, nir_opt_gcm, true);
else if (gcm == 2)
progress |= OPT(s, nir_opt_gcm, false);
progress |= OPT(s, nir_opt_peephole_select, 16, true, true);
progress |= OPT(s, nir_opt_peephole_select, 16, true);
progress |= OPT(s, nir_opt_intrinsics);
progress |= OPT(s, nir_opt_algebraic);
progress |= OPT(s, nir_opt_constant_folding);


@@ -23,4 +23,6 @@ libfreedreno_la_SOURCES = \
$(a6xx_SOURCES) \
$(ir3_SOURCES)
EXTRA_DIST = meson.build
EXTRA_DIST = \
ir3/ir3_cmdline.c \
meson.build


@@ -339,7 +339,6 @@ clear_fast(struct fd_batch *batch, struct fd_ringbuffer *ring,
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
OUT_RINGP(ring, patch_type, &batch->gmem_patches);
OUT_RING(ring, 0);
OUT_PKT3(ring, CP_SET_CONSTANT, 4);
OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));


@@ -74,7 +74,7 @@ ir2_optimize_loop(nir_shader *s)
progress |= OPT(s, nir_opt_dce);
progress |= OPT(s, nir_opt_cse);
/* progress |= OPT(s, nir_opt_gcm, true); */
progress |= OPT(s, nir_opt_peephole_select, UINT_MAX, true, true);
progress |= OPT(s, nir_opt_peephole_select, UINT_MAX, true);
progress |= OPT(s, nir_opt_intrinsics);
progress |= OPT(s, nir_opt_algebraic);
progress |= OPT(s, nir_opt_constant_folding);


@@ -438,7 +438,7 @@ emit_blit_texture(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(dfmt) |
A6XX_RB_2D_DST_INFO_TILE_MODE(dtile) |
A6XX_RB_2D_DST_INFO_COLOR_SWAP(dswap));
OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
OUT_RELOCW(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
OUT_RING(ring, A6XX_RB_2D_DST_SIZE_PITCH(dpitch));
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);


@@ -1063,22 +1063,6 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
}
}
if (chipset >= NVISA_GK104_CHIPSET) {
//
// If TEX requires more than 4 sources, the 2nd register tuple must be
// aligned to 4, even if it consists of just a single 4-byte register.
//
// XXX HACK: We insert 0 sources to avoid the 5 or 6 regs case.
//
int s = i->srcCount(0xff, true);
if (s > 4 && s < 7) {
if (i->srcExists(s)) // move potential predicate out of the way
i->moveSources(s, 7 - s);
while (s < 7)
i->setSrc(s++, bld.loadImm(NULL, 0));
}
}
return true;
}


@@ -2341,9 +2341,19 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex)
       if (!tex->tex.target.isArray() && tex->tex.useOffsets)
          s++;
    }
-   n = tex->srcCount(0xff) - s;
+   n = tex->srcCount(0xff, true) - s;
+
+   // TODO: Is this necessary? Perhaps just has to be aligned to the
+   // level that the first arg is, not necessarily to 4. This
+   // requirement has not been rigorously verified, as it has been on
+   // Kepler.
+   if (n > 0 && n < 3) {
+      if (tex->srcExists(n + s)) // move potential predicate out of the way
+         tex->moveSources(n + s, 3 - n);
+      while (n < 3)
+         tex->setSrc(s + n++, new_LValue(func, FILE_GPR));
+   }
 } else {
-   s = tex->srcCount(0xff);
+   s = tex->srcCount(0xff, true);
    n = 0;
 }
@@ -2366,14 +2376,18 @@ RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex)
 } else
 if (isTextureOp(tex->op)) {
    int n = tex->srcCount(0xff, true);
-   if (n > 4) {
-      condenseSrcs(tex, 0, 3);
-      if (n > 5) // NOTE: first call modified positions already
-         condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1));
-   } else
-   if (n > 1) {
-      condenseSrcs(tex, 0, n - 1);
-   }
+   int s = n > 4 ? 4 : n;
+
+   if (n > 4 && n < 7) {
+      if (tex->srcExists(n)) // move potential predicate out of the way
+         tex->moveSources(n, 7 - n);
+      while (n < 7)
+         tex->setSrc(n++, new_LValue(func, FILE_GPR));
+   }
+
+   if (s > 1)
+      condenseSrcs(tex, 0, s - 1);
+   if (n > 4)
+      condenseSrcs(tex, 1, n - s);
 }
@@ -2510,6 +2524,7 @@ RegAlloc::InsertConstraintsPass::insertConstraintMove(Instruction *cst, int s)
    assert(cst->getSrc(s)->defs.size() == 1); // still SSA

    Instruction *defi = cst->getSrc(s)->defs.front()->getInsn();
    bool imm = defi->op == OP_MOV &&
       defi->src(0).getFile() == FILE_IMMEDIATE;
    bool load = defi->op == OP_LOAD &&


@@ -1333,7 +1333,7 @@ void si_init_perfcounters(struct si_screen *screen)
 	for (i = 0; i < num_blocks; ++i) {
 		struct si_pc_block *block = &pc->blocks[i];
 		block->b = &blocks[i];
-		block->num_instances = block->b->instances;
+		block->num_instances = MAX2(1, block->b->instances);

 		if (!strcmp(block->b->b->name, "CB") ||
 		    !strcmp(block->b->b->name, "DB"))

@@ -834,7 +834,7 @@ si_lower_nir(struct si_shader_selector* sel)
 	NIR_PASS(progress, sel->nir, nir_opt_if);
 	NIR_PASS(progress, sel->nir, nir_opt_dead_cf);
 	NIR_PASS(progress, sel->nir, nir_opt_cse);
-	NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8, true, true);
+	NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8, true);

 	/* Needed for algebraic lowering */
 	NIR_PASS(progress, sel->nir, nir_opt_algebraic);


@@ -780,7 +780,7 @@ v3d_resource_create_with_modifiers(struct pipe_screen *pscreen,
                 rsc->tiled = false;
         } else {
                 fprintf(stderr, "Unsupported modifier requested\n");
-                return NULL;
+                goto fail;
         }

         rsc->internal_format = prsc->format;


@@ -1591,7 +1591,7 @@ vc4_optimize_nir(struct nir_shader *s)
                 NIR_PASS(progress, s, nir_opt_dce);
                 NIR_PASS(progress, s, nir_opt_dead_cf);
                 NIR_PASS(progress, s, nir_opt_cse);
-                NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
+                NIR_PASS(progress, s, nir_opt_peephole_select, 8, true);
                 NIR_PASS(progress, s, nir_opt_algebraic);
                 NIR_PASS(progress, s, nir_opt_constant_folding);
                 NIR_PASS(progress, s, nir_opt_undef);


@@ -132,7 +132,7 @@ vc4_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
         /* We can't mix HW and non-HW queries. */
         if (nhwqueries && nhwqueries != num_queries)
-                return NULL;
+                goto err_free_query;

         if (!nhwqueries)
                 return (struct pipe_query *)query;


@@ -90,15 +90,15 @@ Status XvMCSetAttribute(Display *dpy, XvMCContext *context, Atom attribute, int
    if (!attr)
       return XvMCBadContext;

-   if (strcmp(attr, XV_BRIGHTNESS))
+   if (strcmp(attr, XV_BRIGHTNESS) == 0)
       context_priv->procamp.brightness = value / 1000.0f;
-   else if (strcmp(attr, XV_CONTRAST))
+   else if (strcmp(attr, XV_CONTRAST) == 0)
       context_priv->procamp.contrast = value / 1000.0f + 1.0f;
-   else if (strcmp(attr, XV_SATURATION))
+   else if (strcmp(attr, XV_SATURATION) == 0)
       context_priv->procamp.saturation = value / 1000.0f + 1.0f;
-   else if (strcmp(attr, XV_HUE))
+   else if (strcmp(attr, XV_HUE) == 0)
       context_priv->procamp.hue = value / 1000.0f;
-   else if (strcmp(attr, XV_COLORSPACE))
+   else if (strcmp(attr, XV_COLORSPACE) == 0)
       context_priv->color_standard = value ?
          VL_CSC_COLOR_STANDARD_BT_601 :
          VL_CSC_COLOR_STANDARD_BT_709;
@@ -134,15 +134,15 @@ Status XvMCGetAttribute(Display *dpy, XvMCContext *context, Atom attribute, int
    if (!attr)
       return XvMCBadContext;

-   if (strcmp(attr, XV_BRIGHTNESS))
+   if (strcmp(attr, XV_BRIGHTNESS) == 0)
       *value = context_priv->procamp.brightness * 1000;
-   else if (strcmp(attr, XV_CONTRAST))
+   else if (strcmp(attr, XV_CONTRAST) == 0)
       *value = context_priv->procamp.contrast * 1000 - 1000;
-   else if (strcmp(attr, XV_SATURATION))
+   else if (strcmp(attr, XV_SATURATION) == 0)
       *value = context_priv->procamp.saturation * 1000 + 1000;
-   else if (strcmp(attr, XV_HUE))
+   else if (strcmp(attr, XV_HUE) == 0)
       *value = context_priv->procamp.hue * 1000;
-   else if (strcmp(attr, XV_COLORSPACE))
+   else if (strcmp(attr, XV_COLORSPACE) == 0)
       *value = context_priv->color_standard == VL_CSC_COLOR_STANDARD_BT_709;
    else
       return BadName;


@@ -123,11 +123,11 @@ void ParseArgs(int argc, char **argv, struct Config *config)
    while (token && !fail)
    {
-      if (strcmp(token, "i"))
+      if (strcmp(token, "i") == 0)
          config->mb_types |= MB_TYPE_I;
-      else if (strcmp(token, "p"))
+      else if (strcmp(token, "p") == 0)
          config->mb_types |= MB_TYPE_P;
-      else if (strcmp(token, "b"))
+      else if (strcmp(token, "b") == 0)
          config->mb_types |= MB_TYPE_B;
       else
          fail = 1;


@@ -253,6 +253,7 @@ VULKAN_TESTS = \
 	vulkan/tests/block_pool_no_free \
 	vulkan/tests/state_pool_no_free \
 	vulkan/tests/state_pool_free_list_only \
+	vulkan/tests/state_pool_padding \
 	vulkan/tests/state_pool

 VULKAN_TEST_LDADD = \
@@ -274,6 +275,10 @@ vulkan_tests_state_pool_free_list_only_CFLAGS = $(VULKAN_CFLAGS)
 vulkan_tests_state_pool_free_list_only_CPPFLAGS = $(VULKAN_CPPFLAGS)
 vulkan_tests_state_pool_free_list_only_LDADD = $(VULKAN_TEST_LDADD)

+vulkan_tests_state_pool_padding_CFLAGS = $(VULKAN_CFLAGS)
+vulkan_tests_state_pool_padding_CPPFLAGS = $(VULKAN_CPPFLAGS)
+vulkan_tests_state_pool_padding_LDADD = $(VULKAN_TEST_LDADD)
+
 vulkan_tests_state_pool_CFLAGS = $(VULKAN_CFLAGS)
 vulkan_tests_state_pool_CPPFLAGS = $(VULKAN_CPPFLAGS)
 vulkan_tests_state_pool_LDADD = $(VULKAN_TEST_LDADD)


@@ -667,15 +667,14 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all)
        * messages adding a node interference to the grf127_send_hack_node.
        * This node has a fixed asignment to grf127.
        *
-       * We don't apply it to SIMD16 because previous code avoids any register
-       * overlap between sources and destination.
+       * We don't apply it to SIMD16 instructions because previous code avoids
+       * any register overlap between sources and destination.
        */
       ra_set_node_reg(g, grf127_send_hack_node, 127);

-      if (dispatch_width == 8) {
-         foreach_block_and_inst(block, fs_inst, inst, cfg) {
-            if (inst->is_send_from_grf() && inst->dst.file == VGRF)
-               ra_add_node_interference(g, inst->dst.nr, grf127_send_hack_node);
-         }
+      foreach_block_and_inst(block, fs_inst, inst, cfg) {
+         if (inst->exec_size < 16 && inst->is_send_from_grf() &&
+             inst->dst.file == VGRF)
+            ra_add_node_interference(g, inst->dst.nr, grf127_send_hack_node);
       }

       if (spilled_any_registers) {


@@ -570,18 +570,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
       OPT(nir_opt_dce);
       OPT(nir_opt_cse);

-      /* Passing 0 to the peephole select pass causes it to convert
-       * if-statements that contain only move instructions in the branches
-       * regardless of the count.
-       *
-       * Passing 1 to the peephole select pass causes it to convert
-       * if-statements that contain at most a single ALU instruction (total)
-       * in both branches. Before Gen6, some math instructions were
-       * prohibitively expensive and the results of compare operations need an
-       * extra resolve step. For these reasons, this pass is more harmful
-       * than good on those platforms.
-       *
-       * For indirect loads of uniforms (push constants), we assume that array
+      /* For indirect loads of uniforms (push constants), we assume that array
        * indices will nearly always be in bounds and the cost of the load is
        * low. Therefore there shouldn't be a performance benefit to avoid it.
        * However, in vec4 tessellation shaders, these loads operate by
@@ -590,9 +579,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
       const bool is_vec4_tessellation = !is_scalar &&
          (nir->info.stage == MESA_SHADER_TESS_CTRL ||
           nir->info.stage == MESA_SHADER_TESS_EVAL);
-      OPT(nir_opt_peephole_select, 0, !is_vec4_tessellation, false);
-      OPT(nir_opt_peephole_select, 1, !is_vec4_tessellation,
-          compiler->devinfo->gen >= 6);
+      OPT(nir_opt_peephole_select, 0, !is_vec4_tessellation);

       OPT(nir_opt_intrinsics);
       OPT(nir_opt_idiv_const, 32);


@@ -1211,13 +1211,30 @@ emit_3dstate_streamout(struct anv_pipeline *pipeline,
             hole_dwords -= 4;
          }

+         int varying = output->location;
+         uint8_t component_mask = output->component_mask;
+
+         /* VARYING_SLOT_PSIZ contains three scalar fields packed together:
+          * - VARYING_SLOT_LAYER    in VARYING_SLOT_PSIZ.y
+          * - VARYING_SLOT_VIEWPORT in VARYING_SLOT_PSIZ.z
+          * - VARYING_SLOT_PSIZ     in VARYING_SLOT_PSIZ.w
+          */
+         if (varying == VARYING_SLOT_LAYER) {
+            varying = VARYING_SLOT_PSIZ;
+            component_mask = 1 << 1; // SO_DECL_COMPMASK_Y
+         } else if (varying == VARYING_SLOT_VIEWPORT) {
+            varying = VARYING_SLOT_PSIZ;
+            component_mask = 1 << 2; // SO_DECL_COMPMASK_Z
+         } else if (varying == VARYING_SLOT_PSIZ) {
+            component_mask = 1 << 3; // SO_DECL_COMPMASK_W
+         }
+
          next_offset[buffer] = output->offset +
-            __builtin_popcount(output->component_mask) * 4;
+            __builtin_popcount(component_mask) * 4;

          so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) {
             .OutputBufferSlot = buffer,
-            .RegisterIndex = vue_map->varying_to_slot[output->location],
-            .ComponentMask = output->component_mask,
+            .RegisterIndex = vue_map->varying_to_slot[varying],
+            .ComponentMask = component_mask,
          };
       }


@@ -111,7 +111,7 @@ set_adaptive_sync_property(xcb_connection_t *conn, xcb_drawable_t drawable,
    xcb_intern_atom_reply_t* reply;
    xcb_void_cookie_t check;

-   cookie = xcb_intern_atom(conn, 0, sizeof(name), name);
+   cookie = xcb_intern_atom(conn, 0, strlen(name), name);
    reply = xcb_intern_atom_reply(conn, cookie, NULL);
    if (reply == NULL)
       return;


@@ -34,6 +34,8 @@ AM_CFLAGS = \
 	-I$(top_builddir)/src/util \
 	-I$(top_srcdir)/src/mesa/drivers/dri/common \
 	-I$(top_srcdir)/src/gtest/include \
+	-I$(top_builddir)/src/compiler \
+	-I$(top_srcdir)/src/compiler \
 	-I$(top_builddir)/src/compiler/glsl \
 	-I$(top_builddir)/src/compiler/nir \
 	-I$(top_srcdir)/src/compiler/nir \


@@ -42,7 +42,7 @@
 #include "compiler/glsl/ir.h"
 #include "compiler/glsl/program.h"
 #include "compiler/glsl/glsl_to_nir.h"
-#include "compiler/glsl/float64_glsl.h"
+#include "glsl/float64_glsl.h"
 #include "brw_program.h"
 #include "brw_context.h"


@@ -4691,6 +4691,29 @@ discard_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb,
       if (!att)
          continue;

+      /* If we're asked to invalidate just depth or just stencil, but the
+       * attachment is packed depth/stencil, then we can only use
+       * Driver.DiscardFramebuffer if the attachments list includes both depth
+       * and stencil and they both point at the same renderbuffer.
+       */
+      if ((attachments[i] == GL_DEPTH_ATTACHMENT ||
+           attachments[i] == GL_STENCIL_ATTACHMENT) &&
+          (!att->Renderbuffer ||
+           att->Renderbuffer->_BaseFormat == GL_DEPTH_STENCIL)) {
+         GLenum other_format = (attachments[i] == GL_DEPTH_ATTACHMENT ?
+                                GL_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT);
+         bool has_both = false;
+         for (int j = 0; j < numAttachments; j++) {
+            if (attachments[j] == other_format) {
+               has_both = true;
+               break;
+            }
+         }
+
+         if (fb->Attachment[BUFFER_DEPTH].Renderbuffer !=
+             fb->Attachment[BUFFER_STENCIL].Renderbuffer || !has_both)
+            continue;
+      }
+
       ctx->Driver.DiscardFramebuffer(ctx, fb, att);
    }
 }


@@ -208,6 +208,10 @@ new_draw_rastpos_stage(struct gl_context *ctx, struct draw_context *draw)
    rs->prim.end = 1;
    rs->prim.start = 0;
    rs->prim.count = 1;
+   rs->prim.pad = 0;
+   rs->prim.num_instances = 1;
+   rs->prim.base_instance = 0;
+   rs->prim.is_indirect = 0;

    return rs;
 }


@@ -327,7 +327,7 @@ st_nir_opts(nir_shader *nir, bool scalar)
       NIR_PASS(progress, nir, nir_opt_if);
       NIR_PASS(progress, nir, nir_opt_dead_cf);
       NIR_PASS(progress, nir, nir_opt_cse);
-      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
+      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true);
       NIR_PASS(progress, nir, nir_opt_algebraic);
       NIR_PASS(progress, nir, nir_opt_constant_folding);