Update version to 18.2.0-rc4

Signed-off-by: Andres Gomez <agomez@igalia.com>
cherry-ignore: autotools: don't ship the git_sha1.h generated in git in the tarballs
2018-08-22 16:59:30 +03:00 · 2018-08-22 16:58:27 +03:00 · 2018-08-21 23:18:19 +03:00 · 2018-08-21 15:46:01 +03:00 · 2018-08-20 13:33:07 +03:00 · 2018-08-18 00:03:00 +03:00
24 changed files with 125 additions and 68 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-18.2.0-rc3
+18.2.0-rc4
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -0,0 +1,3 @@
+# fixes:  This commit has more than one Fixes tag but the commit it
+#         addresses didn't land in branch.
+6ff1c479968819b93c46d24bd898e89ce14ac401 autotools: don't ship the git_sha1.h generated in git in the tarballs
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -149,7 +149,8 @@ static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
 	char features[256];
 	const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
 	LLVMTargetRef target = ac_get_llvm_target(triple);
-	bool barrier_does_waitcnt = family != CHIP_VEGA20;
+	bool barrier_does_waitcnt = (tm_options & AC_TM_AUTO_WAITCNT_BEFORE_BARRIER) &&
+				    family != CHIP_VEGA20;

 	snprintf(features, sizeof(features),
 		 "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s%s",
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -65,6 +65,7 @@ enum ac_target_machine_options {
 	AC_TM_CHECK_IR = (1 << 5),
 	AC_TM_ENABLE_GLOBAL_ISEL = (1 << 6),
 	AC_TM_CREATE_LOW_OPT = (1 << 7),
+	AC_TM_AUTO_WAITCNT_BEFORE_BARRIER = (1 << 8),
 };

 enum ac_float_mode {
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2307,6 +2307,7 @@ VkResult radv_BeginCommandBuffer(
 	cmd_buffer->state.last_num_instances = -1;
 	cmd_buffer->state.last_vertex_offset = -1;
 	cmd_buffer->state.last_first_instance = -1;
+	cmd_buffer->state.predication_type = -1;
 	cmd_buffer->usage_flags = pBeginInfo->flags;

 	/* setup initial configuration into command buffer */
@@ -4126,15 +4127,18 @@ static void radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer,

 	if (radv_image_has_dcc(image)) {
 		uint32_t value = 0xffffffffu; /* Fully expanded mode. */
+		bool need_decompress_pass = false;

 		if (radv_layout_dcc_compressed(image, dst_layout,
 					       dst_queue_mask)) {
 			value = 0x20202020u;
+			need_decompress_pass = true;
 		}

 		radv_initialize_dcc(cmd_buffer, image, value);

-		radv_set_dcc_need_cmask_elim_pred(cmd_buffer, image, false);
+		radv_set_dcc_need_cmask_elim_pred(cmd_buffer, image,
+						  need_decompress_pass);
 	}

 	if (radv_image_has_cmask(image) || radv_image_has_dcc(image)) {
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -480,6 +480,9 @@ radv_handle_per_app_options(struct radv_instance *instance,
 			 */
 			instance->perftest_flags |= RADV_PERFTEST_SISCHED;
 		}
+	} else if (!strcmp(name, "DOOM_VFR")) {
+		/* Work around a Doom VFR game bug */
+		instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
 	}
 }

--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -603,7 +603,7 @@ radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer,
               pipeline = cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
 	}

-	if (radv_image_has_dcc(image)) {
+	if (!decompress_dcc && radv_image_has_dcc(image)) {
 		old_predicating = cmd_buffer->state.predicating;

 		radv_emit_set_predication_state_from_image(cmd_buffer, image, true);
@@ -671,7 +671,7 @@ radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer,
 					&cmd_buffer->pool->alloc);

 	}
-	if (radv_image_has_dcc(image)) {
+	if (!decompress_dcc && radv_image_has_dcc(image)) {
 		cmd_buffer->state.predicating = old_predicating;

 		radv_emit_set_predication_state_from_image(cmd_buffer, image, false);
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2006,7 +2006,7 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
 						MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt);
 				}
 			} else {
-				unreachable("Invalid vertex attribute divisor of 0.");
+				buffer_index = ctx->ac.i32_0;
 			}

 			buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.start_instance, buffer_index, "");
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -673,7 +673,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
 			if (!cs->num_buffers)
 				continue;

-			if (unique_bo_count == 0) {
+			if (unique_bo_count == 0 && !cs->num_virtual_buffers) {
 				memcpy(handles, cs->handles, cs->num_buffers * sizeof(amdgpu_bo_handle));
 				unique_bo_count = cs->num_buffers;
 				continue;
--- a/src/compiler/glsl/ir_constant_expression.cpp
+++ b/src/compiler/glsl/ir_constant_expression.cpp
@@ -826,7 +826,7 @@ ir_dereference_array::constant_expression_value(void *mem_ctx,
         const unsigned component = idx->value.u[0];

         return new(mem_ctx) ir_constant(array, component);
-      } else {
+      } else if (array->type->is_array()) {
         const unsigned index = idx->value.u[0];
         return array->get_array_element(index)->clone(mem_ctx, NULL);
      }
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -1134,6 +1134,25 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
   return (config_count != 0);
 }

+#ifdef HAVE_DRM_GRALLOC
+static int
+droid_open_device_drm_gralloc(struct dri2_egl_display *dri2_dpy)
+{
+   int fd = -1, err = -EINVAL;
+
+   if (dri2_dpy->gralloc->perform)
+         err = dri2_dpy->gralloc->perform(dri2_dpy->gralloc,
+                                          GRALLOC_MODULE_PERFORM_GET_DRM_FD,
+                                          &fd);
+   if (err || fd < 0) {
+      _eglLog(_EGL_WARNING, "fail to get drm fd");
+      fd = -1;
+   }
+
+   return (fd >= 0) ? fcntl(fd, F_DUPFD_CLOEXEC, 3) : -1;
+}
+#endif /* HAVE_DRM_GRALLOC */
+
 static const struct dri2_egl_display_vtbl droid_display_vtbl = {
   .authenticate = NULL,
   .create_window_surface = droid_create_window_surface,
@@ -1384,7 +1403,11 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *disp)

   disp->DriverData = (void *) dri2_dpy;

+#ifdef HAVE_DRM_GRALLOC
+   dri2_dpy->fd = droid_open_device_drm_gralloc(dri2_dpy);
+#else
   dri2_dpy->fd = droid_open_device(disp);
+#endif
   if (dri2_dpy->fd < 0) {
      err = "DRI2: failed to open device";
      goto cleanup;
--- a/src/egl/meson.build
+++ b/src/egl/meson.build
@@ -99,10 +99,10 @@ endif

 if with_platform_x11
  files_egl += files('drivers/dri2/platform_x11.c')
+  incs_for_egl += inc_loader
  if with_dri3
    files_egl += files('drivers/dri2/platform_x11_dri3.c')
    link_for_egl += libloader_dri3_helper
-    incs_for_egl += inc_loader
  endif
  deps_for_egl += [dep_x11_xcb, dep_xcb_dri2, dep_xcb_xfixes]
 endif
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -715,7 +715,6 @@ static void compute_emit_cs(struct r600_context *rctx,
 		rctx->cmd_buf_is_compute = true;
 	}

-	r600_need_cs_space(rctx, 0, true);
 	if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI) {
 		r600_shader_select(&rctx->b.b, rctx->cs_shader_state.shader->sel, &compute_dirty);
 		current = rctx->cs_shader_state.shader->sel->current;
@@ -742,16 +741,22 @@ static void compute_emit_cs(struct r600_context *rctx,
 		}
 		rctx->cs_block_grid_sizes[3] = rctx->cs_block_grid_sizes[7] = 0;
 		rctx->driver_consts[PIPE_SHADER_COMPUTE].cs_block_grid_size_dirty = true;
+
+		evergreen_emit_atomic_buffer_setup_count(rctx, current, combined_atomics, &atomic_used_mask);
+		r600_need_cs_space(rctx, 0, true, util_bitcount(atomic_used_mask));
+
 		if (need_buf_const) {
 			eg_setup_buffer_constants(rctx, PIPE_SHADER_COMPUTE);
 		}
 		r600_update_driver_const_buffers(rctx, true);

-		if (evergreen_emit_atomic_buffer_setup(rctx, current, combined_atomics, &atomic_used_mask)) {
+		evergreen_emit_atomic_buffer_setup(rctx, true, combined_atomics, atomic_used_mask);
+		if (atomic_used_mask) {
 			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 			radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
 		}
-	}
+	} else
+		r600_need_cs_space(rctx, 0, true, 0);

 	/* Initialize all the compute-related registers.
 	 *
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -109,7 +109,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,

 		r600_need_cs_space(rctx,
 				   10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) +
-				   R600_MAX_PFP_SYNC_ME_DWORDS, FALSE);
+				   R600_MAX_PFP_SYNC_ME_DWORDS, FALSE, 0);

 		/* Flush the caches for the first copy only. */
 		if (rctx->b.flags) {
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -4030,7 +4030,6 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,

 		if (!buffers || !buffers[idx].buffer) {
 			pipe_resource_reference(&abuf->buffer, NULL);
-			astate->enabled_mask &= ~(1 << i);
 			continue;
 		}
 		buf = &buffers[idx];
@@ -4038,7 +4037,6 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
 		pipe_resource_reference(&abuf->buffer, buf->buffer);
 		abuf->buffer_offset = buf->buffer_offset;
 		abuf->buffer_size = buf->buffer_size;
-		astate->enabled_mask |= (1 << i);
 	}
 }

@@ -4868,20 +4866,15 @@ static void cayman_write_count_to_gds(struct r600_context *rctx,
 	radeon_emit(cs, reloc);
 }

-bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
-					struct r600_pipe_shader *cs_shader,
-					struct r600_shader_atomic *combined_atomics,
-					uint8_t *atomic_used_mask_p)
+void evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx,
+					      struct r600_pipe_shader *cs_shader,
+					      struct r600_shader_atomic *combined_atomics,
+					      uint8_t *atomic_used_mask_p)
 {
-	struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
-	unsigned pkt_flags = 0;
 	uint8_t atomic_used_mask = 0;
 	int i, j, k;
 	bool is_compute = cs_shader ? true : false;

-	if (is_compute)
-		pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
-
 	for (i = 0; i < (is_compute ? 1 : EG_NUM_HW_STAGES); i++) {
 		uint8_t num_atomic_stage;
 		struct r600_pipe_shader *pshader;
@@ -4914,8 +4907,25 @@ bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
 			}
 		}
 	}
+	*atomic_used_mask_p = atomic_used_mask;
+}
+
+void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
+					bool is_compute,
+					struct r600_shader_atomic *combined_atomics,
+					uint8_t atomic_used_mask)
+{
+	struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
+	unsigned pkt_flags = 0;
+	uint32_t mask;
+
+	if (is_compute)
+		pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
+
+	mask = atomic_used_mask;
+	if (!mask)
+		return;

-	uint32_t mask = atomic_used_mask;
 	while (mask) {
 		unsigned atomic_index = u_bit_scan(&mask);
 		struct r600_shader_atomic *atomic = &combined_atomics[atomic_index];
@@ -4927,8 +4937,6 @@ bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
 		else
 			evergreen_emit_set_append_cnt(rctx, atomic, resource, pkt_flags);
 	}
-	*atomic_used_mask_p = atomic_used_mask;
-	return true;
 }

 void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
@@ -4940,7 +4948,7 @@ void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
 	struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
 	uint32_t pkt_flags = 0;
 	uint32_t event = EVENT_TYPE_PS_DONE;
-	uint32_t mask = astate->enabled_mask;
+	uint32_t mask;
 	uint64_t dst_offset;
 	unsigned reloc;

--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -31,7 +31,7 @@


 void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
-			boolean count_draw_in)
+			boolean count_draw_in, unsigned num_atomics)
 {
 	/* Flush the DMA IB if it's not empty. */
 	if (radeon_emitted(ctx->b.dma.cs, 0))
@@ -61,6 +61,9 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 		num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
 	}

+	/* add atomic counters, 8 pre + 8 post per counter + 16 post if any counters */
+	num_dw += (num_atomics * 16) + (num_atomics ? 16 : 0);
+
 	/* Count in r600_suspend_queries. */
 	num_dw += ctx->b.num_cs_dw_queries_suspend;

@@ -526,7 +529,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,

 		r600_need_cs_space(rctx,
 				   10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) +
-				   3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE);
+				   3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE, 0);

 		/* Flush the caches for the first copy only. */
 		if (rctx->b.flags) {
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -446,8 +446,6 @@ struct r600_shader_state {
 };

 struct r600_atomic_buffer_state {
-	uint32_t enabled_mask;
-	uint32_t dirty_mask;
 	struct pipe_shader_buffer buffer[EG_MAX_ATOMIC_BUFFERS];
 };

@@ -773,7 +771,7 @@ void r600_context_gfx_flush(void *context, unsigned flags,
 			    struct pipe_fence_handle **fence);
 void r600_begin_new_cs(struct r600_context *ctx);
 void r600_flush_emit(struct r600_context *ctx);
-void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in);
+void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in, unsigned num_atomics);
 void r600_emit_pfp_sync_me(struct r600_context *rctx);
 void r600_cp_dma_copy_buffer(struct r600_context *rctx,
 			     struct pipe_resource *dst, uint64_t dst_offset,
@@ -1067,10 +1065,14 @@ void r600_delete_shader_selector(struct pipe_context *ctx,
 				 struct r600_pipe_shader_selector *sel);

 struct r600_shader_atomic;
-bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
-					struct r600_pipe_shader *cs_shader,
+void evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx,
+					      struct r600_pipe_shader *cs_shader,
+					      struct r600_shader_atomic *combined_atomics,
+					      uint8_t *atomic_used_mask_p);
+void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
+					bool is_compute,
 					struct r600_shader_atomic *combined_atomics,
-					uint8_t *atomic_used_mask_p);
+					uint8_t atomic_used_mask);
 void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
 				       bool is_compute,
 				       struct r600_shader_atomic *combined_atomics,
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -2085,8 +2085,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		: (rctx->tes_shader)? rctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]
 		: info->mode;

-	if (rctx->b.chip_class >= EVERGREEN)
-		evergreen_emit_atomic_buffer_setup(rctx, NULL, combined_atomics, &atomic_used_mask);
+	if (rctx->b.chip_class >= EVERGREEN) {
+		evergreen_emit_atomic_buffer_setup_count(rctx, NULL, combined_atomics, &atomic_used_mask);
+	}

 	if (index_size) {
 		index_offset += info->start * index_size;
@@ -2172,7 +2173,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		evergreen_setup_tess_constants(rctx, info, &num_patches);

 	/* Emit states. */
-	r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE);
+	r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE, util_bitcount(atomic_used_mask));
 	r600_flush_emit(rctx);

 	mask = rctx->dirty_atoms;
@@ -2180,6 +2181,10 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		r600_emit_atom(rctx, rctx->atoms[u_bit_scan64(&mask)]);
 	}

+	if (rctx->b.chip_class >= EVERGREEN) {
+		evergreen_emit_atomic_buffer_setup(rctx, false, combined_atomics, atomic_used_mask);
+	}
+		
 	if (rctx->b.chip_class == CAYMAN) {
 		/* Copied from radeonsi. */
 		unsigned primgroup_size = 128; /* recommended without a GS */
@@ -3284,7 +3289,7 @@ static void r600_set_active_query_state(struct pipe_context *ctx, boolean enable
 static void r600_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
                                   bool include_draw_vbo)
 {
-	r600_need_cs_space((struct r600_context*)ctx, num_dw, include_draw_vbo);
+	r600_need_cs_space((struct r600_context*)ctx, num_dw, include_draw_vbo, 0);
 }

 /* keep this at the end of this file, please */
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -114,6 +114,7 @@ static void si_init_compiler(struct si_screen *sscreen,
 				       sscreen->info.chip_class <= VI;

 	enum ac_target_machine_options tm_options =
+		AC_TM_AUTO_WAITCNT_BEFORE_BARRIER |
 		(sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) |
 		(sscreen->debug_flags & DBG(GISEL) ? AC_TM_ENABLE_GLOBAL_ISEL : 0) |
 		(sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 0) |
--- a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c
+++ b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c
@@ -176,6 +176,8 @@ kms_sw_displaytarget_create(struct sw_winsys *ws,

   list_inithead(&kms_sw_dt->planes);
   kms_sw_dt->ref_count = 1;
+   kms_sw_dt->mapped = MAP_FAILED;
+   kms_sw_dt->ro_mapped = MAP_FAILED;

   kms_sw_dt->format = format;

@@ -262,7 +264,7 @@ kms_sw_displaytarget_map(struct sw_winsys *ws,

   prot = (flags == PIPE_TRANSFER_READ) ? PROT_READ : (PROT_READ | PROT_WRITE);
   void **ptr = (flags == PIPE_TRANSFER_READ) ? &kms_sw_dt->ro_mapped : &kms_sw_dt->mapped;
-   if (!*ptr) {
+   if (*ptr == MAP_FAILED) {
      void *tmp = mmap(0, kms_sw_dt->size, prot, MAP_SHARED,
                       kms_sw->fd, map_req.offset);
      if (tmp == MAP_FAILED)
@@ -332,6 +334,8 @@ kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd,
      FREE(kms_sw_dt);
      return NULL;
   }
+   kms_sw_dt->mapped = MAP_FAILED;
+   kms_sw_dt->ro_mapped = MAP_FAILED;
   kms_sw_dt->size = lseek_ret;
   kms_sw_dt->ref_count = 1;
   kms_sw_dt->handle = handle;
@@ -368,10 +372,14 @@ kms_sw_displaytarget_unmap(struct sw_winsys *ws,
   DEBUG_PRINT("KMS-DEBUG: unmapped buffer %u (was %p)\n", kms_sw_dt->handle, kms_sw_dt->mapped);
   DEBUG_PRINT("KMS-DEBUG: unmapped buffer %u (was %p)\n", kms_sw_dt->handle, kms_sw_dt->ro_mapped);

-   munmap(kms_sw_dt->mapped, kms_sw_dt->size);
-   kms_sw_dt->mapped = NULL;
-   munmap(kms_sw_dt->ro_mapped, kms_sw_dt->size);
-   kms_sw_dt->ro_mapped = NULL;
+   if (kms_sw_dt->mapped != MAP_FAILED) {
+      munmap(kms_sw_dt->mapped, kms_sw_dt->size);
+      kms_sw_dt->mapped = MAP_FAILED;
+   }
+   if (kms_sw_dt->ro_mapped != MAP_FAILED) {
+      munmap(kms_sw_dt->ro_mapped, kms_sw_dt->size);
+      kms_sw_dt->ro_mapped = MAP_FAILED;
+   }
 }

 static struct sw_displaytarget *
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -713,18 +713,6 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
   nir_validate_shader(*producer);
   nir_validate_shader(*consumer);

-   const bool p_is_scalar =
-      compiler->scalar_stage[(*producer)->info.stage];
-   const bool c_is_scalar =
-      compiler->scalar_stage[(*consumer)->info.stage];
-
-   if (p_is_scalar && c_is_scalar) {
-      NIR_PASS_V(*producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
-      NIR_PASS_V(*consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
-      *producer = brw_nir_optimize(*producer, compiler, p_is_scalar);
-      *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar);
-   }
-
   NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
   NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in);

@@ -741,7 +729,12 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
      NIR_PASS_V(*consumer, nir_lower_indirect_derefs,
                 brw_nir_no_indirect_mask(compiler, (*consumer)->info.stage));

+      const bool p_is_scalar =
+         compiler->scalar_stage[(*producer)->info.stage];
      *producer = brw_nir_optimize(*producer, compiler, p_is_scalar);
+
+      const bool c_is_scalar =
+         compiler->scalar_stage[(*consumer)->info.stage];
      *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar);
   }
 }
--- a/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
+++ b/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
@@ -340,18 +340,16 @@ try_lower_tex_ycbcr(struct anv_pipeline_layout *layout,
   if (binding->immutable_samplers == NULL)
      return false;

-   unsigned texture_index = tex->texture_index;
+   assert(tex->texture_index == 0);
+   unsigned array_index = 0;
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);
      nir_const_value *const_index = nir_src_as_const_value(deref->arr.index);
      if (!const_index)
         return false;
-      size_t hw_binding_size =
-         anv_descriptor_set_binding_layout_get_hw_size(binding);
-      texture_index += MIN2(const_index->u32[0], hw_binding_size - 1);
+      array_index = MIN2(const_index->u32[0], binding->array_size - 1);
   }
-   const struct anv_sampler *sampler =
-      binding->immutable_samplers[texture_index];
+   const struct anv_sampler *sampler = binding->immutable_samplers[array_index];

   if (sampler->conversion == NULL)
      return false;
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -496,7 +496,6 @@ bo_alloc_internal(struct brw_bufmgr *bufmgr,
                  uint32_t stride)
 {
   struct brw_bo *bo;
-   unsigned int page_size = getpagesize();
   int ret;
   struct bo_cache_bucket *bucket;
   bool alloc_from_cache;
@@ -522,12 +521,12 @@ bo_alloc_internal(struct brw_bufmgr *bufmgr,
    * allocation up.
    */
   if (bucket == NULL) {
-      bo_size = size;
-      if (bo_size < page_size)
-         bo_size = page_size;
+      unsigned int page_size = getpagesize();
+      bo_size = size == 0 ? page_size : ALIGN(size, page_size);
   } else {
      bo_size = bucket->size;
   }
+   assert(bo_size);

   mtx_lock(&bufmgr->lock);
   /* Get a buffer out of the cache if available */
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -695,7 +695,7 @@ brw_initialize_context_constants(struct brw_context *brw)
   /* ARB_viewport_array, OES_viewport_array */
   if (devinfo->gen >= 6) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
-      ctx->Const.ViewportSubpixelBits = 0;
+      ctx->Const.ViewportSubpixelBits = 8;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
Author	SHA1	Message	Date
Andres Gomez	86aa912dda	Update version to 18.2.0-rc4 Signed-off-by: Andres Gomez <agomez@igalia.com>	2018-08-22 16:59:30 +03:00
Andres Gomez	2ec87de498	cherry-ignore: autotools: don't ship the git_sha1.h generated in git in the tarballs fixes: This commit has more than one Fixes tag but the commit it addresses didn't land in branch. Signed-off-by: Andres Gomez <agomez@igalia.com>	2018-08-22 16:58:27 +03:00
Dave Airlie	54cd81dfc5	r600/eg: rework atomic counter emission with flushes With the current code, we didn't do the space checks prior to atomic counter setup emission, but we also didn't add atomic counters to the space check so we could get a flush later as well. These flushes would be bad, and lead to problems with parallel tests. We have to ensure the atomic counter copy in, draw emits and counter copy out are kept in the same command submission unit. This reworks the code to drop some useless masks, make the counting separate to the emits, and make the space checker handle atomic counter space. [airlied: want this in 18.2] Fixes: `06993e4ee` (r600: add support for hw atomic counters. (v3)) (cherry picked from commit `32529e6084`)	2018-08-21 23:18:19 +03:00
Danylo Piliaiev	5457e58a64	i965: Advertise 8 bits subpixel precision for viewport bounds on gen6+ We use floating-points for viewport bounds so VIEWPORT_SUBPIXEL_BITS should reflect this. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105975 Signed-off-by: Danylo Piliaiev <danylo.piliaiev@globallogic.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> (cherry picked from commit `25ec806eb2`)	2018-08-21 15:46:01 +03:00
Samuel Pitoiset	1e9c422894	radv: do not use CP predication for DCC decompressions This fixes a regression with some Unity demos. Not sure what the root cause of the problem is, especially because the driver doesn't perform any fast color clears. So, it shouldn't be needed to decompress DCC. RadeonSI says that the decompression is relatively cheap if the surface has been decompressed already. One possible improvement is to use two predicates, one for DCC and one for FCE that could be cleared when DCC, FMASK or CMASK are performed by the driver. That might skip some unnecessary decompression passes (not DCC though). Fixes: `ff7daadca1` ("radv: enable/disable predication for the DCC decompression pass") CC: 18.2 <mesa-stable@lists.freedesktop.org> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107563 Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> (cherry picked from commit `0aacb5eab6`)	2018-08-20 13:33:07 +03:00
Ray Strode	4320851198	gallium/winsys/kms: don't unmap what wasn't mapped At the moment, depending on pipe transfer flags, the dumb buffer map address can end up at either kms_sw_dt->ro_mapped or kms_sw_dt->mapped. When it's time to unmap the dumb buffer, both locations get unmapped, even though one is probably initialized to 0. That leads to the code segment getting unmapped at runtime and crashes when trying to call into unrelated code. This commit addresses the problem by using MAP_FAILED instead of NULL for ro_mapped and mapped when the dumb buffer is unmapped, and only unmapping mapped addresses at unmap time. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107098 Signed-off-by: Ray Strode <rstrode@redhat.com> Fixes: `d891f28df9` ("gallium/winsys/kms: Fix possible leak in map/unmap.") Cc: Lepton Wu <lepton@chromium.org> Reviewed-by: Emil Velikov <emil.velikov@collabora.com> (cherry picked from commit `9baff597ce`)	2018-08-18 00:03:00 +03:00
Jason Ekstrand	f69fcede0a	anv/lower_ycbcr: Use the binding array size for bounds checks Because lower_ycbcr gets called before apply_pipeline_layout, the indices are all logical and the binding layout HW size is actually too big for the bounds check. We should just use the regular logical array size instead. Fixes: `f3e91e78a3` "anv: add nir lowering pass for ycbcr textures" Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> (cherry picked from commit `320dacb0a0`)	2018-08-18 00:01:16 +03:00
Samuel Pitoiset	26c07daf9d	radv/winsys: fix creating the BO list for virtual buffers When the number of unique BO is 0, we optimize the list creation by copying all buffers of the current CS directly into it. But this is only valid if the CS doesn't have virtual buffers, otherwise they are not added and hw might report VM faults. This fixes VM faults with: dEQP-VK.sparse_resources.image_sparse_binding.2d.rgba8ui.1024_128_1 CC: <mesa-stable@lists.freedesktop.org> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> (cherry picked from commit `d27e1584ce`)	2018-08-17 23:35:38 +03:00
Alexander Tsoy	f3fc2d40fe	meson: fix build for egl platform_x11 without dri3 and gbm Compiling EGL's platform_x11 without dri3 and gbm yields this compile failure: platform_x11 needs inc_loader: ../mesa-18.2.0-rc2/src/egl/drivers/dri2/platform_x11.c:48:10: fatal error: loader.h: No such file or directory #include "loader.h" ^~~~~~~~~~ Fixes: `108d257a16` ("meson: build libEGL") Bugzilla: https://bugs.gentoo.org/663534 Reviewed-by: Matt Turner <mattst88@gmail.com> (cherry picked from commit `9a96bf0ecd`)	2018-08-16 23:23:19 +03:00
Samuel Pitoiset	4477635b69	radv: initialize the DCC predicate correctly when it's compressed We have to do a fast-clear eliminate when clearing DCC metadata with 0x20202020. I don't know if that fixes anything but that seems correct to me. CC: 18.2 <mesa-stable@lists.freedesktop.org> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> (cherry picked from commit `f9e8456c39`)	2018-08-16 23:22:19 +03:00
Samuel Pitoiset	bc6b6cb290	radv: fix missing initialization of the conditional rendering state This was missing when VK_EXT_conditional_rendering has been implemented. The predication type should be -1 to avoid restoring previous state when performing a decompression pass with DCC enabled. Note that we don't have to handle secondary command buffers because we don't support this feature currently. CC: 18.2 <mesa-stable@lists.freedesktop.org> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> (cherry picked from commit `f3a78a9da0`)	2018-08-16 23:21:34 +03:00
Bas Nieuwenhuizen	3ff3bfa3f5	radv: Revert divisor = 0 case for vertex attribute extension. Seems like DXVK depends on that and it might get reverted upstream. Since apps are not supposed to use 0 in v2 anyway, we should be safe implementing the old behavior there. Fixes: `66e12451ac` "radv: Update to new VK_EXT_vertex_attribute_divisor to version 2." CC: 18.2 <mesa-stable@lists.freedesktop.org> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> (cherry picked from commit `011a811652`)	2018-08-16 23:19:58 +03:00
Danylo Piliaiev	c2268223c8	glsl: Avoid calling get_array_element for scalar constants Accessing scalar constant as an array in function call or initializer list triggered assert in get_array_element. Examples: func(0[0]); vec2 t = { 0[0], 0 }; Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107550 Signed-off-by: Danylo Piliaiev <danylo.piliaiev@globallogic.com> Reviewed-by: Tapani Pälli <tapani.palli@intel.com> (cherry picked from commit `479a849ad6`)	2018-08-16 13:38:36 +03:00
Jason Ekstrand	b9a97a8b88	Revert "intel/nir: Call nir_lower_io_to_scalar_early" Commit `4434591bf5` caused substantially more URB messages in geometry and tessellation shaders. Before we can really enable this sort of optimization, we either need some way of combining them back together into vectors or we need to do cross-stage vector element elimination without splitting everything into scalars. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107510 Fixes: `4434591bf5` "intel/nir: Call nir_lower_io_to_scalar_early" Acked-by: Kenneth Graunke <kenneth@whitecape.org> Tested-by: Mark Janes <mark.a.janes@intel.com> (cherry picked from commit `10f44da775`)	2018-08-16 02:09:21 +03:00
Sergii Romantsov	dbb5396667	intel/ppgtt: memory address alignment Kernel (for ppgtt) requires memory address to be aligned to page size (4096). -v2: added marking that also fixes initial commit `01058a5522`. -v3: numbers replaced by PAGE_SIZE; buffer-object size is aligned instead of alignment of offsets (Chris Wilson). -v4: changes related to PAGE_SIZE moved to separate commit -v5: restored alignment to page-size for 0-size. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106997 Fixes: `a363bb2cd0` (i965: Allocate VMA in userspace for full-PPGTT systems.) Fixes: `01058a5522` (i965: Add virtual memory allocator infrastructure to brw_bufmgr.) Signed-off-by: Sergii Romantsov <sergii.romantsov@globallogic.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> (cherry picked from commit `24839663a4`)	2018-08-16 02:08:32 +03:00
Timothy Arceri	586ac9c237	radv: add Doom workaround Cc: <mesa-stable@lists.freedesktop.org> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> (cherry picked from commit `f0a8accb0d`)	2018-08-16 02:07:50 +03:00
Samuel Pitoiset	f070d5a568	radv: disable the auto-waitcnt-before-barrier LLVM option This option allows us to remove additional s_waitcnt instructions because s_barrier internally does s_waitcnt 0. Though, apparently there is a problem with LDS accesses that causes rendering issues with FFXV and DXVK. Disable this optimization for now (RadeonSI still uses it). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107460 CC: 18.2 <mesa-stable@lists.freedesktop.org> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> (cherry picked from commit `71d5b2fbf8`)	2018-08-16 02:07:15 +03:00
Mauro Rossi	b1e0876a6b	egl/android: fix regression in drm_gralloc path (v2) This patch fixes a regression in mesa 18.2 and mesa-dev branches for HAVE_DRM_GRALLOC code path which is causing black screen on Android and prevents boot due to SIGSEGV MAPERR crash related to improper handling of drm_gralloc drm FD in new droid_open_device() path. Problem is due to `c7bb82136b` ("egl/android: Add DRM node probing and filtering") To avoid the crash the former existing working droid_open_device() is restored, renamed droid_open_device_drm_gralloc() and kept within HAVE_DRM_GRALLOC braces. Tested with mesa-dev and mesa 18.2 branch and oreo-x86 bootanimation and Android GUI booting is fixed with i965, nouveau, radeon. The changes are compatible with gbm_gralloc, I've tested build with hwc too. (v2) remove indentation from HAVE_DRM_GRALLOC pre-processor directive NOTE: Definition of enum{} for GRALLOC_MODULE_PERFORM_GET_DRM_FD is not necessary and it's actually causing a redefinition building error, because in HAVE_DRM_GRALLOC path gralloc_drm.h is already exported by libgralloc_drm which is currently still a dependency. Fixes: `c7bb82136b` ("egl/android: Add DRM node probing and filtering") Cc: "18.2" <mesa-stable@lists.freedesktop.org> Signed-off-by: Mauro Rossi <issor.oruam@gmail.com> (cherry picked from commit `73b342c7a5`)	2018-08-15 15:45:34 +03:00