Compare commits

...

15 Commits

Author SHA1 Message Date
Emil Velikov
19b62847e0 Update version to 17.3.0-rc3
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
2017-11-07 11:51:45 +00:00
Plamena Manolova
d5cc7e47a8 i965: Fix ARB_indirect_parameters logic.
This patch modifies the ARB_indirect_parameters logic in
brw_draw_prims, so that our implementation isn't affected if
another application attempts to use predicates. Previously we
were using a predicate with a DELTAS_EQUAL comparison operation
and relying on the MI_PREDICATE_DATA register being 0. Our code
to initialize MI_PREDICATE_DATA to 0 was incorrect, so we were
accidentally using whatever value was written there. Because the
kernel does not initialize the MI_PREDICATE_DATA register on
hardware context creation, we might inherit the value from whatever
context was last running on the GPU (likely another process).
The Haswell command parser also does not currently allow us to write
the MI_PREDICATE_DATA register. Rather than fixing this and requiring
an updated kernel, we switch to a different approach which uses a
SRCS_EQUAL predicate that makes no assumptions about the states of any
of the predicate registers.
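
As a rough sketch (condensed from the brw_draw_prims() hunk further down;
all names are taken from that hunk), the per-draw predicate sequence
becomes:

/* Upload the current draw count from the draw parameters buffer to
 * MI_PREDICATE_SRC0, zero its top 32 bits, and place the draw id in
 * MI_PREDICATE_SRC1. No prior predicate register state is assumed.
 */
brw_load_register_mem(brw, MI_PREDICATE_SRC0,
                      brw->draw.draw_params_count_bo,
                      brw->draw.draw_params_count_offset);
brw_load_register_imm32(brw, MI_PREDICATE_SRC0 + 4, 0);
brw_load_register_imm64(brw, MI_PREDICATE_SRC1, prims[i].draw_id);

if (i == 0 && brw->predicate.state != BRW_PREDICATE_STATE_USE_BIT) {
   /* First draw: LOADINV sets the predicate while draw_id != count. */
   OUT_BATCH(GEN7_MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
             MI_PREDICATE_COMBINEOP_SET | MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
} else {
   /* Later draws: LOAD+XOR flips the predicate off once draw_id
    * reaches the count, and keeps it off afterwards. */
   OUT_BATCH(GEN7_MI_PREDICATE | MI_PREDICATE_LOADOP_LOAD |
             MI_PREDICATE_COMBINEOP_XOR | MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
}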

Fixes Piglit's spec/arb_indirect_parameters/tf-count-arrays test.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103085
Signed-off-by: Plamena Manolova <plamena.manolova@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
(cherry picked from commit 048d4c45c9)
2017-11-03 18:30:32 +00:00
Dave Airlie
9b44ef94b4 i915g: make gears run again.
We need to validate that some structs exist before we dirty the states,
and avoid the problem in some other places.
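
Condensed from the i915_update_derived() and upload hunks below: dirty
bits for missing state objects are masked out up front, and the
remaining users null-check before dereferencing, e.g.:

/* In i915_update_derived(): drop dirty bits whose objects don't exist. */
if (!i915->fs) {
   i915->dirty &= ~(I915_NEW_FS_CONSTANTS | I915_NEW_FS);
   i915->hardware_dirty &= ~(I915_HW_PROGRAM | I915_HW_CONSTANTS);
}
if (!i915->rasterizer)
   i915->dirty &= ~I915_NEW_RASTERIZER;

/* In the state emitters: guard the dereference. */
if (i915->rasterizer)
   st[1] |= i915->rasterizer->st;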

Fixes: e027935a7 ("st/mesa: don't update unrelated states in non-draw calls such as Clear")
(cherry picked from commit cc69f2385e)
2017-11-03 18:21:59 +00:00
Jordan Justen
a12ca3b231 disk_cache: Fix issue reading GLSL metadata
The metadata size accounting only allowed for a single cache key. This
would cause the read of the metadata content to fail, which would
prevent the linking from being skipped.

Seen on Rocket League with i965 shader cache.
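
The one-line fix (see the disk_cache_get() hunk at the very end of this
compare) makes the size accounting match the lseek() that skips the keys:

/* The metadata block carries num_keys cache keys, not just one. */
cache_item_md_size += num_keys * sizeof(cache_key);
ret = lseek(fd, num_keys * sizeof(cache_key), SEEK_CUR);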

Fixes: b86ecea344 "util/disk_cache: write cache item metadata to disk"
Cc: Timothy Arceri <tarceri@itsqueeze.com>
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
(cherry picked from commit e5b141634c)
2017-11-03 18:21:59 +00:00
Timothy Arceri
9710fbbcdf radeonsi: fix culldist_writemask in nir path
The shared si_create_shader_selector() code already offsets the mask.
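
For example, with 2 written clip distances and 2 written cull distances
in the si_nir_scan_shader() hunk below:

/* old: u_bit_consecutive(2, 2) = 0xc -- the selector then shifted it again */
/* new: u_bit_consecutive(0, 2) = 0x3 -- the selector applies the only offset */
info->culldist_writemask = u_bit_consecutive(0, info->num_written_culldistance);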

Fixes the following piglit tests:

arb_cull_distance/clip-cull-3.shader_test
arb_cull_distance/clip-cull-4.shader_test

Fixes: 29d7bdd179 (radeonsi: scan NIR shaders to obtain required info)
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
(cherry picked from commit e80bbd6f52)
2017-11-03 18:21:59 +00:00
Timothy Arceri
b4bf9f6a41 radv: add cache items to in memory cache when reading from disk
Otherwise we will leak them, load duplicates from disk rather than
from memory, and never write items loaded from disk to the app's
pipeline cache.
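
Condensed from the radv_create_shader_variants_from_pipeline_cache()
hunk below: a disk hit is now cloned with the cache's allocator and
registered in the in-memory cache before its variants are used:

size_t size = entry_size(entry);
struct cache_entry *new_entry = vk_alloc(&cache->alloc, size, 8,
                                         VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
if (!new_entry) {
   free(entry);
   pthread_mutex_unlock(&cache->mutex);
   return false;
}
memcpy(new_entry, entry, size);
free(entry);   /* entry came malloc'd from disk_cache_get() */
entry = new_entry;
radv_pipeline_cache_add_entry(cache, new_entry);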

Fixes: fd24be134f 'radv: make use of on-disk cache'
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
(cherry picked from commit 1e84e53712)

Squashed with commit:

radv: use correct alloc function when loading from disk

Fixes regression in:

dEQP-VK.api.object_management.alloc_callback_fail.graphics_pipeline

Fixes: 1e84e53712 "radv: add cache items to in memory cache when reading from disk"
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
(cherry picked from commit e92405c55a)
2017-11-03 18:21:42 +00:00
Bas Nieuwenhuizen
2516c3217d radv: Don't expose heaps with 0 memory.
It confuses CTS. This pregenerates the heap info into the
physical device, so we can use it for translating contiguous
indices into our "standard" ones.

This also makes the WSI a bit smarter in case the first preferred
heap does not exist.
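
Condensed from the radv_physical_device_init_mem_types() and
radv_alloc_memory() hunks below: the heap/type tables are generated once
per physical device (skipping zero-sized heaps), and allocations
translate the app-visible contiguous index back into the driver's enum:

/* Init: only add heaps with size > 0, and remember which "standard"
 * type each contiguous, app-visible index maps to. */
device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) { ... };

/* Allocation: translate before choosing domain/flags. */
enum radv_mem_type mem_type_index =
   device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];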

Reviewed-by: Dave Airlie <airlied@redhat.com>
CC: <mesa-stable@lists.freedesktop.org>
(cherry picked from commit 806721429a)
2017-11-03 18:20:06 +00:00
Jason Ekstrand
383b360348 intel/fs: Alloc pull constants off mem_ctx
It doesn't actually matter since the only user of push constants, i965,
ralloc_steals it back to NULL, but it's more consistent and probably
fixes memory leaks in some error cases.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Cc: mesa-stable@lists.freedesktop.org
(cherry picked from commit 7b4387519c)
2017-11-03 18:20:04 +00:00
Wladimir J. van der Laan
71571aab14 etnaviv: don't do resolve-in-place without valid TS
GC3000 resolve-in-place assumes that the TS state is configured.
If it is not, this will result in MMU errors. This is especially
apparent when using glGenerateMipmap().
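
The guard (from the etna_submit_rs_state() hunk below) simply skips the
resolve when no TS state was programmed for the source:

if (cs->RS_KICKER_INPLACE && !cs->source_ts_valid)
   /* Inplace resolve is a no-op if TS is not configured */
   return;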

Fixes: 78ade65956 ("etnaviv: Do GC3000 resolve-in-place when possible")
Cc: mesa-stable@lists.freedesktop.org
Signed-off-by: Wladimir J. van der Laan <laanwj@gmail.com>
Tested-by: Chris Healy <cphealy@gmail.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
(cherry picked from commit 8fbd82f464)
2017-11-03 18:20:01 +00:00
Gert Wollny
13bfb83b31 r600/sb: bail out if prepare_alu_group() doesn't find a proper scheduling
It is possible that the optimizer ends up in an infinite loop in
post_scheduler::schedule_alu(), because post_scheduler::prepare_alu_group()
does not find a proper scheduling. This can be deduced from
pending.count() being larger than zero and not getting smaller.

This patch works around the problem by signalling the failure so that the
optimizer bails out and the un-optimized shader is used.
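
Condensed from the post_scheduler hunks below: schedule_alu() now only
retries while the pending count is actually shrinking, and the failure
propagates up through process_alu(), schedule_bb() and run_on():

bool improving = true;
int last_pending = pending.count();
while (improving) {
   prev_regmap = regmap;
   if (!prepare_alu_group()) {
      int new_pending = pending.count();
      improving = (new_pending < last_pending) || (last_pending == 0);
      last_pending = new_pending;
      ...
   }
   ...
}
return improving;  /* false => bail out, keep the unoptimized shader */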

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103142
Cc: <mesa-stable@lists.freedesktop.org>
Signed-off-by: Gert Wollny <gw.fossdev@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
(cherry picked from commit 69eee511c6)
2017-11-03 18:19:59 +00:00
Neil Roberts
4c82f2c3a9 nir/opt_intrinsics: Fix values for gl_SubGroupG{e,t}MaskARB
Previously the values were calculated by just shifting ~0 by the
invocation ID. This would end up including bits that are higher than
gl_SubGroupSizeARB. The corresponding CTS test effectively requires that
these high bits be zero, so it was failing. There is a Piglit test as
well, but it appears to be checking the wrong values, so it passes.

For the two greater-than bitmasks, this patch adds an extra mask with
(~0>>(64-gl_SubGroupSizeARB)) to force these bits to zero.
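
Worked through for gl_SubGroupSizeARB = 32, gl_SubGroupInvocationARB = 5:

group_mask = ~0ull >> (64 - 32)      /* 0x00000000ffffffff */
geMask = (~0ull << 5) & group_mask   /* 0x00000000ffffffe0, bits 5..31 */
gtMask = (~1ull << 5) & group_mask   /* 0x00000000ffffffc0, bits 6..31 */

Previously bits 32..63 were also set in both masks.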

Fixes: KHR-GL45.shader_ballot_tests.ShaderBallotBitmasks

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102680#c3
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Cc: mesa-stable@lists.freedesktop.org
Signed-off-by: Neil Roberts <nroberts@igalia.com>
(cherry picked from commit b697ece10a)
2017-11-03 18:19:57 +00:00
Nanley Chery
14c40ebd0f i965: Check CCS_E compatibility for texture view rendering
Only use CCS_E to render to a texture that is CCS_E-compatible with the
original texture's miptree (linear) format. This prevents render
operations from writing data that can't be decoded with the original
miptree format.

On Gen10, with the new CCS_E-enabled formats handled, this enables the
driver to pass the arb_texture_view-rendering-formats piglit test.
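
The compatibility check (from the format_ccs_e_compat_with_miptree()
hunk below) compares the access format against the miptree's linear,
non-sRGB format variant:

mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
enum isl_format isl_format = brw_isl_format_for_mesa_format(linear_format);
return isl_formats_are_ccs_e_compatible(devinfo, isl_format, access_format);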

v2. Add a TODO for texturing. (Jason)

Cc: <mesa-stable@lists.freedesktop.org>
Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
(cherry picked from commit 9e849eb8bb)
2017-11-03 18:19:54 +00:00
Mauro Rossi
77839e9ba8 Android: move drivers' symlinks to /vendor (v2)
Having moved the gallium_dri.so library to /vendor/lib/dri,
the symlinks also need to be created using TARGET_OUT_VENDOR instead of TARGET_OUT,
or all non-Intel drivers will fail to load on Android N and earlier,
thus causing a SurfaceFlinger SIGABRT.

(v2) simplification of post install command

Fixes: c3f75d483c ("Android: move libraries to /vendor")

Cc: 17.3 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com> (v1)
Reviewed-by: Rob Herring <robh@kernel.org> (v1)
Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
(cherry picked from commit 7dae419aa7)
2017-11-03 18:19:52 +00:00
Tapani Pälli
7826bc9538 i965: fix blorp stage_prog_data->param leak
The patch uses mem_ctx for allocation to ensure the param array gets
freed later.

==6164== 48 bytes in 1 blocks are definitely lost in loss record 61 of 193
==6164==    at 0x4C2EB6B: malloc (vg_replace_malloc.c:299)
==6164==    by 0x12E31C6C: ralloc_size (ralloc.c:121)
==6164==    by 0x130189F1: fs_visitor::assign_constant_locations() (brw_fs.cpp:2095)
==6164==    by 0x13022D32: fs_visitor::optimize() (brw_fs.cpp:5715)
==6164==    by 0x13024D5A: fs_visitor::run_fs(bool, bool) (brw_fs.cpp:6229)
==6164==    by 0x1302549A: brw_compile_fs (brw_fs.cpp:6570)
==6164==    by 0x130C4B07: blorp_compile_fs (blorp.c:194)
==6164==    by 0x130D384B: blorp_params_get_clear_kernel (blorp_clear.c:79)
==6164==    by 0x130D3C56: blorp_fast_clear (blorp_clear.c:332)
==6164==    by 0x12EFA439: do_single_blorp_clear (brw_blorp.c:1261)
==6164==    by 0x12EFC4AF: brw_blorp_clear_color (brw_blorp.c:1326)
==6164==    by 0x12EFF72B: brw_clear (brw_clear.c:297)

Fixes: 8d90e28839 ("intel/compiler: Allocate pull_param in assign_constant_locations")
Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: mesa-stable@lists.freedesktop.org
(cherry picked from commit 446c5726ec)
2017-11-03 18:19:39 +00:00
Topi Pohjolainen
f0951a6aa9 intel/compiler/gen9: Pixel shader header only workaround
Fixes intermittent GPU hangs on Broxton with an Intel internal
test case.

There are plenty of similar fragment shaders in piglit that do
not use any varyings or uniforms. According to the
documentation, special timing is needed between pipeline stages.
Apparently we just don't hit that with piglit. Even with the
failing test case one doesn't always get the hang.

Moreover, according to the error states the hang happens
significantly later than the execution of the problematic shader.
There are multiple render cycles (primitive submissions) in between.
I've also seen error states where the ACTHD points outside the
batch. Almost as if the hardware writes somewhere that gets used
later on. That would also explain why piglit doesn't suffer from
this - most tests kick off one render cycle and any corruption
is left unseen.

v2 (Ken): Instead of enabling push constants, enable one of the
          inputs (PSIZ).
v3 (Ken, Jason): Use LAYER instead, keeping Vulkan's emit_3dstate_sbe()
                 happy.
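
The workaround itself (from the brw_fs.cpp hunk below) fakes a single
varying input when the dispatch would otherwise be header-only:

static void
gen9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data)
{
   if (wm_prog_data->num_varying_inputs)
      return;
   if (wm_prog_data->base.curb_read_length)
      return;
   /* Claim the LAYER input so a non-header phase gets enabled. */
   wm_prog_data->urb_setup[VARYING_SLOT_LAYER] = 0;
   wm_prog_data->num_varying_inputs = 1;
}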

Cc: "17.3 17.2" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
(cherry picked from commit 97e01adfd5)
2017-11-03 18:19:36 +00:00
22 changed files with 360 additions and 197 deletions

View File

@@ -1 +1 @@
17.3.0-rc2
17.3.0-rc3

View File

@@ -104,6 +104,75 @@ get_chip_name(enum radeon_family family)
}
}
static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
device->rad_info.vram_vis_size);
int vram_index = -1, visible_vram_index = -1, gart_index = -1;
device->memory_properties.memoryHeapCount = 0;
if (device->rad_info.vram_size - visible_vram_size > 0) {
vram_index = device->memory_properties.memoryHeapCount++;
device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
.size = device->rad_info.vram_size - visible_vram_size,
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
};
}
if (visible_vram_size) {
visible_vram_index = device->memory_properties.memoryHeapCount++;
device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
.size = visible_vram_size,
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
};
}
if (device->rad_info.gart_size > 0) {
gart_index = device->memory_properties.memoryHeapCount++;
device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
.size = device->rad_info.gart_size,
.flags = 0,
};
}
STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
unsigned type_count = 0;
if (vram_index >= 0) {
device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
.heapIndex = vram_index,
};
}
if (gart_index >= 0) {
device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
.heapIndex = gart_index,
};
}
if (visible_vram_index >= 0) {
device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
.heapIndex = visible_vram_index,
};
}
if (gart_index >= 0) {
device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
.heapIndex = gart_index,
};
}
device->memory_properties.memoryTypeCount = type_count;
}
static VkResult
radv_physical_device_init(struct radv_physical_device *device,
struct radv_instance *instance,
@@ -190,6 +259,7 @@ radv_physical_device_init(struct radv_physical_device *device,
*/
device->has_clear_state = device->rad_info.chip_class >= CIK;
radv_physical_device_init_mem_types(device);
return VK_SUCCESS;
fail:
@@ -780,49 +850,7 @@ void radv_GetPhysicalDeviceMemoryProperties(
{
RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
.heapIndex = RADV_MEM_HEAP_VRAM,
};
pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
.heapIndex = RADV_MEM_HEAP_GTT,
};
pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
.heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
};
pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
.heapIndex = RADV_MEM_HEAP_GTT,
};
STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
uint64_t visible_vram_size = MIN2(physical_device->rad_info.vram_size,
physical_device->rad_info.vram_vis_size);
pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
.size = physical_device->rad_info.vram_size -
visible_vram_size,
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
};
pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
.size = visible_vram_size,
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
};
pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
.size = physical_device->rad_info.gart_size,
.flags = 0,
};
*pMemoryProperties = physical_device->memory_properties;
}
void radv_GetPhysicalDeviceMemoryProperties2KHR(
@@ -2060,6 +2088,7 @@ VkResult radv_alloc_memory(VkDevice _device,
VkResult result;
enum radeon_bo_domain domain;
uint32_t flags = 0;
enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
@@ -2102,18 +2131,18 @@ VkResult radv_alloc_memory(VkDevice _device,
}
uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
domain = RADEON_DOMAIN_GTT;
else
domain = RADEON_DOMAIN_VRAM;
if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
if (mem_type_index == RADV_MEM_TYPE_VRAM)
flags |= RADEON_FLAG_NO_CPU_ACCESS;
else
flags |= RADEON_FLAG_CPU_ACCESS;
if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
flags |= RADEON_FLAG_GTT_WC;
if (mem_flags & RADV_MEM_IMPLICIT_SYNC)
@@ -2126,7 +2155,7 @@ VkResult radv_alloc_memory(VkDevice _device,
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
goto fail;
}
mem->type_index = pAllocateInfo->memoryTypeIndex;
mem->type_index = mem_type_index;
out_success:
*pMem = radv_device_memory_to_handle(mem);
@@ -2219,13 +2248,14 @@ VkResult radv_InvalidateMappedMemoryRanges(
}
void radv_GetBufferMemoryRequirements(
VkDevice device,
VkDevice _device,
VkBuffer _buffer,
VkMemoryRequirements* pMemoryRequirements)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
pMemoryRequirements->alignment = 4096;
@@ -2259,13 +2289,14 @@ void radv_GetBufferMemoryRequirements2KHR(
}
void radv_GetImageMemoryRequirements(
VkDevice device,
VkDevice _device,
VkImage _image,
VkMemoryRequirements* pMemoryRequirements)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_image, image, _image);
pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
pMemoryRequirements->size = image->size;
pMemoryRequirements->alignment = image->alignment;

View File

@@ -170,83 +170,6 @@ radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
return entry;
}
bool
radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
struct radv_pipeline_cache *cache,
const unsigned char *sha1,
struct radv_shader_variant **variants)
{
struct cache_entry *entry;
if (!cache)
cache = device->mem_cache;
pthread_mutex_lock(&cache->mutex);
entry = radv_pipeline_cache_search_unlocked(cache, sha1);
if (!entry) {
if (!device->physical_device->disk_cache ||
(device->instance->debug_flags & RADV_DEBUG_NO_CACHE)) {
pthread_mutex_unlock(&cache->mutex);
return false;
}
uint8_t disk_sha1[20];
disk_cache_compute_key(device->physical_device->disk_cache,
sha1, 20, disk_sha1);
entry = (struct cache_entry *)
disk_cache_get(device->physical_device->disk_cache,
disk_sha1, NULL);
if (!entry) {
pthread_mutex_unlock(&cache->mutex);
return false;
}
}
char *p = entry->code;
for(int i = 0; i < MESA_SHADER_STAGES; ++i) {
if (!entry->variants[i] && entry->code_sizes[i]) {
struct radv_shader_variant *variant;
struct cache_entry_variant_info info;
variant = calloc(1, sizeof(struct radv_shader_variant));
if (!variant) {
pthread_mutex_unlock(&cache->mutex);
return false;
}
memcpy(&info, p, sizeof(struct cache_entry_variant_info));
p += sizeof(struct cache_entry_variant_info);
variant->config = info.config;
variant->info = info.variant_info;
variant->rsrc1 = info.rsrc1;
variant->rsrc2 = info.rsrc2;
variant->code_size = entry->code_sizes[i];
variant->ref_count = 1;
void *ptr = radv_alloc_shader_memory(device, variant);
memcpy(ptr, p, entry->code_sizes[i]);
p += entry->code_sizes[i];
entry->variants[i] = variant;
} else if (entry->code_sizes[i]) {
p += sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
}
}
for (int i = 0; i < MESA_SHADER_STAGES; ++i)
if (entry->variants[i])
p_atomic_inc(&entry->variants[i]->ref_count);
memcpy(variants, entry->variants, sizeof(entry->variants));
pthread_mutex_unlock(&cache->mutex);
return true;
}
static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
struct cache_entry *entry)
@@ -316,6 +239,97 @@ radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
radv_pipeline_cache_set_entry(cache, entry);
}
bool
radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
struct radv_pipeline_cache *cache,
const unsigned char *sha1,
struct radv_shader_variant **variants)
{
struct cache_entry *entry;
if (!cache)
cache = device->mem_cache;
pthread_mutex_lock(&cache->mutex);
entry = radv_pipeline_cache_search_unlocked(cache, sha1);
if (!entry) {
if (!device->physical_device->disk_cache ||
(device->instance->debug_flags & RADV_DEBUG_NO_CACHE)) {
pthread_mutex_unlock(&cache->mutex);
return false;
}
uint8_t disk_sha1[20];
disk_cache_compute_key(device->physical_device->disk_cache,
sha1, 20, disk_sha1);
entry = (struct cache_entry *)
disk_cache_get(device->physical_device->disk_cache,
disk_sha1, NULL);
if (!entry) {
pthread_mutex_unlock(&cache->mutex);
return false;
} else {
size_t size = entry_size(entry);
struct cache_entry *new_entry = vk_alloc(&cache->alloc, size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
if (!new_entry) {
free(entry);
pthread_mutex_unlock(&cache->mutex);
return false;
}
memcpy(new_entry, entry, entry_size(entry));
free(entry);
entry = new_entry;
radv_pipeline_cache_add_entry(cache, new_entry);
}
}
char *p = entry->code;
for(int i = 0; i < MESA_SHADER_STAGES; ++i) {
if (!entry->variants[i] && entry->code_sizes[i]) {
struct radv_shader_variant *variant;
struct cache_entry_variant_info info;
variant = calloc(1, sizeof(struct radv_shader_variant));
if (!variant) {
pthread_mutex_unlock(&cache->mutex);
return false;
}
memcpy(&info, p, sizeof(struct cache_entry_variant_info));
p += sizeof(struct cache_entry_variant_info);
variant->config = info.config;
variant->info = info.variant_info;
variant->rsrc1 = info.rsrc1;
variant->rsrc2 = info.rsrc2;
variant->code_size = entry->code_sizes[i];
variant->ref_count = 1;
void *ptr = radv_alloc_shader_memory(device, variant);
memcpy(ptr, p, entry->code_sizes[i]);
p += entry->code_sizes[i];
entry->variants[i] = variant;
} else if (entry->code_sizes[i]) {
p += sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
}
}
for (int i = 0; i < MESA_SHADER_STAGES; ++i)
if (entry->variants[i])
p_atomic_inc(&entry->variants[i]->ref_count);
memcpy(variants, entry->variants, sizeof(entry->variants));
pthread_mutex_unlock(&cache->mutex);
return true;
}
void
radv_pipeline_cache_insert_shaders(struct radv_device *device,
struct radv_pipeline_cache *cache,

View File

@@ -282,6 +282,9 @@ struct radv_physical_device {
* the pipeline cache defined by apps.
*/
struct disk_cache * disk_cache;
VkPhysicalDeviceMemoryProperties memory_properties;
enum radv_mem_type mem_type_indices[RADV_MEM_TYPE_COUNT];
};
struct radv_instance {

View File

@@ -194,12 +194,26 @@ radv_wsi_image_create(VkDevice device_h,
.image = image_h
};
/* Find the first VRAM memory type, or GART for PRIME images. */
int memory_type_index = -1;
for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) {
bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
if ((linear && !is_local) || (!linear && is_local)) {
memory_type_index = i;
break;
}
}
/* fallback */
if (memory_type_index == -1)
memory_type_index = 0;
result = radv_alloc_memory(device_h,
&(VkMemoryAllocateInfo) {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = &ded_alloc,
.allocationSize = image->size,
.memoryTypeIndex = linear ? 1 : 0,
.memoryTypeIndex = memory_type_index,
},
NULL /* XXX: pAllocator */,
RADV_MEM_IMPLICIT_SYNC,

View File

@@ -28,6 +28,26 @@
* \file nir_opt_intrinsics.c
*/
static nir_ssa_def *
high_subgroup_mask(nir_builder *b,
nir_ssa_def *count,
uint64_t base_mask)
{
/* group_mask could probably be calculated more efficiently but we want to
* be sure not to shift by 64 if the subgroup size is 64 because the GLSL
* shift operator is undefined in that case. In any case if we were worried
about efficiency this should probably be done further down because the
* subgroup size is likely to be known at compile time.
*/
nir_ssa_def *subgroup_size = nir_load_subgroup_size(b);
nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull);
nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64), subgroup_size);
nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift);
nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), count);
return nir_iand(b, higher_bits, group_mask);
}
static bool
opt_intrinsics_impl(nir_function_impl *impl)
{
@@ -95,10 +115,10 @@ opt_intrinsics_impl(nir_function_impl *impl)
replacement = nir_ishl(&b, nir_imm_int64(&b, 1ull), count);
break;
case nir_intrinsic_load_subgroup_ge_mask:
replacement = nir_ishl(&b, nir_imm_int64(&b, ~0ull), count);
replacement = high_subgroup_mask(&b, count, ~0ull);
break;
case nir_intrinsic_load_subgroup_gt_mask:
replacement = nir_ishl(&b, nir_imm_int64(&b, ~1ull), count);
replacement = high_subgroup_mask(&b, count, ~1ull);
break;
case nir_intrinsic_load_subgroup_le_mask:
replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b, ~1ull), count));

View File

@@ -555,6 +555,7 @@ etna_try_rs_blit(struct pipe_context *pctx,
}
/* Set up color TS to source surface before blit, if needed */
bool source_ts_valid = false;
if (src->levels[blit_info->src.level].ts_size &&
src->levels[blit_info->src.level].ts_valid) {
struct etna_reloc reloc;
@@ -579,6 +580,8 @@ etna_try_rs_blit(struct pipe_context *pctx,
etna_set_state(ctx->stream, VIVS_TS_COLOR_CLEAR_VALUE,
src->levels[blit_info->src.level].clear_value);
source_ts_valid = true;
} else {
etna_set_state(ctx->stream, VIVS_TS_MEM_CONFIG, ts_mem_config);
}
@@ -593,6 +596,7 @@ etna_try_rs_blit(struct pipe_context *pctx,
.source_stride = src_lev->stride,
.source_padded_width = src_lev->padded_width,
.source_padded_height = src_lev->padded_height,
.source_ts_valid = source_ts_valid,
.dest_format = translate_rs_format(dst_format),
.dest_tiling = dst->layout,
.dest = dst->bo,

View File

@@ -171,6 +171,10 @@ etna_submit_rs_state(struct etna_context *ctx,
struct etna_cmd_stream *stream = ctx->stream;
struct etna_coalesce coalesce;
if (cs->RS_KICKER_INPLACE && !cs->source_ts_valid)
/* Inplace resolve is no-op if TS is not configured */
return;
ctx->stats.rs_operations++;
if (cs->RS_KICKER_INPLACE) {

View File

@@ -133,6 +133,7 @@ etna_compile_rs_state(struct etna_context *ctx, struct compiled_rs_state *cs,
/* Total number of tiles (same as for autodisable) */
cs->RS_KICKER_INPLACE = rs->source_padded_width * rs->source_padded_height / 16;
}
cs->source_ts_valid = rs->source_ts_valid;
}
void

View File

@@ -33,6 +33,7 @@
struct rs_state {
uint8_t downsample_x : 1; /* Downsample in x direction */
uint8_t downsample_y : 1; /* Downsample in y direction */
uint8_t source_ts_valid : 1;
uint8_t source_format; /* RS_FORMAT_XXX */
uint8_t source_tiling; /* ETNA_LAYOUT_XXX */
@@ -61,6 +62,7 @@ struct rs_state {
/* treat this as opaque structure */
struct compiled_rs_state {
uint8_t source_ts_valid : 1;
uint32_t RS_CONFIG;
uint32_t RS_SOURCE_STRIDE;
uint32_t RS_DEST_STRIDE;

View File

@@ -216,6 +216,23 @@ void i915_update_derived(struct i915_context *i915)
if (I915_DBG_ON(DBG_ATOMS))
i915_dump_dirty(i915, __FUNCTION__);
if (!i915->fs) {
i915->dirty &= ~(I915_NEW_FS_CONSTANTS | I915_NEW_FS);
i915->hardware_dirty &= ~(I915_HW_PROGRAM | I915_HW_CONSTANTS);
}
if (!i915->vs)
i915->dirty &= ~I915_NEW_VS;
if (!i915->blend)
i915->dirty &= ~I915_NEW_BLEND;
if (!i915->rasterizer)
i915->dirty &= ~I915_NEW_RASTERIZER;
if (!i915->depth_stencil)
i915->dirty &= ~I915_NEW_DEPTH_STENCIL;
for (i = 0; atoms[i]; i++)
if (atoms[i]->dirty & i915->dirty)
atoms[i]->update(i915);

View File

@@ -213,7 +213,8 @@ static void upload_STIPPLE(struct i915_context *i915)
/* I915_NEW_RASTERIZER
*/
st[1] |= i915->rasterizer->st;
if (i915->rasterizer)
st[1] |= i915->rasterizer->st;
/* I915_NEW_STIPPLE
*/

View File

@@ -168,11 +168,13 @@ static void upload_S6(struct i915_context *i915)
/* I915_NEW_BLEND
*/
LIS6 |= i915->blend->LIS6;
if (i915->blend)
LIS6 |= i915->blend->LIS6;
/* I915_NEW_DEPTH
*/
LIS6 |= i915->depth_stencil->depth_LIS6;
if (i915->depth_stencil)
LIS6 |= i915->depth_stencil->depth_LIS6;
set_immediate(i915, I915_IMMEDIATE_S6, LIS6);
}

View File

@@ -216,7 +216,7 @@ static void update_dst_buf_vars(struct i915_context *i915)
zformat = translate_depth_format(depth_surface->format);
if (is->is_i945 && tex->tiling != I915_TILE_NONE
&& !i915->fs->info.writes_z)
&& (i915->fs && !i915->fs->info.writes_z))
early_z = CLASSIC_EARLY_DEPTH;
} else
zformat = 0;

View File

@@ -711,22 +711,24 @@ void alu_group_tracker::update_flags(alu_node* n) {
}
int post_scheduler::run() {
run_on(sh.root);
return 0;
return run_on(sh.root) ? 0 : 1;
}
void post_scheduler::run_on(container_node* n) {
bool post_scheduler::run_on(container_node* n) {
int r = true;
for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
if (I->is_container()) {
if (I->subtype == NST_BB) {
bb_node* bb = static_cast<bb_node*>(*I);
schedule_bb(bb);
r = schedule_bb(bb);
} else {
run_on(static_cast<container_node*>(*I));
r = run_on(static_cast<container_node*>(*I));
}
if (!r)
break;
}
}
return r;
}
void post_scheduler::init_uc_val(container_node *c, value *v) {
@@ -758,7 +760,7 @@ unsigned post_scheduler::init_ucm(container_node *c, node *n) {
return F == ucm.end() ? 0 : F->second;
}
void post_scheduler::schedule_bb(bb_node* bb) {
bool post_scheduler::schedule_bb(bb_node* bb) {
PSC_DUMP(
sblog << "scheduling BB " << bb->id << "\n";
if (!pending.empty())
@@ -791,8 +793,10 @@ void post_scheduler::schedule_bb(bb_node* bb) {
if (n->is_alu_clause()) {
n->remove();
process_alu(static_cast<container_node*>(n));
continue;
bool r = process_alu(static_cast<container_node*>(n));
if (r)
continue;
return false;
}
n->remove();
@@ -800,6 +804,7 @@ void post_scheduler::schedule_bb(bb_node* bb) {
}
this->cur_bb = NULL;
return true;
}
void post_scheduler::init_regmap() {
@@ -933,10 +938,10 @@ void post_scheduler::process_fetch(container_node *c) {
cur_bb->push_front(c);
}
void post_scheduler::process_alu(container_node *c) {
bool post_scheduler::process_alu(container_node *c) {
if (c->empty())
return;
return true;
ucm.clear();
alu.reset();
@@ -973,7 +978,7 @@ void post_scheduler::process_alu(container_node *c) {
}
}
schedule_alu(c);
return schedule_alu(c);
}
void post_scheduler::update_local_interferences() {
@@ -1135,15 +1140,20 @@ void post_scheduler::emit_clause() {
emit_index_registers();
}
void post_scheduler::schedule_alu(container_node *c) {
bool post_scheduler::schedule_alu(container_node *c) {
assert(!ready.empty() || !ready_copies.empty());
while (1) {
bool improving = true;
int last_pending = pending.count();
while (improving) {
prev_regmap = regmap;
if (!prepare_alu_group()) {
int new_pending = pending.count();
improving = (new_pending < last_pending) || (last_pending == 0);
last_pending = new_pending;
if (alu.current_idx[0] || alu.current_idx[1]) {
regmap = prev_regmap;
emit_clause();
@@ -1186,6 +1196,7 @@ void post_scheduler::schedule_alu(container_node *c) {
dump::dump_op_list(&pending);
assert(!"unscheduled pending instructions");
}
return improving;
}
void post_scheduler::add_interferences(value *v, sb_bitset &rb, val_set &vs) {

View File

@@ -267,14 +267,14 @@ public:
live(), ucm(), alu(sh), regmap(), cleared_interf() {}
virtual int run();
void run_on(container_node *n);
void schedule_bb(bb_node *bb);
bool run_on(container_node *n);
bool schedule_bb(bb_node *bb);
void load_index_register(value *v, unsigned idx);
void process_fetch(container_node *c);
void process_alu(container_node *c);
void schedule_alu(container_node *c);
bool process_alu(container_node *c);
bool schedule_alu(container_node *c);
bool prepare_alu_group();
void release_op(node *n);

View File

@@ -302,8 +302,7 @@ void si_nir_scan_shader(const struct nir_shader *nir,
info->num_written_clipdistance = nir->info.clip_distance_array_size;
info->num_written_culldistance = nir->info.cull_distance_array_size;
info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance);
info->culldist_writemask = u_bit_consecutive(info->num_written_clipdistance,
info->num_written_culldistance);
info->culldist_writemask = u_bit_consecutive(0, info->num_written_culldistance);
if (info->processor == PIPE_SHADER_FRAGMENT)
info->uses_kill = nir->info.fs.uses_discard;

View File

@@ -68,8 +68,9 @@ LOCAL_SHARED_LIBRARIES += $(sort $(GALLIUM_SHARED_LIBS))
ifneq ($(filter 5 6 7, $(MESA_ANDROID_MAJOR_VERSION)),)
LOCAL_POST_INSTALL_CMD := \
$(foreach l, lib $(if $(filter true,$(TARGET_IS_64_BIT)),lib64), \
mkdir -p $(TARGET_OUT)/$(l)/$(MESA_DRI_MODULE_REL_PATH); \
$(foreach d, $(GALLIUM_TARGET_DRIVERS), ln -sf gallium_dri.so $(TARGET_OUT)/$(l)/$(MESA_DRI_MODULE_REL_PATH)/$(d)_dri.so;) \
$(eval MESA_DRI_MODULE_PATH := $(TARGET_OUT_VENDOR)/$(l)/$(MESA_DRI_MODULE_REL_PATH)) \
mkdir -p $(MESA_DRI_MODULE_PATH); \
$(foreach d, $(GALLIUM_TARGET_DRIVERS), ln -sf gallium_dri.so $(MESA_DRI_MODULE_PATH)/$(d)_dri.so;) \
)
else
LOCAL_MODULE_SYMLINKS := $(foreach d, $(GALLIUM_TARGET_DRIVERS), $(d)_dri.so)

View File

@@ -2092,10 +2092,10 @@ fs_visitor::assign_constant_locations()
*/
uint32_t *param = stage_prog_data->param;
stage_prog_data->nr_params = num_push_constants;
stage_prog_data->param = ralloc_array(NULL, uint32_t, num_push_constants);
stage_prog_data->param = ralloc_array(mem_ctx, uint32_t, num_push_constants);
if (num_pull_constants > 0) {
stage_prog_data->nr_pull_params = num_pull_constants;
stage_prog_data->pull_param = ralloc_array(NULL, uint32_t,
stage_prog_data->pull_param = ralloc_array(mem_ctx, uint32_t,
num_pull_constants);
}
@@ -6166,6 +6166,31 @@ fs_visitor::run_gs()
return !failed;
}
/* From the SKL PRM, Volume 16, Workarounds:
*
* 0877 3D Pixel Shader Hang possible when pixel shader dispatched with
* only header phases (R0-R2)
*
* WA: Enable a non-header phase (e.g. push constant) when dispatch would
* have been header only.
*
* Instead of enabling push constants one can alternatively enable one of the
* inputs. Here one simply chooses "layer" which shouldn't impose much
* overhead.
*/
static void
gen9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data)
{
if (wm_prog_data->num_varying_inputs)
return;
if (wm_prog_data->base.curb_read_length)
return;
wm_prog_data->urb_setup[VARYING_SLOT_LAYER] = 0;
wm_prog_data->num_varying_inputs = 1;
}
bool
fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
{
@@ -6229,6 +6254,10 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
optimize();
assign_curb_setup();
if (devinfo->gen >= 9)
gen9_ps_header_only_workaround(wm_prog_data);
assign_urb_setup();
fixup_3src_null_dest();

View File

@@ -875,7 +875,6 @@ brw_draw_prims(struct gl_context *ctx,
struct brw_context *brw = brw_context(ctx);
const struct gl_vertex_array **arrays = ctx->Array._DrawArrays;
int predicate_state = brw->predicate.state;
int combine_op = MI_PREDICATE_COMBINEOP_SET;
struct brw_transform_feedback_object *xfb_obj =
(struct brw_transform_feedback_object *) gl_xfb_obj;
@@ -919,49 +918,35 @@ brw_draw_prims(struct gl_context *ctx,
* to it.
*/
if (brw->draw.draw_params_count_bo &&
predicate_state == BRW_PREDICATE_STATE_USE_BIT) {
/* We need to empty the MI_PREDICATE_DATA register since it might
* already be set.
*/
BEGIN_BATCH(4);
OUT_BATCH(MI_PREDICATE_DATA);
OUT_BATCH(0u);
OUT_BATCH(MI_PREDICATE_DATA + 4);
OUT_BATCH(0u);
ADVANCE_BATCH();
/* We need to combine the results of both predicates.*/
combine_op = MI_PREDICATE_COMBINEOP_AND;
}
for (i = 0; i < nr_prims; i++) {
/* Implementation of ARB_indirect_parameters via predicates */
if (brw->draw.draw_params_count_bo) {
struct brw_bo *draw_id_bo = NULL;
uint32_t draw_id_offset;
intel_upload_data(brw, &prims[i].draw_id, 4, 4, &draw_id_bo,
&draw_id_offset);
brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
/* Upload the current draw count from the draw parameters buffer to
* MI_PREDICATE_SRC0.
*/
brw_load_register_mem(brw, MI_PREDICATE_SRC0,
brw->draw.draw_params_count_bo,
brw->draw.draw_params_count_offset);
brw_load_register_mem(brw, MI_PREDICATE_SRC1, draw_id_bo,
draw_id_offset);
/* Zero the top 32-bits of MI_PREDICATE_SRC0 */
brw_load_register_imm32(brw, MI_PREDICATE_SRC0 + 4, 0);
/* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
brw_load_register_imm64(brw, MI_PREDICATE_SRC1, prims[i].draw_id);
BEGIN_BATCH(1);
OUT_BATCH(GEN7_MI_PREDICATE |
MI_PREDICATE_LOADOP_LOADINV | combine_op |
MI_PREDICATE_COMPAREOP_DELTAS_EQUAL);
if (i == 0 && brw->predicate.state != BRW_PREDICATE_STATE_USE_BIT) {
OUT_BATCH(GEN7_MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
MI_PREDICATE_COMBINEOP_SET |
MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
} else {
OUT_BATCH(GEN7_MI_PREDICATE |
MI_PREDICATE_LOADOP_LOAD | MI_PREDICATE_COMBINEOP_XOR |
MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
}
ADVANCE_BATCH();
brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
brw_bo_unreference(draw_id_bo);
}
brw_draw_single_prim(ctx, arrays, &prims[i], i, xfb_obj, stream,

View File

@@ -241,6 +241,27 @@ intel_miptree_supports_hiz(const struct brw_context *brw,
}
}
/**
* Return true if the format that will be used to access the miptree is
* CCS_E-compatible with the miptree's linear/non-sRGB format.
*
* Why use the linear format? Well, although the miptree may be specified with
* an sRGB format, the usage of that color space/format can be toggled. Since
* our HW tends to support more linear formats than sRGB ones, we use this
format variant to check for CCS_E compatibility.
*/
static bool
format_ccs_e_compat_with_miptree(const struct gen_device_info *devinfo,
const struct intel_mipmap_tree *mt,
enum isl_format access_format)
{
assert(mt->aux_usage == ISL_AUX_USAGE_CCS_E);
mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
enum isl_format isl_format = brw_isl_format_for_mesa_format(linear_format);
return isl_formats_are_ccs_e_compatible(devinfo, isl_format, access_format);
}
static bool
intel_miptree_supports_ccs_e(struct brw_context *brw,
const struct intel_mipmap_tree *mt)
@@ -2549,6 +2570,7 @@ can_texture_with_ccs(struct brw_context *brw,
if (mt->aux_usage != ISL_AUX_USAGE_CCS_E)
return false;
/* TODO: Replace with format_ccs_e_compat_with_miptree for better perf. */
if (!isl_formats_are_ccs_e_compatible(&brw->screen->devinfo,
mt->surf.format, view_format)) {
perf_debug("Incompatible sampling format (%s) for rbc (%s)\n",
@@ -2666,8 +2688,11 @@ intel_miptree_render_aux_usage(struct brw_context *brw,
return mt->mcs_buf ? ISL_AUX_USAGE_CCS_D : ISL_AUX_USAGE_NONE;
case ISL_AUX_USAGE_CCS_E: {
/* If the format supports CCS_E, then we can just use it */
if (isl_format_supports_ccs_e(&brw->screen->devinfo, render_format))
/* If the format supports CCS_E and is compatible with the miptree,
* then we can use it.
*/
if (format_ccs_e_compat_with_miptree(&brw->screen->devinfo,
mt, render_format))
return ISL_AUX_USAGE_CCS_E;
/* Otherwise, we have to fall back to CCS_D */

View File

@@ -1110,7 +1110,7 @@ disk_cache_get(struct disk_cache *cache, const cache_key key, size_t *size)
* TODO: pass the metadata back to the caller and do some basic
* validation.
*/
cache_item_md_size += sizeof(cache_key);
cache_item_md_size += num_keys * sizeof(cache_key);
ret = lseek(fd, num_keys * sizeof(cache_key), SEEK_CUR);
if (ret == -1)
goto fail;