Update version to 17.3.0-rc2

wayland-egl: fix wayland cflags
Fixes: 80bfff5c4f "wayland-egl: adds CFLAGS for wayland.egl.h include"
Suggested-by: Daniel Stone <daniel@fooishbar.org>
Signed-off-by: Eric Engestrom <eric.engestrom@imgtec.com>
Acked-by: Emil Velikov <emil.velikov@collabora.com>
Acked-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
(cherry picked from commit 866c8a94d4)
2017-10-30 13:52:46 +00:00 · 2017-10-27 21:33:04 +01:00 · 2017-10-27 21:33:04 +01:00 · 2017-10-27 21:33:04 +01:00 · 2017-10-27 21:33:04 +01:00 · 2017-10-27 21:33:04 +01:00
47 changed files with 425 additions and 242 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-17.3.0-devel
+17.3.0-rc2
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3631,15 +3631,17 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
 	LLVMValueRef i1true = LLVMConstInt(ctx->ac.i1, 1, false);
 	MAYBE_UNUSED int length;

+	bool is_unsigned = glsl_get_sampler_result_type(type) == GLSL_TYPE_UINT;
+
 	switch (instr->intrinsic) {
 	case nir_intrinsic_image_atomic_add:
 		atomic_name = "add";
 		break;
 	case nir_intrinsic_image_atomic_min:
-		atomic_name = "smin";
+		atomic_name = is_unsigned ? "umin" : "smin";
 		break;
 	case nir_intrinsic_image_atomic_max:
-		atomic_name = "smax";
+		atomic_name = is_unsigned ? "umax" : "smax";
 		break;
 	case nir_intrinsic_image_atomic_and:
 		atomic_name = "and";
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -927,9 +927,11 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
 		    in->numSamples == 1) {
 			ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
 			ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
+			ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {};

 			din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
 			dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
+			dout.pMipInfo = meta_mip_info;

 			din.dccKeyFlags.pipeAligned = 1;
 			din.dccKeyFlags.rbAligned = 1;
@@ -955,21 +957,37 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
 			surf->dcc_alignment = dout.dccRamBaseAlign;
 			surf->num_dcc_levels = in->numMipLevels;

-			/* Disable DCC for the smallest levels. It seems to be
-			 * required for DCC readability between CB and shaders
-			 * when TC L2 isn't flushed. This was guessed.
+			/* Disable DCC for levels that are in the mip tail.
+			 *
+			 * There are two issues that this is intended to
+			 * address:
+			 *
+			 * 1. Multiple mip levels may share a cache line. This
+			 *    can lead to corruption when switching between
+			 *    rendering to different mip levels because the
+			 *    RBs don't maintain coherency.
+			 *
+			 * 2. Texturing with metadata after rendering sometimes
+			 *    fails with corruption, probably for a similar
+			 *    reason.
+			 *
+			 * Working around these issues for all levels in the
+			 * mip tail may be overly conservative, but it's what
+			 * Vulkan does.
 			 *
 			 * Alternative solutions that also work but are worse:
-			 * - Disable DCC.
+			 * - Disable DCC entirely.
 			 * - Flush TC L2 after rendering.
 			 */
-			for (unsigned i = 1; i < in->numMipLevels; i++) {
-				if (mip_info[i].pitch *
-				    mip_info[i].height * surf->bpe < 1024) {
+			for (unsigned i = 0; i < in->numMipLevels; i++) {
+				if (meta_mip_info[i].inMiptail) {
 					surf->num_dcc_levels = i;
 					break;
 				}
 			}
+
+			if (!surf->num_dcc_levels)
+				surf->dcc_size = 0;
 		}

 		/* FMASK */
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -152,6 +152,8 @@ radv_physical_device_init(struct radv_physical_device *device,
 		goto fail;
 	}

+	device->name = get_chip_name(device->rad_info.family);
+
 	if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
 		radv_finish_wsi(device);
 		device->ws->destroy(device->ws);
@@ -168,12 +170,11 @@ radv_physical_device_init(struct radv_physical_device *device,
 	/* The gpu id is already embeded in the uuid so we just pass "radv"
 	 * when creating the cache.
 	 */
-	char buf[VK_UUID_SIZE + 1];
-	disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE);
-	device->disk_cache = disk_cache_create("radv", buf, shader_env_flags);
+	char buf[VK_UUID_SIZE * 2 + 1];
+	disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
+	device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);

 	fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
-	device->name = get_chip_name(device->rad_info.family);

 	radv_get_driver_uuid(&device->device_uuid);
 	radv_get_device_uuid(&device->rad_info, &device->device_uuid);
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1766,6 +1766,13 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 						    stage ? stage->pName : "main", i,
 						    stage ? stage->pSpecializationInfo : NULL);
 		pipeline->active_stages |= mesa_to_vk_shader_stage(i);
+		/* We don't want to alter the meta shaders' IR directly, so
+		 * clone it first.
+		 */
+		if (nir[i]->info.name) {
+			nir[i] = nir_shader_clone(NULL, nir[i]);
+		}
+
 	}

 	if (nir[MESA_SHADER_TESS_CTRL]) {
@@ -1779,6 +1786,14 @@ void radv_create_shaders(struct radv_pipeline *pipeline,

 	radv_link_shaders(pipeline, nir);

+	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+		if (!(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS))
+			continue;
+
+		if (modules[i])
+			nir_print_shader(nir[i], stderr);
+	}
+
 	if (nir[MESA_SHADER_FRAGMENT]) {
 		if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
 			pipeline->shaders[MESA_SHADER_FRAGMENT] =
@@ -1863,7 +1878,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,

 	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
 		free(codes[i]);
-		if (modules[i] && !modules[i]->nir && !pipeline->device->trace_bo)
+		if (modules[i] && !pipeline->device->trace_bo)
 			ralloc_free(nir[i]);
 	}

--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -231,6 +231,8 @@ radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
 			p += entry->code_sizes[i];

 			entry->variants[i] = variant;
+		} else if (entry->code_sizes[i]) {
+			p += sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
 		}

 	}
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -291,9 +291,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
 	nir_remove_dead_variables(nir, nir_var_local);
 	radv_optimize_nir(nir);

-	if (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)
-		nir_print_shader(nir, stderr);
-
 	return nir;
 }

--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -85,6 +85,7 @@ LIBGLSL_FILES = \
 	glsl/lower_buffer_access.cpp \
 	glsl/lower_buffer_access.h \
 	glsl/lower_const_arrays_to_uniforms.cpp \
+	glsl/lower_cs_derived.cpp \
 	glsl/lower_discard.cpp \
 	glsl/lower_discard_flow.cpp \
 	glsl/lower_distance.cpp \
--- a/src/compiler/glsl/builtin_variables.cpp
+++ b/src/compiler/glsl/builtin_variables.cpp
@@ -1295,15 +1295,10 @@ builtin_variable_generator::generate_cs_special_vars()
                       uvec3_t, "gl_LocalGroupSizeARB");
   }

-   if (state->ctx->Const.LowerCsDerivedVariables) {
-      add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0);
-      add_variable("gl_LocalInvocationIndex", uint_t, ir_var_auto, 0);
-   } else {
-      add_system_value(SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
-                       uvec3_t, "gl_GlobalInvocationID");
-      add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
-                       uint_t, "gl_LocalInvocationIndex");
-   }
+   add_system_value(SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
+                    uvec3_t, "gl_GlobalInvocationID");
+   add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
+                    uint_t, "gl_LocalInvocationIndex");
 }


@@ -1474,84 +1469,3 @@ _mesa_glsl_initialize_variables(exec_list *instructions,
      break;
   }
 }
-
-
-/**
- * Initialize compute shader variables with values that are derived from other
- * compute shader variable.
- */
-static void
-initialize_cs_derived_variables(gl_shader *shader,
-                                ir_function_signature *const main_sig)
-{
-   assert(shader->Stage == MESA_SHADER_COMPUTE);
-
-   ir_variable *gl_GlobalInvocationID =
-      shader->symbols->get_variable("gl_GlobalInvocationID");
-   assert(gl_GlobalInvocationID);
-   ir_variable *gl_WorkGroupID =
-      shader->symbols->get_variable("gl_WorkGroupID");
-   assert(gl_WorkGroupID);
-   ir_variable *gl_WorkGroupSize =
-      shader->symbols->get_variable("gl_WorkGroupSize");
-   if (gl_WorkGroupSize == NULL) {
-      void *const mem_ctx = ralloc_parent(shader->ir);
-      gl_WorkGroupSize = new(mem_ctx) ir_variable(glsl_type::uvec3_type,
-                                                  "gl_WorkGroupSize",
-                                                  ir_var_auto);
-      gl_WorkGroupSize->data.how_declared = ir_var_declared_implicitly;
-      gl_WorkGroupSize->data.read_only = true;
-      shader->ir->push_head(gl_WorkGroupSize);
-   }
-   ir_variable *gl_LocalInvocationID =
-      shader->symbols->get_variable("gl_LocalInvocationID");
-   assert(gl_LocalInvocationID);
-
-   /* gl_GlobalInvocationID =
-    *    gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
-    */
-   ir_instruction *inst =
-      assign(gl_GlobalInvocationID,
-             add(mul(gl_WorkGroupID, gl_WorkGroupSize),
-                 gl_LocalInvocationID));
-   main_sig->body.push_head(inst);
-
-   /* gl_LocalInvocationIndex =
-    *    gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
-    *    gl_LocalInvocationID.y * gl_WorkGroupSize.x +
-    *    gl_LocalInvocationID.x;
-    */
-   ir_expression *index_z =
-      mul(mul(swizzle_z(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize)),
-          swizzle_y(gl_WorkGroupSize));
-   ir_expression *index_y =
-      mul(swizzle_y(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize));
-   ir_expression *index_y_plus_z = add(index_y, index_z);
-   operand index_x(swizzle_x(gl_LocalInvocationID));
-   ir_expression *index_x_plus_y_plus_z = add(index_y_plus_z, index_x);
-   ir_variable *gl_LocalInvocationIndex =
-      shader->symbols->get_variable("gl_LocalInvocationIndex");
-   assert(gl_LocalInvocationIndex);
-   inst = assign(gl_LocalInvocationIndex, index_x_plus_y_plus_z);
-   main_sig->body.push_head(inst);
-}
-
-
-/**
- * Initialize builtin variables with values based on other builtin variables.
- * These are initialized in the main function.
- */
-void
-_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
-                                        gl_shader *shader)
-{
-   /* We only need to set CS variables currently. */
-   if (shader->Stage == MESA_SHADER_COMPUTE &&
-       ctx->Const.LowerCsDerivedVariables) {
-      ir_function_signature *const main_sig =
-         _mesa_get_main_function_signature(shader->symbols);
-
-      if (main_sig != NULL)
-         initialize_cs_derived_variables(shader, main_sig);
-   }
-}
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -2009,8 +2009,6 @@ opt_shader_and_create_symbol_table(struct gl_context *ctx,
         break;
      }
   }
-
-   _mesa_glsl_initialize_derived_variables(ctx, shader);
 }

 void
--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -2412,10 +2412,6 @@ extern void
 _mesa_glsl_initialize_variables(exec_list *instructions,
 				struct _mesa_glsl_parse_state *state);

-extern void
-_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
-                                        gl_shader *shader);
-
 extern void
 reparent_ir(exec_list *list, void *mem_ctx);

--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -166,6 +166,7 @@ void optimize_dead_builtin_variables(exec_list *instructions,
 bool lower_tess_level(gl_linked_shader *shader);

 bool lower_vertex_id(gl_linked_shader *shader);
+bool lower_cs_derived(gl_linked_shader *shader);
 bool lower_blend_equation_advanced(gl_linked_shader *shader);

 bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state);
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -2374,6 +2374,9 @@ link_intrastage_shaders(void *mem_ctx,
   if (ctx->Const.VertexID_is_zero_based)
      lower_vertex_id(linked);

+   if (ctx->Const.LowerCsDerivedVariables)
+      lower_cs_derived(linked);
+
 #ifdef DEBUG
   /* Compute the source checksum. */
   linked->SourceChecksum = 0;
--- a/src/compiler/glsl/lower_cs_derived.cpp
+++ b/src/compiler/glsl/lower_cs_derived.cpp
@@ -0,0 +1,234 @@
+/*
+ * Copyright © 2017 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_cs_derived.cpp
+ *
+ * For hardware that does not support the gl_GlobalInvocationID and
+ * gl_LocalInvocationIndex system values, replace them with fresh
+ * globals. Note that we can't rely on gl_WorkGroupSize or
+ * gl_LocalGroupSizeARB being available, since they may only have been defined
+ * in a non-main shader.
+ *
+ * [ This can happen if only a secondary shader has the layout(local_size_*)
+ *   declaration. ]
+ *
+ * This is meant to be run post-linking.
+ */
+
+#include "glsl_symbol_table.h"
+#include "ir_hierarchical_visitor.h"
+#include "ir.h"
+#include "ir_builder.h"
+#include "linker.h"
+#include "program/prog_statevars.h"
+#include "builtin_functions.h"
+
+using namespace ir_builder;
+
+namespace {
+
+class lower_cs_derived_visitor : public ir_hierarchical_visitor {
+public:
+   explicit lower_cs_derived_visitor(gl_linked_shader *shader)
+      : progress(false),
+        shader(shader),
+        local_size_variable(shader->Program->info.cs.local_size_variable),
+        gl_WorkGroupSize(NULL),
+        gl_WorkGroupID(NULL),
+        gl_LocalInvocationID(NULL),
+        gl_GlobalInvocationID(NULL),
+        gl_LocalInvocationIndex(NULL)
+   {
+      main_sig = _mesa_get_main_function_signature(shader->symbols);
+      assert(main_sig);
+   }
+
+   virtual ir_visitor_status visit(ir_dereference_variable *);
+
+   ir_variable *add_system_value(
+         int slot, const glsl_type *type, const char *name);
+   void find_sysvals();
+   void make_gl_GlobalInvocationID();
+   void make_gl_LocalInvocationIndex();
+
+   bool progress;
+
+private:
+   gl_linked_shader *shader;
+   bool local_size_variable;
+   ir_function_signature *main_sig;
+
+   ir_rvalue *gl_WorkGroupSize;
+   ir_variable *gl_WorkGroupID;
+   ir_variable *gl_LocalInvocationID;
+
+   ir_variable *gl_GlobalInvocationID;
+   ir_variable *gl_LocalInvocationIndex;
+};
+
+} /* anonymous namespace */
+
+ir_variable *
+lower_cs_derived_visitor::add_system_value(
+      int slot, const glsl_type *type, const char *name)
+{
+   ir_variable *var = new(shader) ir_variable(type, name, ir_var_system_value);
+   var->data.how_declared = ir_var_declared_implicitly;
+   var->data.read_only = true;
+   var->data.location = slot;
+   var->data.explicit_location = true;
+   var->data.explicit_index = 0;
+   shader->ir->push_head(var);
+
+   return var;
+}
+
+void
+lower_cs_derived_visitor::find_sysvals()
+{
+   if (gl_WorkGroupSize != NULL)
+      return;
+
+   ir_variable *WorkGroupSize;
+   if (local_size_variable)
+      WorkGroupSize = shader->symbols->get_variable("gl_LocalGroupSizeARB");
+   else
+      WorkGroupSize = shader->symbols->get_variable("gl_WorkGroupSize");
+   if (WorkGroupSize)
+      gl_WorkGroupSize = new(shader) ir_dereference_variable(WorkGroupSize);
+   gl_WorkGroupID = shader->symbols->get_variable("gl_WorkGroupID");
+   gl_LocalInvocationID = shader->symbols->get_variable("gl_LocalInvocationID");
+
+   /*
+    * These may be missing due to either dead code elimination, or, in the
+    * case of the group size, due to the layout being declared in a non-main
+    * shader. Re-create them.
+    */
+
+   if (!gl_WorkGroupID)
+      gl_WorkGroupID = add_system_value(
+            SYSTEM_VALUE_WORK_GROUP_ID, glsl_type::uvec3_type, "gl_WorkGroupID");
+   if (!gl_LocalInvocationID)
+      gl_LocalInvocationID = add_system_value(
+            SYSTEM_VALUE_LOCAL_INVOCATION_ID, glsl_type::uvec3_type,
+            "gl_LocalInvocationID");
+   if (!WorkGroupSize) {
+      if (local_size_variable) {
+         gl_WorkGroupSize = new(shader) ir_dereference_variable(
+               add_system_value(
+                     SYSTEM_VALUE_LOCAL_GROUP_SIZE, glsl_type::uvec3_type,
+                     "gl_LocalGroupSizeARB"));
+      } else {
+         ir_constant_data data;
+         memset(&data, 0, sizeof(data));
+         for (int i = 0; i < 3; i++)
+            data.u[i] = shader->Program->info.cs.local_size[i];
+         gl_WorkGroupSize = new(shader) ir_constant(glsl_type::uvec3_type, &data);
+      }
+   }
+}
+
+void
+lower_cs_derived_visitor::make_gl_GlobalInvocationID()
+{
+   if (gl_GlobalInvocationID != NULL)
+      return;
+
+   find_sysvals();
+
+   /* gl_GlobalInvocationID =
+    *    gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
+    */
+   gl_GlobalInvocationID = new(shader) ir_variable(
+         glsl_type::uvec3_type, "__GlobalInvocationID", ir_var_temporary);
+   shader->ir->push_head(gl_GlobalInvocationID);
+
+   ir_instruction *inst =
+      assign(gl_GlobalInvocationID,
+             add(mul(gl_WorkGroupID, gl_WorkGroupSize->clone(shader, NULL)),
+                 gl_LocalInvocationID));
+   main_sig->body.push_head(inst);
+}
+
+void
+lower_cs_derived_visitor::make_gl_LocalInvocationIndex()
+{
+   if (gl_LocalInvocationIndex != NULL)
+      return;
+
+   find_sysvals();
+
+   /* gl_LocalInvocationIndex =
+    *    gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
+    *    gl_LocalInvocationID.y * gl_WorkGroupSize.x +
+    *    gl_LocalInvocationID.x;
+    */
+   gl_LocalInvocationIndex = new(shader)
+      ir_variable(glsl_type::uint_type, "__LocalInvocationIndex", ir_var_temporary);
+   shader->ir->push_head(gl_LocalInvocationIndex);
+
+   ir_expression *index_z =
+      mul(mul(swizzle_z(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize->clone(shader, NULL))),
+          swizzle_y(gl_WorkGroupSize->clone(shader, NULL)));
+   ir_expression *index_y =
+      mul(swizzle_y(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize->clone(shader, NULL)));
+   ir_expression *index_y_plus_z = add(index_y, index_z);
+   operand index_x(swizzle_x(gl_LocalInvocationID));
+   ir_expression *index_x_plus_y_plus_z = add(index_y_plus_z, index_x);
+   ir_instruction *inst =
+      assign(gl_LocalInvocationIndex, index_x_plus_y_plus_z);
+   main_sig->body.push_head(inst);
+}
+
+ir_visitor_status
+lower_cs_derived_visitor::visit(ir_dereference_variable *ir)
+{
+   if (ir->var->data.mode == ir_var_system_value &&
+       ir->var->data.location == SYSTEM_VALUE_GLOBAL_INVOCATION_ID) {
+      make_gl_GlobalInvocationID();
+      ir->var = gl_GlobalInvocationID;
+      progress = true;
+   }
+
+   if (ir->var->data.mode == ir_var_system_value &&
+       ir->var->data.location == SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) {
+      make_gl_LocalInvocationIndex();
+      ir->var = gl_LocalInvocationIndex;
+      progress = true;
+   }
+
+   return visit_continue;
+}
+
+bool
+lower_cs_derived(gl_linked_shader *shader)
+{
+   if (shader->Stage != MESA_SHADER_COMPUTE)
+      return false;
+
+   lower_cs_derived_visitor v(shader);
+   v.run(shader->ir);
+
+   return v.progress;
+}
--- a/src/compiler/glsl/meson.build
+++ b/src/compiler/glsl/meson.build
@@ -124,6 +124,7 @@ files_libglsl = files(
  'lower_buffer_access.cpp',
  'lower_buffer_access.h',
  'lower_const_arrays_to_uniforms.cpp',
+  'lower_cs_derived.cpp',
  'lower_discard.cpp',
  'lower_discard_flow.cpp',
  'lower_distance.cpp',
--- a/src/compiler/glsl/opt_dead_builtin_variables.cpp
+++ b/src/compiler/glsl/opt_dead_builtin_variables.cpp
@@ -62,23 +62,6 @@ optimize_dead_builtin_variables(exec_list *instructions,
       * information, so removing these variables from the user shader will
       * cause problems later.
       *
-       * For compute shaders, gl_GlobalInvocationID has some dependencies, so
-       * we avoid removing these dependencies.
-       *
-       * We also avoid removing gl_GlobalInvocationID at this stage because it
-       * might be used by a linked shader. In this case it still needs to be
-       * initialized by the main function.
-       *
-       *    gl_GlobalInvocationID =
-       *       gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
-       *
-       * Similarly, we initialize gl_LocalInvocationIndex in the main function:
-       *
-       *    gl_LocalInvocationIndex =
-       *       gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
-       *       gl_LocalInvocationID.y * gl_WorkGroupSize.x +
-       *       gl_LocalInvocationID.x;
-       *
       * Matrix uniforms with "Transpose" are not eliminated because there's
       * an optimization pass that can turn references to the regular matrix
       * into references to the transpose matrix.  Eliminating the transpose
@@ -90,11 +73,6 @@ optimize_dead_builtin_variables(exec_list *instructions,
       */
      if (strcmp(var->name, "gl_ModelViewProjectionMatrix") == 0
          || strcmp(var->name, "gl_Vertex") == 0
-          || strcmp(var->name, "gl_WorkGroupID") == 0
-          || strcmp(var->name, "gl_WorkGroupSize") == 0
-          || strcmp(var->name, "gl_LocalInvocationID") == 0
-          || strcmp(var->name, "gl_GlobalInvocationID") == 0
-          || strcmp(var->name, "gl_LocalInvocationIndex") == 0
          || strstr(var->name, "Transpose") != NULL)
         continue;

--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -434,7 +434,7 @@ INTRINSIC(load_interpolated_input, 2, ARR(2, 1), true, 0, 0,
 /* src[] = { buffer_index, offset }. No const_index */
 LOAD(ssbo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 /* src[] = { offset }. const_index[] = { base, component } */
-LOAD(output, 1, 1, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
+LOAD(output, 1, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 /* src[] = { vertex, offset }. const_index[] = { base, component } */
 LOAD(per_vertex_output, 2, 1, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 /* src[] = { offset }. const_index[] = { base } */
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -2802,7 +2802,8 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,

   case SpvOpMemoryModel:
      assert(w[1] == SpvAddressingModelLogical);
-      assert(w[2] == SpvMemoryModelGLSL450);
+      assert(w[2] == SpvMemoryModelSimple ||
+             w[2] == SpvMemoryModelGLSL450);
      break;

   case SpvOpEntryPoint: {
--- a/src/egl/meson.build
+++ b/src/egl/meson.build
@@ -21,7 +21,9 @@
 c_args_for_egl = []
 link_for_egl = []
 deps_for_egl = []
-incs_for_egl = []
+incs_for_egl = [
+  inc_include, inc_src, inc_loader, inc_gbm, include_directories('main'),
+]
 files_egl = files(
  'main/eglapi.c',
  'main/eglapi.h',
@@ -159,10 +161,7 @@ libegl = shared_library(
    '-D_EGL_BUILT_IN_DRIVER_DRI2',
    '-D_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_@0@'.format(egl_native_platform.to_upper()),
  ],
-  include_directories : [
-    incs_for_egl, inc_include, inc_src, inc_loader, inc_gbm,
-    include_directories('main'),
-  ],
+  include_directories : incs_for_egl,
  link_with : [link_for_egl, libloader, libxmlconfig, libglapi, libmesa_util],
  link_args : [ld_args_bsymbolic, ld_args_gc_sections],
  dependencies : [deps_for_egl, dep_dl, dep_libdrm, dep_clock, dep_thread],
--- a/src/egl/wayland/wayland-egl/Makefile.am
+++ b/src/egl/wayland/wayland-egl/Makefile.am
@@ -3,7 +3,7 @@ pkgconfig_DATA = wayland-egl.pc

 AM_CFLAGS = $(DEFINES) \
 	    $(VISIBILITY_CFLAGS) \
-	    $(WAYLAND_SERVER_CFLAGS)
+	    $(WAYLAND_CLIENT_CFLAGS)

 lib_LTLIBRARIES = libwayland-egl.la
 noinst_HEADERS = wayland-egl-backend.h
--- a/src/egl/wayland/wayland-egl/meson.build
+++ b/src/egl/wayland/wayland-egl/meson.build
@@ -24,6 +24,7 @@ libwayland_egl = shared_library(
  'wayland-egl.c',
  c_args : [c_vis_args],
  link_args : ld_args_gc_sections,
+  dependencies : dep_wayland_client,
  version : '1.0.0',
  install : true,
 )
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -182,8 +182,11 @@ void si_vid_join_surfaces(struct r600_common_context *rctx,

 			for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.legacy.level); ++j)
 				surfaces[i]->u.legacy.level[j].offset += off;
-		} else
+		} else {
 			surfaces[i]->u.gfx9.surf_offset += off;
+			for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.gfx9.offset); ++j)
+				surfaces[i]->u.gfx9.offset[j] += off;
+		}

 		off += surfaces[i]->surf_size;
 	}
--- a/src/gallium/drivers/radeonsi/driinfo_radeonsi.h
+++ b/src/gallium/drivers/radeonsi/driinfo_radeonsi.h
@@ -6,5 +6,5 @@ DRI_CONF_SECTION_PERFORMANCE
 DRI_CONF_SECTION_END

 DRI_CONF_SECTION_DEBUG
-   DRI_CONF_RADEONSI_CLEAR_DB_META_BEFORE_CLEAR("false")
+   DRI_CONF_RADEONSI_CLEAR_DB_CACHE_BEFORE_CLEAR("false")
 DRI_CONF_SECTION_END
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -901,16 +901,16 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
 		 * corruption in ARK: Survival Evolved, but that may just be
 		 * a coincidence and the root cause is elsewhere.
 		 *
-		 * The corruption can be fixed by putting the DB metadata flush
-		 * before or after the depth clear. (suprisingly)
+		 * The corruption can be fixed by putting the DB flush before
+		 * or after the depth clear. (surprisingly)
 		 *
 		 * https://bugs.freedesktop.org/show_bug.cgi?id=102955 (apitrace)
 		 *
 		 * This hack decreases back-to-back ClearDepth performance.
 		 */
-		if (sctx->screen->clear_db_meta_before_clear)
-			sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META |
-					 SI_CONTEXT_PS_PARTIAL_FLUSH;
+		if (sctx->screen->clear_db_cache_before_clear) {
+			sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
+		}
 	}

 	si_blitter_begin(ctx, SI_CLEAR);
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1072,8 +1072,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 		driQueryOptionb(config->options, "radeonsi_assume_no_z_fights");
 	sscreen->commutative_blend_add =
 		driQueryOptionb(config->options, "radeonsi_commutative_blend_add");
-	sscreen->clear_db_meta_before_clear =
-		driQueryOptionb(config->options, "radeonsi_clear_db_meta_before_clear");
+	sscreen->clear_db_cache_before_clear =
+		driQueryOptionb(config->options, "radeonsi_clear_db_cache_before_clear");
 	sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 &&
 					    sscreen->b.family <= CHIP_POLARIS12) ||
 					   sscreen->b.family == CHIP_VEGA10 ||
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -98,7 +98,7 @@ struct si_screen {
 	bool				has_out_of_order_rast;
 	bool				assume_no_z_fights;
 	bool				commutative_blend_add;
-	bool				clear_db_meta_before_clear;
+	bool				clear_db_cache_before_clear;
 	bool				has_msaa_sample_loc_bug;
 	bool				dpbb_allowed;
 	bool				dfsm_allowed;
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2015,14 +2015,21 @@ static LLVMValueRef fetch_constant(
 		 * code reducing SIMD wave occupancy from 8 to 2 in many cases.
 		 *
 		 * Using s_buffer_load_dword (x1) seems to be the best option right now.
+		 *
+		 * LLVM 5.0 on SI doesn't insert a required s_nop between SALU setting
+		 * a descriptor and s_buffer_load_dword using it, so we can't expand
+		 * the pointer into a full descriptor like below. We have to use
+		 * s_load_dword instead. The only case when LLVM 5.0 would select
+		 * s_buffer_load_dword (that we have to prevent) is when we use
+		 * a literal offset where we don't need bounds checking.
 		 */
-#if 0 /* keep this codepath disabled */
-		if (!reg->Register.Indirect) {
+		if (ctx->screen->b.chip_class == SI &&
+                    HAVE_LLVM < 0x0600 &&
+                    !reg->Register.Indirect) {
 			addr = LLVMBuildLShr(ctx->ac.builder, addr, LLVMConstInt(ctx->i32, 2, 0), "");
 			LLVMValueRef result = ac_build_load_invariant(&ctx->ac, ptr, addr);
 			return bitcast(bld_base, type, result);
 		}
-#endif

 		/* Do the bounds checking with a descriptor, because
 		 * doing computation and manual bounds checking of 64-bit
--- a/src/gallium/drivers/vc4/vc4_cl.h
+++ b/src/gallium/drivers/vc4/vc4_cl.h
@@ -61,7 +61,7 @@ struct vc4_cl {
        struct vc4_cl_out *next;
        struct vc4_cl_out *reloc_next;
        uint32_t size;
-#ifdef DEBUG
+#ifndef NDEBUG
        uint32_t reloc_count;
 #endif
 };
@@ -163,8 +163,8 @@ static inline void
 cl_start_reloc(struct vc4_cl *cl, struct vc4_cl_out **out, uint32_t n)
 {
        assert(n == 1 || n == 2);
-#ifdef DEBUG
        assert(cl->reloc_count == 0);
+#ifndef NDEBUG
        cl->reloc_count = n;
 #endif

@@ -177,8 +177,8 @@ cl_start_reloc(struct vc4_cl *cl, struct vc4_cl_out **out, uint32_t n)
 static inline struct vc4_cl_out *
 cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n)
 {
-#ifdef DEBUG
        assert(cl->reloc_count == 0);
+#ifndef NDEBUG
        cl->reloc_count = n;
 #endif
        cl->reloc_next = cl->next;
@@ -196,7 +196,7 @@ cl_reloc(struct vc4_job *job, struct vc4_cl *cl, struct vc4_cl_out **cl_out,
        *(uint32_t *)cl->reloc_next = vc4_gem_hindex(job, bo);
        cl_advance(&cl->reloc_next, 4);

-#ifdef DEBUG
+#ifndef NDEBUG
        cl->reloc_count--;
 #endif

@@ -211,7 +211,7 @@ cl_aligned_reloc(struct vc4_job *job, struct vc4_cl *cl,
        *(uint32_t *)cl->reloc_next = vc4_gem_hindex(job, bo);
        cl_advance(&cl->reloc_next, 4);

-#ifdef DEBUG
+#ifndef NDEBUG
        cl->reloc_count--;
 #endif

@@ -297,7 +297,7 @@ cl_pack_emit_reloc(struct vc4_cl *cl, const struct vc4_cl_reloc *reloc)
        *(uint32_t *)cl->reloc_next = vc4_gem_hindex(cl->job, reloc->bo);
        cl_advance(&cl->reloc_next, 4);

-#ifdef DEBUG
+#ifndef NDEBUG
        cl->reloc_count--;
 #endif
 }
--- a/src/gallium/state_trackers/clover/llvm/codegen/common.cpp
+++ b/src/gallium/state_trackers/clover/llvm/codegen/common.cpp
@@ -70,7 +70,6 @@ namespace {
   make_kernel_args(const Module &mod, const std::string &kernel_name,
                    const clang::CompilerInstance &c) {
      std::vector<module::argument> args;
-      const auto address_spaces = c.getTarget().getAddressSpaceMap();
      const Function &f = *mod.getFunction(kernel_name);
      ::llvm::DataLayout dl(&mod);
      const auto size_type =
@@ -128,8 +127,8 @@ namespace {
               const unsigned address_space =
                  cast< ::llvm::PointerType>(actual_type)->getAddressSpace();

-               if (address_space == address_spaces[clang::LangAS::opencl_local
-                                                   - compat::lang_as_offset]) {
+               if (address_space == compat::target_address_space(
+                                  c.getTarget(), clang::LangAS::opencl_local)) {
                  args.emplace_back(module::argument::local, arg_api_size,
                                    target_size, target_align,
                                    module::argument::zero_ext);
--- a/src/gallium/state_trackers/clover/llvm/compat.hpp
+++ b/src/gallium/state_trackers/clover/llvm/compat.hpp
@@ -69,11 +69,19 @@ namespace clover {
         typedef ::llvm::TargetLibraryInfo target_library_info;
 #endif

+         template<typename T, typename AS>
+         unsigned target_address_space(const T &target, const AS lang_as) {
+            const auto &map = target.getAddressSpaceMap();
+#if HAVE_LLVM >= 0x0500
+            return map[static_cast<unsigned>(lang_as)];
+#else
+            return map[lang_as - clang::LangAS::Offset];
+#endif
+         }
+
 #if HAVE_LLVM >= 0x0500
-         const auto lang_as_offset = 0;
         const clang::InputKind ik_opencl = clang::InputKind::OpenCL;
 #else
-         const auto lang_as_offset = clang::LangAS::Offset;
         const clang::InputKind ik_opencl = clang::IK_OpenCL;
 #endif

--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -1190,7 +1190,7 @@ brw_JMPI(struct brw_codegen *p, struct brw_reg index,
   struct brw_reg ip = brw_ip_reg();
   brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);

-   brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_2);
+   brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1);
   brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
   brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
   brw_inst_set_pred_control(devinfo, inst, predicate_control);
--- a/src/intel/compiler/brw_eu_validate.c
+++ b/src/intel/compiler/brw_eu_validate.c
@@ -47,7 +47,8 @@ cat(struct string *dest, const struct string src)
 static bool
 contains(const struct string haystack, const struct string needle)
 {
-   return memmem(haystack.str, haystack.len, needle.str, needle.len) != NULL;
+   return haystack.str && memmem(haystack.str, haystack.len,
+                                 needle.str, needle.len) != NULL;
 }
 #define CONTAINS(haystack, needle) \
   contains(haystack, (struct string){needle, strlen(needle)})
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -5013,7 +5013,9 @@ needs_src_copy(const fs_builder &lbld, const fs_inst *inst, unsigned i)
 {
   return !(is_periodic(inst->src[i], lbld.dispatch_width()) ||
            (inst->components_read(i) == 1 &&
-             lbld.dispatch_width() <= inst->exec_size));
+             lbld.dispatch_width() <= inst->exec_size)) ||
+          (inst->flags_written() &
+           flag_mask(inst->src[i], type_sz(inst->src[i].type)));
 }

 /**
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -402,7 +402,6 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
      brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);

      int jmp = brw_JMPI(p, brw_imm_ud(0), BRW_PREDICATE_NORMAL) - p->store;
-      brw_inst_set_exec_size(p->devinfo, brw_last_inst, BRW_EXECUTE_1);
      {
         /* Don't send AA data */
         fire_fb_write(inst, offset(payload, 1), implied_header, inst->mlen-1);
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -173,14 +173,13 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
   NIR_PASS_V(nir, nir_propagate_invariant);
   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              entry_point->impl, true, false);
-   NIR_PASS_V(nir, nir_lower_system_values);

   /* Vulkan uses the separate-shader linking model */
   nir->info.separate_shader = true;

   nir = brw_preprocess_nir(compiler, nir);

-   NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
+   NIR_PASS_V(nir, nir_lower_system_values);

   if (stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(nir, anv_nir_lower_input_attachments);
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -315,7 +315,8 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
      src_format = dst_format = MESA_FORMAT_R_FLOAT32;
   }

-   enum isl_format src_isl_format = brw_isl_format_for_mesa_format(src_format);
+   enum isl_format src_isl_format =
+      brw_blorp_to_isl_format(brw, src_format, false);
   enum isl_aux_usage src_aux_usage =
      intel_miptree_texture_aux_usage(brw, src_mt, src_isl_format);
   /* We do format workarounds for some depth formats so we can't reliably
@@ -328,8 +329,10 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
   intel_miptree_prepare_access(brw, src_mt, src_level, 1, src_layer, 1,
                                src_aux_usage, src_clear_supported);

+   enum isl_format dst_isl_format =
+      brw_blorp_to_isl_format(brw, dst_format, true);
   enum isl_aux_usage dst_aux_usage =
-      intel_miptree_render_aux_usage(brw, dst_mt, encode_srgb, false);
+      intel_miptree_render_aux_usage(brw, dst_mt, dst_isl_format, false);
   const bool dst_clear_supported = dst_aux_usage != ISL_AUX_USAGE_NONE;
   intel_miptree_prepare_access(brw, dst_mt, dst_level, 1, dst_layer, 1,
                                dst_aux_usage, dst_clear_supported);
@@ -351,10 +354,9 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
   struct blorp_batch batch;
   blorp_batch_init(&brw->blorp, &batch, brw, 0);
   blorp_blit(&batch, &src_surf, src_level, src_layer,
-              brw_blorp_to_isl_format(brw, src_format, false), src_isl_swizzle,
+              src_isl_format, src_isl_swizzle,
              &dst_surf, dst_level, dst_layer,
-              brw_blorp_to_isl_format(brw, dst_format, true),
-              ISL_SWIZZLE_IDENTITY,
+              dst_isl_format, ISL_SWIZZLE_IDENTITY,
              src_x0, src_y0, src_x1, src_y1,
              dst_x0, dst_y0, dst_x1, dst_y1,
              filter, mirror_x, mirror_y);
@@ -1157,6 +1159,7 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
   mesa_format format = irb->Base.Base.Format;
   if (!encode_srgb && _mesa_get_format_color_encoding(format) == GL_SRGB)
      format = _mesa_get_srgb_format_linear(format);
+   enum isl_format isl_format = brw->mesa_to_isl_render_format[format];

   x0 = fb->_Xmin;
   x1 = fb->_Xmax;
@@ -1255,8 +1258,7 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,

      struct blorp_batch batch;
      blorp_batch_init(&brw->blorp, &batch, brw, 0);
-      blorp_fast_clear(&batch, &surf,
-                       brw->mesa_to_isl_render_format[format],
+      blorp_fast_clear(&batch, &surf, isl_format,
                       level, irb->mt_layer, num_layers,
                       x0, y0, x1, y1);
      blorp_batch_finish(&batch);
@@ -1275,9 +1277,9 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
          irb->mt, irb->mt_level, irb->mt_layer, num_layers);

      enum isl_aux_usage aux_usage =
-         intel_miptree_render_aux_usage(brw, irb->mt, encode_srgb, false);
+         intel_miptree_render_aux_usage(brw, irb->mt, isl_format, false);
      intel_miptree_prepare_render(brw, irb->mt, level, irb->mt_layer,
-                                   num_layers, encode_srgb, false);
+                                   num_layers, isl_format, false);

      struct isl_surf isl_tmp[2];
      struct blorp_surf surf;
@@ -1289,16 +1291,14 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,

      struct blorp_batch batch;
      blorp_batch_init(&brw->blorp, &batch, brw, 0);
-      blorp_clear(&batch, &surf,
-                  brw->mesa_to_isl_render_format[format],
-                  ISL_SWIZZLE_IDENTITY,
+      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  level, irb->mt_layer, num_layers,
                  x0, y0, x1, y1,
                  clear_color, color_write_disable);
      blorp_batch_finish(&batch);

      intel_miptree_finish_render(brw, irb->mt, level, irb->mt_layer,
-                                  num_layers, encode_srgb, false);
+                                  num_layers, isl_format, false);
   }

   return;
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1072,6 +1072,12 @@ intelDestroyContext(__DRIcontext * driContextPriv)
   if (brw->wm.base.scratch_bo)
      brw_bo_unreference(brw->wm.base.scratch_bo);

+   brw_bo_unreference(brw->vs.base.push_const_bo);
+   brw_bo_unreference(brw->tcs.base.push_const_bo);
+   brw_bo_unreference(brw->tes.base.push_const_bo);
+   brw_bo_unreference(brw->gs.base.push_const_bo);
+   brw_bo_unreference(brw->wm.base.push_const_bo);
+
   brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);

   if (ctx->swrast_context) {
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -25,6 +25,7 @@

 #include <sys/errno.h>

+#include "main/blend.h"
 #include "main/context.h"
 #include "main/condrender.h"
 #include "main/samplerobj.h"
@@ -503,9 +504,13 @@ brw_predraw_resolve_framebuffer(struct brw_context *brw)
      if (irb == NULL || irb->mt == NULL)
         continue;

+      mesa_format mesa_format =
+         _mesa_get_render_format(ctx, intel_rb_format(irb));
+      enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);
+
      intel_miptree_prepare_render(brw, irb->mt, irb->mt_level,
                                   irb->mt_layer, irb->layer_count,
-                                   ctx->Color.sRGBEnabled,
+                                   isl_format,
                                   ctx->Color.BlendEnabled & (1 << i));
   }
 }
@@ -571,10 +576,14 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
      if (!irb)
         continue;

+      mesa_format mesa_format =
+         _mesa_get_render_format(ctx, intel_rb_format(irb));
+      enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);
+
      brw_render_cache_set_add_bo(brw, irb->mt->bo);
      intel_miptree_finish_render(brw, irb->mt, irb->mt_level,
                                  irb->mt_layer, irb->layer_count,
-                                  ctx->Color.sRGBEnabled,
+                                  isl_format,
                                  ctx->Color.BlendEnabled & (1 << i));
   }
 }
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -101,30 +101,6 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
-
-   /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
-    * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
-    *
-    * On Gen6-7.5, we use an execbuf parameter to do this for us.
-    * However, the kernel ignores that when execlists are in use.
-    * Fortunately, we can just write the registers from userspace
-    * on Gen8+, and they're context saved/restored.
-    */
-   if (devinfo->gen >= 9) {
-      BEGIN_BATCH(3);
-      OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
-      OUT_BATCH(CS_DEBUG_MODE2);
-      OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
-                CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
-      ADVANCE_BATCH();
-   } else if (devinfo->gen == 8) {
-      BEGIN_BATCH(3);
-      OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
-      OUT_BATCH(INSTPM);
-      OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
-                INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
-      ADVANCE_BATCH();
-   }
 }

 static inline const struct brw_tracked_state *
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -213,11 +213,6 @@ gen6_update_renderbuffer_surface(struct brw_context *brw,
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;

-   enum isl_aux_usage aux_usage =
-      brw->draw_aux_buffer_disabled[unit] ? ISL_AUX_USAGE_NONE :
-      intel_miptree_render_aux_usage(brw, mt, ctx->Color.sRGBEnabled,
-                                     ctx->Color.BlendEnabled & (1 << unit));
-
   assert(brw_render_target_supported(brw, rb));

   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
@@ -225,9 +220,15 @@ gen6_update_renderbuffer_surface(struct brw_context *brw,
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }
+   enum isl_format isl_format = brw->mesa_to_isl_render_format[rb_format];
+
+   enum isl_aux_usage aux_usage =
+      brw->draw_aux_buffer_disabled[unit] ? ISL_AUX_USAGE_NONE :
+      intel_miptree_render_aux_usage(brw, mt, isl_format,
+                                     ctx->Color.BlendEnabled & (1 << unit));

   struct isl_view view = {
-      .format = brw->mesa_to_isl_render_format[rb_format],
+      .format = isl_format,
      .base_level = irb->mt_level - irb->mt->first_level,
      .levels = 1,
      .base_array_layer = irb->mt_layer,
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2654,7 +2654,8 @@ intel_miptree_prepare_image(struct brw_context *brw,
 enum isl_aux_usage
 intel_miptree_render_aux_usage(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
-                               bool srgb_enabled, bool blend_enabled)
+                               enum isl_format render_format,
+                               bool blend_enabled)
 {
   switch (mt->aux_usage) {
   case ISL_AUX_USAGE_MCS:
@@ -2665,12 +2666,8 @@ intel_miptree_render_aux_usage(struct brw_context *brw,
      return mt->mcs_buf ? ISL_AUX_USAGE_CCS_D : ISL_AUX_USAGE_NONE;

   case ISL_AUX_USAGE_CCS_E: {
-      mesa_format mesa_format =
-         srgb_enabled ? mt->format :_mesa_get_srgb_format_linear(mt->format);
-      enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);
-
      /* If the format supports CCS_E, then we can just use it */
-      if (isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format))
+      if (isl_format_supports_ccs_e(&brw->screen->devinfo, render_format))
         return ISL_AUX_USAGE_CCS_E;

      /* Otherwise, we have to fall back to CCS_D */
@@ -2679,8 +2676,8 @@ intel_miptree_render_aux_usage(struct brw_context *brw,
       * formats.  However, there are issues with blending where it doesn't
       * properly apply the sRGB curve to the clear color when blending.
       */
-      if (blend_enabled && isl_format_is_srgb(isl_format) &&
-          !isl_color_value_is_zero_one(mt->fast_clear_color, isl_format))
+      if (blend_enabled && isl_format_is_srgb(render_format) &&
+          !isl_color_value_is_zero_one(mt->fast_clear_color, render_format))
         return ISL_AUX_USAGE_NONE;

      return ISL_AUX_USAGE_CCS_D;
@@ -2695,10 +2692,11 @@ void
 intel_miptree_prepare_render(struct brw_context *brw,
                             struct intel_mipmap_tree *mt, uint32_t level,
                             uint32_t start_layer, uint32_t layer_count,
-                             bool srgb_enabled, bool blend_enabled)
+                             enum isl_format render_format,
+                             bool blend_enabled)
 {
   enum isl_aux_usage aux_usage =
-      intel_miptree_render_aux_usage(brw, mt, srgb_enabled, blend_enabled);
+      intel_miptree_render_aux_usage(brw, mt, render_format, blend_enabled);
   intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
                                aux_usage, aux_usage != ISL_AUX_USAGE_NONE);
 }
@@ -2707,12 +2705,13 @@ void
 intel_miptree_finish_render(struct brw_context *brw,
                            struct intel_mipmap_tree *mt, uint32_t level,
                            uint32_t start_layer, uint32_t layer_count,
-                            bool srgb_enabled, bool blend_enabled)
+                            enum isl_format render_format,
+                            bool blend_enabled)
 {
   assert(_mesa_is_format_color_format(mt->format));

   enum isl_aux_usage aux_usage =
-      intel_miptree_render_aux_usage(brw, mt, srgb_enabled, blend_enabled);
+      intel_miptree_render_aux_usage(brw, mt, render_format, blend_enabled);
   intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
                              aux_usage);
 }
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -650,17 +650,20 @@ intel_miptree_prepare_image(struct brw_context *brw,
 enum isl_aux_usage
 intel_miptree_render_aux_usage(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
-                               bool srgb_enabled, bool blend_enabled);
+                               enum isl_format render_format,
+                               bool blend_enabled);
 void
 intel_miptree_prepare_render(struct brw_context *brw,
                             struct intel_mipmap_tree *mt, uint32_t level,
                             uint32_t start_layer, uint32_t layer_count,
-                             bool srgb_enabled, bool blend_enabled);
+                             enum isl_format render_format,
+                             bool blend_enabled);
 void
 intel_miptree_finish_render(struct brw_context *brw,
                            struct intel_mipmap_tree *mt, uint32_t level,
                            uint32_t start_layer, uint32_t layer_count,
-                            bool srgb_enabled, bool blend_enabled);
+                            enum isl_format render_format,
+                            bool blend_enabled);
 void
 intel_miptree_prepare_depth(struct brw_context *brw,
                            struct intel_mipmap_tree *mt, uint32_t level,
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -2523,7 +2523,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
   screen->compiler = brw_compiler_create(screen, devinfo);
   screen->compiler->shader_debug_log = shader_debug_log_mesa;
   screen->compiler->shader_perf_log = shader_perf_log_mesa;
-   screen->compiler->constant_buffer_0_is_relative = devinfo->gen < 8;
+   screen->compiler->constant_buffer_0_is_relative = true;
   screen->compiler->supports_pull_constants = true;

   screen->has_exec_fence =
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -3815,7 +3815,7 @@ bind_uniform_buffers(struct gl_context *ctx, GLuint first, GLsizei count,
      }

      set_buffer_multi_binding(ctx, buffers, i, caller,
-                               binding, offset, size, !range,
+                               binding, offset, size, range,
                               USAGE_UNIFORM_BUFFER);
   }

@@ -3916,7 +3916,7 @@ bind_shader_storage_buffers(struct gl_context *ctx, GLuint first,
      }

      set_buffer_multi_binding(ctx, buffers, i, caller,
-                               binding, offset, size, !range,
+                               binding, offset, size, range,
                               USAGE_SHADER_STORAGE_BUFFER);
   }

@@ -4238,7 +4238,7 @@ bind_atomic_buffers(struct gl_context *ctx,
      }

      set_buffer_multi_binding(ctx, buffers, i, caller,
-                               binding, offset, size, !range,
+                               binding, offset, size, range,
                               USAGE_ATOMIC_COUNTER_BUFFER);
   }

--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -330,6 +330,15 @@ get_fb0_attachment(struct gl_context *ctx, struct gl_framebuffer *fb,
      return &fb->Attachment[BUFFER_BACK_LEFT];
   case GL_BACK_RIGHT:
      return &fb->Attachment[BUFFER_BACK_RIGHT];
+   case GL_BACK:
+      /* The ARB_ES3_1_compatibility spec says:
+       *
+       *    "Since this command can only query a single framebuffer
+       *     attachment, BACK is equivalent to BACK_LEFT."
+       */
+      if (ctx->Extensions.ARB_ES3_1_compatibility)
+         return &fb->Attachment[BUFFER_BACK_LEFT];
+      return NULL;
   case GL_AUX0:
      if (fb->Visual.numAuxBuffers == 1) {
         return &fb->Attachment[BUFFER_AUX0];
--- a/src/util/drirc
+++ b/src/util/drirc
@@ -264,7 +264,7 @@ TODO: document the other workarounds.
    </device>
    <device driver="radeonsi">
        <application name="ARK: Survival Evolved (and unintentionally the UE4 demo template)" executable="ShooterGame">
-            <option name="radeonsi_clear_db_meta_before_clear" value="true" />
+            <option name="radeonsi_clear_db_cache_before_clear" value="true" />
        </application>
    </device>
 </driconf>
--- a/src/util/xmlpool/t_options.h
+++ b/src/util/xmlpool/t_options.h
@@ -444,7 +444,7 @@ DRI_CONF_OPT_BEGIN_B(radeonsi_commutative_blend_add, def) \
        DRI_CONF_DESC(en,gettext("Commutative additive blending optimizations (may cause rendering errors)")) \
 DRI_CONF_OPT_END

-#define DRI_CONF_RADEONSI_CLEAR_DB_META_BEFORE_CLEAR(def) \
-DRI_CONF_OPT_BEGIN_B(radeonsi_clear_db_meta_before_clear, def) \
-        DRI_CONF_DESC(en,"Clear DB metadata cache before fast depth clear") \
+#define DRI_CONF_RADEONSI_CLEAR_DB_CACHE_BEFORE_CLEAR(def) \
+DRI_CONF_OPT_BEGIN_B(radeonsi_clear_db_cache_before_clear, def) \
+        DRI_CONF_DESC(en,"Clear DB cache before fast depth clear") \
 DRI_CONF_OPT_END
Author	SHA1	Message	Date
Emil Velikov	3eb187f376	Update version to 17.3.0-rc2	2017-10-30 13:52:46 +00:00
Eric Engestrom	0c20849f9c	wayland-egl: fix wayland cflags Fixes: `80bfff5c4f` "wayland-egl: adds CFLAGS for wayland.egl.h include" Suggested-by: Daniel Stone <daniel@fooishbar.org> Signed-off-by: Eric Engestrom <eric.engestrom@imgtec.com> Acked-by: Emil Velikov <emil.velikov@collabora.com> Acked-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de> (cherry picked from commit `866c8a94d4`)	2017-10-27 21:33:04 +01:00
Eric Engestrom	fb09360ea5	vc4: fix release build Mesa's DEBUG and assert's NDEBUG are not tied to each other, so we need to explicitly compile this code out. Fixes: `3df7892878` "vc4: Drop reloc_count tracking for debug asserts on non-debug builds." Cc: Eric Anholt <eric@anholt.net> Signed-off-by: Eric Engestrom <eric.engestrom@imgtec.com> Reviewed-by: Eric Anholt <eric@anholt.net> (cherry picked from commit `5d44e35a8f`)	2017-10-27 21:33:04 +01:00
Samuel Pitoiset	1664322838	radeonsi: update hack for HTILE corruption in ARK: Survival Evolved It appears that flushing the DB metadata is actually not sufficient since the driver uses the new VS blit shaders. This looks quite strange though, but it seems like we need to flush DB for fixing the corruption. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102955 Fixes: `69ccb9dae7` (radeonsi: use new VS blit shaders (VS inputs in SGPRs)) Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Marek Olšák <marek.olsak@amd.com> (cherry picked from commit `dd79aa4ad3`)	2017-10-27 21:33:04 +01:00
Daniel Stone	c7e625df69	meson: wayland-egl depends on wayland-client Since wayland-egl.h is currently provided by the core Wayland package, depend on wayland-client to make sure we get it in our include path. Signed-off-by: Daniel Stone <daniels@collabora.com> Acked-by: Emil Velikov <emil.velikov@collabora.com> Fixes: `108d257a16` ("meson: build libEGL") Cc: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de> Cc: Dylan Baker <dylan@pnwbakers.com> Cc: Gert Wollny <gw.fossdev@gmail.com> (cherry picked from commit `9f7ed60b3e`)	2017-10-27 21:33:04 +01:00
Jason Ekstrand	5addf041ef	intel/eu: Use EXECUTE_1 for JMPI The PRM says "The execution size must be 1." In `73137997e2`, the execution size was set to 1 when it should have been BRW_EXECUTE_1 (which maps to 0). Later, in `dc2d3a7f5c`, JMPI was used for line AA on gen6 and earlier and we started manually stomping the execution size to BRW_EXECUTE_1 in the generator. This commit fixes the original bug and makes brw_JMPI just do the right thing. Reviewed-by: Matt Turner <mattst88@gmail.com> Fixes: `73137997e2` (cherry picked from commit `562b8d458c`)	2017-10-27 21:33:04 +01:00
Bas Nieuwenhuizen	f4b6883ebc	radv: Fix truncation issue hexifying the cache uuid for the disk cache. Going from binary to hex has a 2x blowup. Fixes: `1421625292` 'radv: create on-disk shader cache' Reviewed-by: Dave Airlie <airlied@redhat.com> (cherry picked from commit `5bfbab2fdc`)	2017-10-27 21:33:04 +01:00
Eric Anholt	70ee0a4525	i965: Fix memmem compiler warnings. gcc is throwing this warning in my meson build: ../src/intel/compiler/brw_eu_validate.c:50:11: warning argument 1 null where non-null expected [-Wnonnull] return memmem(haystack.str, haystack.len, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ needle.str, needle.len) != NULL; ~~~~~~~~~~~~~~~~~~~~~~~ The first check for CONTAINS has a NULL error_msg.str and 0 len. The glibc implementation will exit without looking at any haystack bytes if haystack.len < needle.len, so this was safe, but silence the warning anyway by guarding against implementation variability. Fixes: `122ef3799d` ("i965: Only insert error message if not already present") Reviewed-by: Matt Turner <mattst88@gmail.com> (cherry picked from commit `e91c3540fc`)	2017-10-27 21:33:04 +01:00
Timothy Arceri	17d988bfaa	radv: move nir print after linking is done We now have linking optimisations so we want to delay dumping the nir until after these are complete. Fixes: `06f05040eb` (radv: Link shaders) Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> (cherry picked from commit `f0a2bbd1a4`) Squashed with commit: radv: print NIR before LLVM IR and disassembly It's still printed after linking, but it makes more sense to have SPIRV->NIR->LLVM IR->ASM. Fixes: `f0a2bbd1a4` (radv: move nir print after linking is done) Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> (cherry picked from commit `9711979df0`)	2017-10-27 21:32:34 +01:00
Dave Airlie	03cf1953ad	mesa/bufferobj: don't double negate the range This fixes a regression I introduced refactoring this code, I managed to invert range twice, I moved the inversion into the common code, but forgot to stop doing it in the callee. Fixes: GL45-CTS.multi_bind.dispatch_bind_buffers_base Fixes: `35ac13ed3` (mesa/bufferobj: consolidate some codepaths between ubo/ssbo/atomics.) Reported-by: Nicolai Hähnle <nicolai.haehnle@amd.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Signed-off-by: Dave Airlie <airlied@redhat.com> (cherry picked from commit `11d688d9f0`)	2017-10-27 18:56:44 +01:00
Timothy Arceri	4fb6b83968	radv: clone meta shaders before linking The IR is reused in different pipeline combinations so we need to clone it to avoid link time optimisations messing up the original copy. Fixes: `06f05040eb` (radv: Link shaders) Reviewed-by: Dave Airlie <airlied@redhat.com> (cherry picked from commit `013313cf89`)	2017-10-27 18:56:41 +01:00
Dylan Baker	26b44eadac	meson: fix egl build for meson version < 0.43 Meson 0.43 added the ability to pass nested lists to include_directories, so the code that we have works for 0.43, but not for 0.42. This patch changes the include_directories list to be flat so it works with 0.42. Fixes: `108d257a16` ("meson: build libEGL") Tested-by: Vinson Lee <vlee@freedesktop.org> Reviewed-by: Rhys Kidd <rhyskidd@gmail.com> Reviewed-by: Eric Anholt <eric@anholt.net> Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com> (cherry picked from commit `77f7ef0287`)	2017-10-27 18:56:37 +01:00
Kenneth Graunke	e22cf6e9b4	mesa: Accept GL_BACK in get_fb0_attachment with ARB_ES3_1_compatibility. According to the ARB_ES3_1_compatibility specification, glGetFramebufferAttachmentParameteriv is supposed to accept BACK, and it behaves exactly like BACK_LEFT. Fixes a GL error in GFXBench 5 Aztec Ruins. Cc: "17.3 17.2" <mesa-stable@lists.freedesktop.org> Reviewed-by: Tapani Pälli <tapani.palli@intel.com> (cherry picked from commit `4f538c3f99`)	2017-10-27 18:56:11 +01:00
Tapani Pälli	7df1b901b9	i965: unref push_const_bo in intelDestroyContext Valgrind shows that leak is caused by gen6_upload_push_constant, add unref push_const_bo per stage to destructor to fix this (like done for scratch_bo). ==10952== 144 bytes in 1 blocks are definitely lost in loss record 44 of 66 ==10952== at 0x4C30A1E: calloc (vg_replace_malloc.c:711) ==10952== by 0x8C02847: bo_alloc_internal.constprop.10 (brw_bufmgr.c:344) ==10952== by 0x8C425C4: intel_upload_space (intel_upload.c:101) ==10952== by 0x8C22ED0: gen6_upload_push_constants (gen6_constant_state.c:154) v2: remove if conditions, brw_bo_unreference handles NULL (Ken, Emil) Fixes: `24891d7c05` ("i965: Store per-stage push constant BO pointers.") Signed-off-by: Tapani Pälli <tapani.palli@intel.com> Reviewed-by: Emil Velikov <emil.velikov@collabora.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit `0b131ca427`)	2017-10-27 18:55:56 +01:00
Jason Ekstrand	cbb8aec81c	i965/miptree: Take an isl_format in render_aux_usage Not all rendering matches the miptree format. We allow rendering to texture views so there are cases where it may not match. In those cases, our current scheme of just passing the value of ctx->sRGBEnabled isn't viable. Instead, just do what we do for texturing and pass the view format in directly. Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com> Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit `39c5c12f8f`)	2017-10-27 18:55:53 +01:00
Jason Ekstrand	ff8c152640	i965/blorp: Use more temporary isl_format variables Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com> Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit `78e50185d6`)	2017-10-27 18:55:50 +01:00
Jason Ekstrand	0fef0c7deb	i965/blorp: Use blorp_to_isl_format for src_isl_format in blit_miptrees Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com> Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit `94389943b6`)	2017-10-27 18:55:48 +01:00
Jason Ekstrand	66603bff6f	spirv: Claim support for the simple memory model It's rather surprising that we've never actually hit this before. Apparently, Ian's SPIR-V generator currently claims the Simple memory model when you don't do anything complex. We really shouldn't assert-fail on it. Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit `8ab9820d34`)	2017-10-27 18:55:46 +01:00
Marek Olšák	b0082632eb	radeonsi: add a workaround for weird s_buffer_load_dword behavior on SI See my LLVM patch which fixes the root cause. Users have to apply this patch and then they have 2 choices: - Downgrade to LLVM 5.0 - Update to LLVM git after my LLVM patch is pushed. It won't be possible to use current and earlier development version of LLVM 6.0. Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com> Cc: 17.3 <mesa-stable@lists.freedesktop.org> (cherry picked from commit `3f8e3c2bd8`)	2017-10-27 18:55:43 +01:00
Leo Liu	3da6dd8003	radeon/video: add gfx9 offsets when rejoin the video surface For CPU access. Signed-off-by: Leo Liu <leo.liu@amd.com> Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Christian König <christian.koenig@amd.com> (cherry picked from commit `ea3dc75d72`)	2017-10-27 18:55:41 +01:00
Jason Ekstrand	2e33d68046	anv/pipeline: Call nir_lower_system_values after brw_preprocess_nir We currently have a bug where nir_lower_system_values gets called before nir_lower_var_copies so it will miss any system value uses which come from a copy_var intrinsic. Moving it to after brw_preprocess_nir fixes this problem. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit `279f8fb69c`)	2017-10-27 18:55:38 +01:00
Jason Ekstrand	3b699fdd19	anv/pipeline: Drop nir_lower_clip_cull_distance_arrays We already handle it in brw_preprocess_nir Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> (cherry picked from commit `afa0ddb81e`)	2017-10-27 18:55:24 +01:00
Jason Ekstrand	a2123968fa	intel/fs: Handle flag read/write aliasing in needs_src_copy In order to implement the ballot intrinsic, we do a MOV from flag register to some GRF. If that GRF is used in a SEL, cmod propagation helpfully changes it into a MOV from the flag register with a cmod. This is perfectly valid but when lower_simd_width comes along, it simply splits into two instructions which both have conditional modifiers. This is a problem since we're reading the flag register. This commit makes us check whether or not flags_written() overlaps with the flag values that we are reading via the instruction source and, if we have any interference, will force us to emit a copy of the source. Reviewed-by: Matt Turner <mattst88@gmail.com> Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit `fa6e74e33e`)	2017-10-27 18:50:27 +01:00
Jan Vesely	1ce3fbeb91	clover: Fix compilation after clang r315871 v2: use a more generic compat function v3: rename and formatting cleanup Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103388 Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu> Reviewed-by: Francisco Jerez <currojerez@riseup.net> CC: <mesa-stable@lists.freedesktop.org> (cherry picked from commit `a6d38f476b`)	2017-10-27 18:50:24 +01:00
Jason Ekstrand	8f2bc19856	nir/intrinsics: Set the correct num_indices for load_output Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Jordan Justen <jordan.l.justen@intel.com> (cherry picked from commit `c1b84256cc`)	2017-10-27 18:50:21 +01:00
Matthew Nicholls	b6f0c16a89	ac/nir: generate correct instruction for atomic min/max on unsigned images v2: fix silly typo Cc: "17.2 17.3" <mesa-stable@lists.freedesktop.org> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> (cherry picked from commit `27a0b24bf2`)	2017-10-27 18:50:19 +01:00
Dave Airlie	5c8eb88553	radv: use device name in cache creation like radeonsi. Not sure how useful this is, but it makes it more consistent. Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Cc: "17.3" <mesa-stable@lists.freedesktop.org> Signed-off-by: Dave Airlie <airlied@redhat.com> (cherry picked from commit `d8cefaa197`)	2017-10-27 18:50:12 +01:00
Alex Smith	afdb9da492	radv: Update code pointer correctly if a variant is already created This was the actual cause of GPU hangs fixed by `0fdd531457` ("radv: Fix pipeline cache locking issues"), since multiple threads would end up trying to create the variants for a single entry. Now that we're locking around the whole of this function, this isn't really necessary (we either create all or none of the variants), but fix this anyway in case things change later. Signed-off-by: Alex Smith <asmith@feralinteractive.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> CC: 17.3 <mesa-stable@lists.freedesktop.org> (cherry picked from commit `fee9d05e21`)	2017-10-27 18:50:09 +01:00
Kenneth Graunke	b8f10fdf34	i965: Revert absolute mode for constant buffer pointers. The kernel doesn't initialize the value of the INSTPM or CS_DEBUG_MODE2 registers at context initialization time. Instead, they're inherited from whatever happened to be running on the GPU prior to first run of a new context. So, when we started setting these, other contexts in the system started inheriting our values. Since this controls whether 3DSTATE_CONSTANT_* takes a pointer or an offset, getting the wrong setting is fatal for almost any process which isn't expecting this. Unfortunately, VA-API and Beignet don't initialize this (nor does older Mesa), so they will die horribly if we start doing this. UXA and SNA don't use any push constants, so they are unaffected. Until we have some kind of solution to this problem, I'm going to revert this patch and abandon using the feature for now. It will lead to fewer pushed UBO ranges on Broadwell+, which may lead to lower performance, though I don't have any data on the impact. Cc: "17.3 17.2" <mesa-stable@lists.freedesktop.org> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102774 (cherry picked from commit `013d331220`)	2017-10-27 18:50:07 +01:00
Nicolai Hähnle	ea132f9265	amd/common/gfx9: workaround DCC corruption more conservatively Fixes KHR-GL45.texture_swizzle.smoke and others on Vega. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102809 Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Marek Olšák <marek.olsak@amd.com> (cherry picked from commit `f9ccfda9bc`)	2017-10-27 18:50:04 +01:00
Ilia Mirkin	08b41e70dd	glsl: fix derived cs variables There are two issues with the current implementation. First, it relies on the layout(local_size_*) happening in the same shader as the main function, and secondly it doesn't work for variable group sizes. In both cases, the simplest fix is to move the setup of these derived values to a later time, similar to how the gl_VertexID workarounds are done. There already exist system values defined for both of the derived values, so we use them unconditionally, and lower them after linking is performed. While we're at it, we move to using gl_LocalGroupSizeARB instead of gl_WorkGroupSize for variable group sizes. Also the dead code elimination avoidance can be removed, since there can be situations where gl_LocalGroupSizeARB is needed but has not been inserted for the shader with main function. As a result, the lowering code has to insert its own copies of the system values if needed. Reported-by: Stephane Chevigny <stephane.chevigny@polymtl.ca> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103393 Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu> Reviewed-by: Jordan Justen <jordan.l.justen@intel.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> (cherry picked from commit `4d24a7cb97`)	2017-10-27 18:50:02 +01:00
Emil Velikov	ae720e2873	Update version to 17.3.0-rc1 Signed-off-by: Emil Velikov <emil.velikov@collabora.com>	2017-10-23 13:30:56 +01:00