winsys/radeon: improve debugging printing

Make sure one can distinguish a virtual address failure from an allocation failure. Signed-off-by: Jerome Glisse <jglisse@redhat.com> (cherry picked from commit 9a47684564)
xorg: fix exa finish access
2013-02-08 20:33:22 -05:00 · 2013-02-08 19:01:51 -05:00 · 2013-02-08 11:17:33 -08:00 · 2013-02-08 11:17:28 -08:00 · 2013-02-08 11:17:23 -08:00 · 2013-02-08 11:17:17 -08:00
50 changed files with 968 additions and 396 deletions
--- a/configure.ac
+++ b/configure.ac
@@ -30,7 +30,7 @@ AC_SUBST([OSMESA_VERSION])

 dnl Versions for external dependencies
 LIBDRM_REQUIRED=2.4.24
-LIBDRM_RADEON_REQUIRED=2.4.40
+LIBDRM_RADEON_REQUIRED=2.4.42
 LIBDRM_INTEL_REQUIRED=2.4.38
 LIBDRM_NVVIEUX_REQUIRED=2.4.33
 LIBDRM_NOUVEAU_REQUIRED="2.4.33 libdrm >= 2.4.41"
@@ -57,10 +57,10 @@ LT_PREREQ([2.2])
 LT_INIT([disable-static])

 AX_PROG_BISON([],
-              AS_IF([test ! -f "$srcdir/src/glsl/glcpp/glcpp-parse.c"]
+              AS_IF([test ! -f "$srcdir/src/glsl/glcpp/glcpp-parse.c"],
                    [AC_MSG_ERROR([bison not found - unable to compile glcpp-parse.y])]))
 AX_PROG_FLEX([],
-             AS_IF([test ! -f "$srcdir/src/glsl/glcpp/glcpp-lex.c"]
+             AS_IF([test ! -f "$srcdir/src/glsl/glcpp/glcpp-lex.c"],
                   [AC_MSG_ERROR([flex not found - unable to compile glcpp-lex.l])]))

 AC_PATH_PROG([PERL], [perl])
--- a/docs/relnotes-9.1.html
+++ b/docs/relnotes-9.1.html
@@ -44,9 +44,18 @@ Note: some of the new features are only available with certain drivers.
 </p>

 <ul>
+<li>GL_ANGLE_texture_compression_dxt3</li>
+<li>GL_ANGLE_texture_compression_dxt5</li>
+<li>GL_ARB_ES3_compatibility</li>
+<li>GL_ARB_internalformat_query</li>
 <li>GL_ARB_map_buffer_alignment</li>
-<li>GL_ARB_texture_cube_map_array</li>
+<li>GL_ARB_shading_language_packing</li>
 <li>GL_ARB_texture_buffer_object_rgb32</li>
+<li>GL_ARB_texture_cube_map_array</li>
+<li>GL_EXT_color_buffer_float</li>
+<li>GL_OES_depth_texture_cube_map</li>
+<li>OpenGL 3.1 core profile support on Radeon HD2000 up to HD6000 series </li>
+<li>Multisample anti-aliasing support on Radeon X1000 series</li>
 </ul>


--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -46,3 +46,17 @@ CHIPSET(0x6839, VERDE_6839, VERDE)
 CHIPSET(0x683B, VERDE_683B, VERDE)
 CHIPSET(0x683D, VERDE_683D, VERDE)
 CHIPSET(0x683F, VERDE_683F, VERDE)
+
+CHIPSET(0x6600, OLAND_6600, OLAND)
+CHIPSET(0x6601, OLAND_6601, OLAND)
+CHIPSET(0x6602, OLAND_6602, OLAND)
+CHIPSET(0x6603, OLAND_6603, OLAND)
+CHIPSET(0x6606, OLAND_6606, OLAND)
+CHIPSET(0x6607, OLAND_6607, OLAND)
+CHIPSET(0x6610, OLAND_6610, OLAND)
+CHIPSET(0x6611, OLAND_6611, OLAND)
+CHIPSET(0x6613, OLAND_6613, OLAND)
+CHIPSET(0x6620, OLAND_6620, OLAND)
+CHIPSET(0x6621, OLAND_6621, OLAND)
+CHIPSET(0x6623, OLAND_6623, OLAND)
+CHIPSET(0x6631, OLAND_6631, OLAND)
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -530,7 +530,7 @@ def generate(env):
    env.PkgCheckModules('XF86VIDMODE', ['xxf86vm'])
    env.PkgCheckModules('DRM', ['libdrm >= 2.4.24'])
    env.PkgCheckModules('DRM_INTEL', ['libdrm_intel >= 2.4.30'])
-    env.PkgCheckModules('DRM_RADEON', ['libdrm_radeon >= 2.4.40'])
+    env.PkgCheckModules('DRM_RADEON', ['libdrm_radeon >= 2.4.42'])
    env.PkgCheckModules('XORG', ['xorg-server >= 1.6.0'])
    env.PkgCheckModules('KMS', ['libkms >= 2.4.24'])
    env.PkgCheckModules('UDEV', ['libudev > 150'])
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -487,6 +487,7 @@ static void r300_set_blend_color(struct pipe_context* pipe,
        (struct r300_blend_color_state*)r300->blend_color_state.state;
    struct pipe_blend_color c;
    enum pipe_format format = fb->nr_cbufs ? fb->cbufs[0]->format : 0;
+    float tmp;
    CB_LOCALS;

    state->state = *color; /* Save it, so that we can reuse it in set_fb_state */
@@ -513,6 +514,13 @@ static void r300_set_blend_color(struct pipe_context* pipe,
            c.color[2] = c.color[3];
            break;

+        case PIPE_FORMAT_R8G8B8A8_UNORM:
+        case PIPE_FORMAT_R8G8B8X8_UNORM:
+            tmp = c.color[0];
+            c.color[0] = c.color[2];
+            c.color[2] = tmp;
+            break;
+
        default:;
        }
    }
@@ -919,6 +927,9 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
    /* Need to reset clamping or colormask. */
    r300_mark_atom_dirty(r300, &r300->blend_state);

+    /* Re-swizzle the blend color. */
+    r300_set_blend_color(pipe, &((struct r300_blend_color_state*)r300->blend_color_state.state)->state);
+
    /* If zsbuf is set from NULL to non-NULL or vice versa.. */
    if (!!old_state->zsbuf != !!state->zsbuf) {
        r300_mark_atom_dirty(r300, &r300->dsa_state);
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -978,9 +978,9 @@ r300_texture_create_object(struct r300_screen *rscreen,
    tex->tex.microtile = microtile;
    tex->tex.macrotile[0] = macrotile;
    tex->tex.stride_in_bytes_override = stride_in_bytes_override;
-    tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ?
-                  RADEON_DOMAIN_GTT :
-                  RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT;
+    tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? RADEON_DOMAIN_GTT :
+                  base->nr_samples > 1 ? RADEON_DOMAIN_VRAM :
+                                         RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT;
    tex->buf = buffer;

    r300_texture_desc_init(rscreen, tex, base);
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -243,9 +243,9 @@ void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned buffer_en
 void evergreen_dma_copy(struct r600_context *rctx,
 		struct pipe_resource *dst,
 		struct pipe_resource *src,
-		unsigned long dst_offset,
-		unsigned long src_offset,
-		unsigned long size)
+		uint64_t dst_offset,
+		uint64_t src_offset,
+		uint64_t size)
 {
 	struct radeon_winsys_cs *cs = rctx->rings.dma.cs;
 	unsigned i, ncopy, csize, sub_cmd, shift;
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1668,6 +1668,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
 		surf = (struct r600_surface*)state->cbufs[i];
 		rtex = (struct r600_texture*)surf->base.texture;

+		r600_context_add_resource_size(ctx, state->cbufs[i]->texture);
+
 		if (!surf->color_initialized) {
 			evergreen_init_color_surface(rctx, surf);
 		}
@@ -1699,6 +1701,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
 	if (state->zsbuf) {
 		surf = (struct r600_surface*)state->zsbuf;

+		r600_context_add_resource_size(ctx, state->zsbuf->texture);
+
 		if (!surf->depth_initialized) {
 			evergreen_init_depth_surface(rctx, surf);
 		}
@@ -3481,7 +3485,7 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
 	unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size;
 	unsigned ncopy, height, cheight, detile, i, x, y, z, src_mode, dst_mode;
 	unsigned sub_cmd, bank_h, bank_w, mt_aspect, nbanks, tile_split;
-	unsigned long base, addr;
+	uint64_t base, addr;

 	/* make sure that the dma ring is only one active */
 	rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC);
@@ -3502,7 +3506,8 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
 	if (dst_mode == RADEON_SURF_MODE_LINEAR) {
 		/* T2L */
 		array_mode = evergreen_array_mode(src_mode);
-		slice_tile_max = (((pitch * rsrc->surface.level[src_level].npix_y) >> 6) / bpp) - 1;
+		slice_tile_max = (rsrc->surface.level[src_level].nblk_x * rsrc->surface.level[src_level].nblk_y) >> 6;
+		slice_tile_max = slice_tile_max ? slice_tile_max - 1 : 0;
 		/* linear height must be the same as the slice tile max height, it's ok even
 		 * if the linear destination/source have smaller heigh as the size of the
 		 * dma packet will be using the copy_height which is always smaller or equal
@@ -3526,7 +3531,8 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
 	} else {
 		/* L2T */
 		array_mode = evergreen_array_mode(dst_mode);
-		slice_tile_max = (((pitch * rdst->surface.level[dst_level].npix_y) >> 6) / bpp) - 1;
+		slice_tile_max = (rdst->surface.level[dst_level].nblk_x * rdst->surface.level[dst_level].nblk_y) >> 6;
+		slice_tile_max = slice_tile_max ? slice_tile_max - 1 : 0;
 		/* linear height must be the same as the slice tile max height, it's ok even
 		 * if the linear destination/source have smaller heigh as the size of the
 		 * dma packet will be using the copy_height which is always smaller or equal
@@ -3625,7 +3631,7 @@ boolean evergreen_dma_blit(struct pipe_context *ctx,
 	}

 	if (src_mode == dst_mode) {
-		unsigned long dst_offset, src_offset;
+		uint64_t dst_offset, src_offset;
 		/* simple dma blit would do NOTE code here assume :
 		 *   src_box.x/y == 0
 		 *   dst_x/y == 0
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -174,9 +174,9 @@ void r600_need_dma_space(struct r600_context *ctx, unsigned num_dw);
 void r600_dma_copy(struct r600_context *rctx,
 		struct pipe_resource *dst,
 		struct pipe_resource *src,
-		unsigned long dst_offset,
-		unsigned long src_offset,
-		unsigned long size);
+		uint64_t dst_offset,
+		uint64_t src_offset,
+		uint64_t size);
 boolean r600_dma_blit(struct pipe_context *ctx,
 			struct pipe_resource *dst,
 			unsigned dst_level,
@@ -187,9 +187,9 @@ boolean r600_dma_blit(struct pipe_context *ctx,
 void evergreen_dma_copy(struct r600_context *rctx,
 		struct pipe_resource *dst,
 		struct pipe_resource *src,
-		unsigned long dst_offset,
-		unsigned long src_offset,
-		unsigned long size);
+		uint64_t dst_offset,
+		uint64_t src_offset,
+		uint64_t size);
 boolean evergreen_dma_blit(struct pipe_context *ctx,
 			struct pipe_resource *dst,
 			unsigned dst_level,
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -359,6 +359,16 @@ out_err:
 void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 			boolean count_draw_in)
 {
+	if (!ctx->ws->cs_memory_below_limit(ctx->rings.gfx.cs, ctx->vram, ctx->gtt)) {
+		ctx->gtt = 0;
+		ctx->vram = 0;
+		ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
+		return;
+	}
+	/* all will be accounted once relocations are emitted */
+	ctx->gtt = 0;
+	ctx->vram = 0;
+
 	/* The number of dwords we already used in the CS so far. */
 	num_dw += ctx->rings.gfx.cs->cdw;

@@ -784,6 +794,8 @@ void r600_begin_new_cs(struct r600_context *ctx)

 	ctx->pm4_dirty_cdwords = 0;
 	ctx->flags = 0;
+	ctx->gtt = 0;
+	ctx->vram = 0;

 	/* Begin a new CS. */
 	r600_emit_command_buffer(ctx->rings.gfx.cs, &ctx->start_cs_cmd);
@@ -1160,9 +1172,9 @@ void r600_need_dma_space(struct r600_context *ctx, unsigned num_dw)
 void r600_dma_copy(struct r600_context *rctx,
 		struct pipe_resource *dst,
 		struct pipe_resource *src,
-		unsigned long dst_offset,
-		unsigned long src_offset,
-		unsigned long size)
+		uint64_t dst_offset,
+		uint64_t src_offset,
+		uint64_t size)
 {
 	struct radeon_winsys_cs *cs = rctx->rings.dma.cs;
 	unsigned i, ncopy, csize, shift;
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -447,6 +447,10 @@ struct r600_context {
 	unsigned			backend_mask;
 	unsigned			max_db; /* for OQ */

+	/* current unaccounted memory usage */
+	uint64_t			vram;
+	uint64_t			gtt;
+
 	/* Miscellaneous state objects. */
 	void				*custom_dsa_flush;
 	void				*custom_blend_resolve;
@@ -869,9 +873,11 @@ static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx,
 	 * look serialized from driver pov
 	 */
 	if (!ring->flushing) {
-		if (ring == &ctx->rings.gfx && ctx->rings.dma.cs) {
-			/* flush dma ring */
-			ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
+		if (ring == &ctx->rings.gfx) {
+			if (ctx->rings.dma.cs) {
+				/* flush dma ring */
+				ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
+			}
 		} else {
 			/* flush gfx ring */
 			ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
@@ -996,4 +1002,28 @@ static INLINE unsigned u_max_layer(struct pipe_resource *r, unsigned level)
 	}
 }

+static INLINE void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
+{
+	struct r600_context *rctx = (struct r600_context *)ctx;
+	struct r600_resource *rr = (struct r600_resource *)r;
+
+	if (r == NULL) { /* no resource given: nothing to account */
+		return;
+	}
+
+	/*
+	 * Add this buffer's size to the running per-domain totals. This is
+	 * only a rough estimate of the current draw call's memory needs;
+	 * after each draw call, memory is precisely accounted, so the
+	 * uncertainty is limited to the current draw call. In practice this
+	 * gave a very good estimate (+/- 10% of the target memory limit).
+	 */
+	if (rr->domains & RADEON_DOMAIN_GTT) { /* checked independently of VRAM below */
+		rctx->gtt += rr->buf->size;
+	}
+	if (rr->domains & RADEON_DOMAIN_VRAM) { /* a buffer with both domain bits is counted in both totals */
+		rctx->vram += rr->buf->size;
+	}
+}
+
 #endif
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1544,6 +1544,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,

 		surf = (struct r600_surface*)state->cbufs[i];
 		rtex = (struct r600_texture*)surf->base.texture;
+		r600_context_add_resource_size(ctx, state->cbufs[i]->texture);

 		if (!surf->color_initialized || force_cmask_fmask) {
 			r600_init_color_surface(rctx, surf, force_cmask_fmask);
@@ -1576,6 +1577,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
 	if (state->zsbuf) {
 		surf = (struct r600_surface*)state->zsbuf;

+		r600_context_add_resource_size(ctx, state->zsbuf->texture);
+
 		if (!surf->depth_initialized) {
 			r600_init_depth_surface(rctx, surf);
 		}
@@ -2979,7 +2982,7 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
 	struct r600_texture *rdst = (struct r600_texture*)dst;
 	unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size;
 	unsigned ncopy, height, cheight, detile, i, x, y, z, src_mode, dst_mode;
-	unsigned long base, addr;
+	uint64_t base, addr;

 	/* make sure that the dma ring is only one active */
 	rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC);
@@ -2998,7 +3001,8 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
 	if (dst_mode == RADEON_SURF_MODE_LINEAR) {
 		/* T2L */
 		array_mode = r600_array_mode(src_mode);
-		slice_tile_max = (((pitch * rsrc->surface.level[src_level].npix_y) >> 6) / bpp) - 1;
+		slice_tile_max = (rsrc->surface.level[src_level].nblk_x * rsrc->surface.level[src_level].nblk_y) >> 6;
+		slice_tile_max = slice_tile_max ? slice_tile_max - 1 : 0;
 		/* linear height must be the same as the slice tile max height, it's ok even
 		 * if the linear destination/source have smaller heigh as the size of the
 		 * dma packet will be using the copy_height which is always smaller or equal
@@ -3016,7 +3020,8 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
 	} else {
 		/* L2T */
 		array_mode = r600_array_mode(dst_mode);
-		slice_tile_max = (((pitch * rdst->surface.level[dst_level].npix_y) >> 6) / bpp) - 1;
+		slice_tile_max = (rdst->surface.level[dst_level].nblk_x * rdst->surface.level[dst_level].nblk_y) >> 6;
+		slice_tile_max = slice_tile_max ? slice_tile_max - 1 : 0;
 		/* linear height must be the same as the slice tile max height, it's ok even
 		 * if the linear destination/source have smaller heigh as the size of the
 		 * dma packet will be using the copy_height which is always smaller or equal
@@ -3109,7 +3114,7 @@ boolean r600_dma_blit(struct pipe_context *ctx,
 	}

 	if (src_mode == dst_mode) {
-		unsigned long dst_offset, src_offset, size;
+		uint64_t dst_offset, src_offset, size;

 		/* simple dma blit would do NOTE code here assume :
 		 *   src_box.x/y == 0
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -479,7 +479,8 @@ static void r600_set_index_buffer(struct pipe_context *ctx,

 	if (ib) {
 		pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer);
-	        memcpy(&rctx->index_buffer, ib, sizeof(*ib));
+		memcpy(&rctx->index_buffer, ib, sizeof(*ib));
+		r600_context_add_resource_size(ctx, ib->buffer);
 	} else {
 		pipe_resource_reference(&rctx->index_buffer.buffer, NULL);
 	}
@@ -516,6 +517,7 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx,
 					vb[i].buffer_offset = input[i].buffer_offset;
 					pipe_resource_reference(&vb[i].buffer, input[i].buffer);
 					new_buffer_mask |= 1 << i;
+					r600_context_add_resource_size(ctx, input[i].buffer);
 				} else {
 					pipe_resource_reference(&vb[i].buffer, NULL);
 					disable_mask |= 1 << i;
@@ -613,6 +615,7 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,

 			pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], views[i]);
 			new_mask |= 1 << i;
+			r600_context_add_resource_size(pipe, views[i]->texture);
 		} else {
 			pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], NULL);
 			disable_mask |= 1 << i;
@@ -806,6 +809,8 @@ static void r600_bind_ps_state(struct pipe_context *ctx, void *state)
 	rctx->ps_shader = (struct r600_pipe_shader_selector *)state;
 	r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate);

+	r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->ps_shader->current->bo);
+
 	if (rctx->chip_class <= R700) {
 		bool multiwrite = rctx->ps_shader->current->shader.fs_write_all;

@@ -835,6 +840,8 @@ static void r600_bind_vs_state(struct pipe_context *ctx, void *state)
 	if (state) {
 		r600_context_pipe_state_set(rctx, &rctx->vs_shader->current->rstate);

+		r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->vs_shader->current->bo);
+
 		/* Update clip misc state. */
 		if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl ||
 		    rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) {
@@ -938,10 +945,13 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint
 		} else {
 			u_upload_data(rctx->uploader, 0, input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer);
 		}
+		/* account it in gtt */
+		rctx->gtt += input->buffer_size;
 	} else {
 		/* Setup the hw buffer. */
 		cb->buffer_offset = input->buffer_offset;
 		pipe_resource_reference(&cb->buffer, input->buffer);
+		r600_context_add_resource_size(ctx, input->buffer);
 	}

 	state->enabled_mask |= 1 << index;
@@ -1004,6 +1014,7 @@ static void r600_set_so_targets(struct pipe_context *ctx,
 	/* Set the new targets. */
 	for (i = 0; i < num_targets; i++) {
 		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]);
+		r600_context_add_resource_size(ctx, targets[i]->buffer);
 	}
 	for (; i < rctx->num_so_targets; i++) {
 		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL);
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -270,6 +270,7 @@ static void r600_texture_destroy(struct pipe_screen *screen,
 	if (rtex->flushed_depth_texture)
 		pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);

+        pipe_resource_reference((struct pipe_resource**)&rtex->htile, NULL);
 	pb_reference(&resource->buf, NULL);
 	FREE(rtex);
 }
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -155,7 +155,7 @@ static inline LLVMValueRef bitcast(

 void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context * bld_base,
                                          struct lp_build_emit_data * emit_data,
-                                          unsigned coord_arg);
+                                          LLVMValueRef *coords_arg);

 void radeon_llvm_context_init(struct radeon_llvm_context * ctx);

--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -531,7 +531,7 @@ static void kil_emit(
 void radeon_llvm_emit_prepare_cube_coords(
 		struct lp_build_tgsi_context * bld_base,
 		struct lp_build_emit_data * emit_data,
-		unsigned coord_arg)
+		LLVMValueRef *coords_arg)
 {

 	unsigned target = emit_data->inst->Texture.Texture;
@@ -542,11 +542,13 @@ void radeon_llvm_emit_prepare_cube_coords(
 	LLVMValueRef coords[4];
 	LLVMValueRef mad_args[3];
 	LLVMValueRef idx;
+	struct LLVMOpaqueValue *cube_vec;
+	LLVMValueRef v;
 	unsigned i;

-	LLVMValueRef v = build_intrinsic(builder, "llvm.AMDGPU.cube",
-			LLVMVectorType(type, 4),
-			&emit_data->args[coord_arg], 1, LLVMReadNoneAttribute);
+	cube_vec = lp_build_gather_values(bld_base->base.gallivm, coords_arg, 4);
+	v = build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4),
+                            &cube_vec, 1, LLVMReadNoneAttribute);

 	for (i = 0; i < 4; ++i) {
 		idx = lp_build_const_int32(gallivm, i);
@@ -579,18 +581,14 @@ void radeon_llvm_emit_prepare_cube_coords(
 	if (target != TGSI_TEXTURE_CUBE ||
 		opcode != TGSI_OPCODE_TEX) {

-		/* load source coord.w component - array_index for cube arrays or
-		 * compare value for SHADOWCUBE */
-		idx = lp_build_const_int32(gallivm, 3);
-		coords[3] = LLVMBuildExtractElement(builder,
-				emit_data->args[coord_arg], idx, "");
-
 		/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
 		if (target == TGSI_TEXTURE_CUBE_ARRAY ||
 			target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {

+			/* coords_arg.w component - array_index for cube arrays or
+			 * compare value for SHADOWCUBE */
 			coords[2] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
-					coords[3], lp_build_const_float(gallivm, 8.0), coords[2]);
+					coords_arg[3], lp_build_const_float(gallivm, 8.0), coords[2]);
 		}

 		/* for instructions that need additional src (compare/lod/bias),
@@ -598,12 +596,11 @@ void radeon_llvm_emit_prepare_cube_coords(
 		if (opcode == TGSI_OPCODE_TEX2 ||
 			opcode == TGSI_OPCODE_TXB2 ||
 			opcode == TGSI_OPCODE_TXL2) {
-			coords[3] = emit_data->args[coord_arg + 1];
+			coords[3] = coords_arg[4];
 		}
 	}

-	emit_data->args[coord_arg] =
-			lp_build_gather_values(bld_base->base.gallivm, coords, 4);
+	memcpy(coords_arg, coords, sizeof(coords));
 }

 static void txd_fetch_args(
@@ -645,9 +642,6 @@ static void txp_fetch_args(
 					TGSI_OPCODE_DIV, arg, src_w);
 	}
 	coords[3] = bld_base->base.one;
-	emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
-						coords, 4);
-	emit_data->arg_count = 1;

 	if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
 	     inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
@@ -655,8 +649,12 @@ static void txp_fetch_args(
 	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
 	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
 	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
-		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, 0);
+		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
 	}
+
+	emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
+						coords, 4);
+	emit_data->arg_count = 1;
 }

 static void tex_fetch_args(
@@ -673,17 +671,12 @@ static void tex_fetch_args(

 	const struct tgsi_full_instruction * inst = emit_data->inst;

-	LLVMValueRef coords[4];
+	LLVMValueRef coords[5];
 	unsigned chan;
 	for (chan = 0; chan < 4; chan++) {
 		coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
 	}

-	emit_data->arg_count = 1;
-	emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
-						coords, 4);
-	emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
-
 	if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
 		inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
 		inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
@@ -692,7 +685,7 @@ static void tex_fetch_args(
 		 * That operand should be passed as a float value in the args array
 		 * right after the coord vector. After packing it's not used anymore,
 		 * that's why arg_count is not increased */
-		emit_data->args[1] = lp_build_emit_fetch(bld_base, inst, 1, 0);
+		coords[4] = lp_build_emit_fetch(bld_base, inst, 1, 0);
 	}

 	if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
@@ -701,8 +694,13 @@ static void tex_fetch_args(
 	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
 	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
 	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
-		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, 0);
+		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
 	}
+
+	emit_data->arg_count = 1;
+	emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
+						coords, 4);
+	emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
 }

 static void txf_fetch_args(
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
@@ -280,6 +280,7 @@ static const char *r600_get_family_name(enum radeon_family family)
 	case CHIP_TAHITI: return "AMD TAHITI";
 	case CHIP_PITCAIRN: return "AMD PITCAIRN";
 	case CHIP_VERDE: return "AMD CAPE VERDE";
+	case CHIP_OLAND: return "AMD OLAND";
 	default: return "AMD unknown";
 	}
 }
@@ -379,7 +380,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
 			return 15;
 	case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
-		return /*rscreen->info.drm_minor >= 9 ? 16384 :*/ 0;
+		return 16384;
 	case PIPE_CAP_MAX_COMBINED_SAMPLERS:
 		return 32;

--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -791,54 +791,127 @@ static void tex_fetch_args(
 	struct lp_build_tgsi_context * bld_base,
 	struct lp_build_emit_data * emit_data)
 {
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
 	const struct tgsi_full_instruction * inst = emit_data->inst;
+	unsigned opcode = inst->Instruction.Opcode;
+	unsigned target = inst->Texture.Texture;
 	LLVMValueRef ptr;
 	LLVMValueRef offset;
+	LLVMValueRef coords[4];
+	LLVMValueRef address[16];
+	unsigned count = 0;
+	unsigned chan;

 	/* WriteMask */
 	/* XXX: should be optimized using emit_data->inst->Dst[0].Register.WriteMask*/
 	emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm, 0xf);

-	/* Coordinates */
-	/* XXX: Not all sample instructions need 4 address arguments. */
-	if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
-		LLVMValueRef src_w;
-		unsigned chan;
-		LLVMValueRef coords[4];
-
-		emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
-		src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
-
-		for (chan = 0; chan < 3; chan++ ) {
-			LLVMValueRef arg = lp_build_emit_fetch(bld_base,
-							       emit_data->inst, 0, chan);
+	/* Fetch and project texture coordinates */
+	coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
+	for (chan = 0; chan < 3; chan++ ) {
+		coords[chan] = lp_build_emit_fetch(bld_base,
+						   emit_data->inst, 0,
+						   chan);
+		if (opcode == TGSI_OPCODE_TXP)
 			coords[chan] = lp_build_emit_llvm_binary(bld_base,
 								 TGSI_OPCODE_DIV,
-								 arg, src_w);
-		}
+								 coords[chan],
+								 coords[3]);
+	}
+
+	if (opcode == TGSI_OPCODE_TXP)
 		coords[3] = bld_base->base.one;
-		emit_data->args[1] = lp_build_gather_values(bld_base->base.gallivm,
-							    coords, 4);
-	} else
-		emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
-							 0, LP_CHAN_ALL);

-	if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
-		inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
-		inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
-		/* These instructions have additional operand that should be packed
-		 * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords.
-		 * That operand should be passed as a float value in the args array
-		 * right after the coord vector. After packing it's not used anymore,
-		 * that's why arg_count is not increased */
-		emit_data->args[2] = lp_build_emit_fetch(bld_base, inst, 1, 0);
+	/* Pack LOD bias value */
+	if (opcode == TGSI_OPCODE_TXB)
+		address[count++] = coords[3];
+
+	if ((target == TGSI_TEXTURE_CUBE || target == TGSI_TEXTURE_SHADOWCUBE) &&
+	    opcode != TGSI_OPCODE_TXQ)
+		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
+
+	/* Pack depth comparison value */
+	switch (target) {
+	case TGSI_TEXTURE_SHADOW1D:
+	case TGSI_TEXTURE_SHADOW1D_ARRAY:
+	case TGSI_TEXTURE_SHADOW2D:
+	case TGSI_TEXTURE_SHADOWRECT:
+		address[count++] = coords[2];
+		break;
+	case TGSI_TEXTURE_SHADOWCUBE:
+	case TGSI_TEXTURE_SHADOW2D_ARRAY:
+		address[count++] = coords[3];
+		break;
+	case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+		address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
 	}

-	if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
-	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
-	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ) {
-		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, 1);
+	/* Pack texture coordinates */
+	address[count++] = coords[0];
+	switch (target) {
+	case TGSI_TEXTURE_2D:
+	case TGSI_TEXTURE_2D_ARRAY:
+	case TGSI_TEXTURE_3D:
+	case TGSI_TEXTURE_CUBE:
+	case TGSI_TEXTURE_RECT:
+	case TGSI_TEXTURE_SHADOW2D:
+	case TGSI_TEXTURE_SHADOWRECT:
+	case TGSI_TEXTURE_SHADOW2D_ARRAY:
+	case TGSI_TEXTURE_SHADOWCUBE:
+	case TGSI_TEXTURE_2D_MSAA:
+	case TGSI_TEXTURE_2D_ARRAY_MSAA:
+	case TGSI_TEXTURE_CUBE_ARRAY:
+	case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+		address[count++] = coords[1];
 	}
+	switch (target) {
+	case TGSI_TEXTURE_3D:
+	case TGSI_TEXTURE_CUBE:
+	case TGSI_TEXTURE_SHADOWCUBE:
+	case TGSI_TEXTURE_CUBE_ARRAY:
+	case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+		address[count++] = coords[2];
+	}
+
+	/* Pack array slice */
+	switch (target) {
+	case TGSI_TEXTURE_1D_ARRAY:
+		address[count++] = coords[1];
+	}
+	switch (target) {
+	case TGSI_TEXTURE_2D_ARRAY:
+	case TGSI_TEXTURE_2D_ARRAY_MSAA:
+	case TGSI_TEXTURE_SHADOW2D_ARRAY:
+		address[count++] = coords[2];
+	}
+	switch (target) {
+	case TGSI_TEXTURE_CUBE_ARRAY:
+	case TGSI_TEXTURE_SHADOW1D_ARRAY:
+	case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+		address[count++] = coords[3];
+	}
+
+	/* Pack LOD */
+	if (opcode == TGSI_OPCODE_TXL)
+		address[count++] = coords[3];
+
+	if (count > 16) {
+		assert(!"Cannot handle more than 16 texture address parameters");
+		count = 16;
+	}
+
+	for (chan = 0; chan < count; chan++ ) {
+		address[chan] = LLVMBuildBitCast(gallivm->builder,
+						 address[chan],
+						 LLVMInt32TypeInContext(gallivm->context),
+						 "");
+	}
+
+	/* Pad to power of two vector */
+	while (count < util_next_power_of_two(count))
+		address[count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
+
+	emit_data->args[1] = lp_build_gather_values(gallivm, address, count);

 	/* Resource */
 	ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, SI_SGPR_RESOURCE);
@@ -855,8 +928,7 @@ static void tex_fetch_args(
 						ptr, offset);

 	/* Dimensions */
-	emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm,
-					emit_data->inst->Texture.Texture);
+	emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm, target);

 	emit_data->arg_count = 5;
 	/* XXX: To optimize, we could use a float or v2f32, if the last bits of
@@ -866,22 +938,37 @@ static void tex_fetch_args(
 			4);
 }

+static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
+				struct lp_build_tgsi_context * bld_base,
+				struct lp_build_emit_data * emit_data)
+{
+	struct lp_build_context * base = &bld_base->base;
+	char intr_name[23]; /* NOTE(review): exactly fits a 16-char prefix + "v16i32" + NUL; sprintf below is unbounded - confirm no longer name reaches here */
+
+	sprintf(intr_name, "%sv%ui32", action->intr_name, /* name = action prefix + "v<N>i32" */
+		LLVMGetVectorSize(LLVMTypeOf(emit_data->args[1]))); /* N = element count of the vector in args[1] */
+
+	emit_data->output[emit_data->chan] = lp_build_intrinsic( /* call the size-specific intrinsic with the prepared args */
+		base->gallivm->builder, intr_name, emit_data->dst_type,
+		emit_data->args, emit_data->arg_count);
+}
+
 static const struct lp_build_tgsi_action tex_action = {
 	.fetch_args = tex_fetch_args,
-	.emit = lp_build_tgsi_intrinsic,
-	.intr_name = "llvm.SI.sample"
+	.emit = build_tex_intrinsic,
+	.intr_name = "llvm.SI.sample."
 };

 static const struct lp_build_tgsi_action txb_action = {
 	.fetch_args = tex_fetch_args,
-	.emit = lp_build_tgsi_intrinsic,
-	.intr_name = "llvm.SI.sample.bias"
+	.emit = build_tex_intrinsic,
+	.intr_name = "llvm.SI.sampleb."
 };

 static const struct lp_build_tgsi_action txl_action = {
 	.fetch_args = tex_fetch_args,
-	.emit = lp_build_tgsi_intrinsic,
-	.intr_name = "llvm.SI.sample.lod"
+	.emit = build_tex_intrinsic,
+	.intr_name = "llvm.SI.samplel."
 };


--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -720,7 +720,6 @@ static uint32_t si_translate_colorformat(enum pipe_format format)
 	case PIPE_FORMAT_L8A8_SNORM:
 	case PIPE_FORMAT_L8A8_UINT:
 	case PIPE_FORMAT_L8A8_SINT:
-	case PIPE_FORMAT_L8A8_SRGB:
 	case PIPE_FORMAT_R8G8_SNORM:
 	case PIPE_FORMAT_R8G8_UNORM:
 	case PIPE_FORMAT_R8G8_UINT:
@@ -804,15 +803,12 @@ static uint32_t si_translate_colorformat(enum pipe_format format)
 		return V_028C70_COLOR_10_11_11;

 	/* 64-bit buffers. */
-	case PIPE_FORMAT_R16G16B16_USCALED:
-	case PIPE_FORMAT_R16G16B16_SSCALED:
 	case PIPE_FORMAT_R16G16B16A16_UINT:
 	case PIPE_FORMAT_R16G16B16A16_SINT:
 	case PIPE_FORMAT_R16G16B16A16_USCALED:
 	case PIPE_FORMAT_R16G16B16A16_SSCALED:
 	case PIPE_FORMAT_R16G16B16A16_UNORM:
 	case PIPE_FORMAT_R16G16B16A16_SNORM:
-	case PIPE_FORMAT_R16G16B16_FLOAT:
 	case PIPE_FORMAT_R16G16B16A16_FLOAT:
 		return V_028C70_COLOR_16_16_16_16;

@@ -898,7 +894,6 @@ static uint32_t si_translate_colorswap(enum pipe_format format)
 	case PIPE_FORMAT_L8A8_SNORM:
 	case PIPE_FORMAT_L8A8_UINT:
 	case PIPE_FORMAT_L8A8_SINT:
-	case PIPE_FORMAT_L8A8_SRGB:
 		return V_028C70_SWAP_ALT;
 	case PIPE_FORMAT_R8G8_SNORM:
 	case PIPE_FORMAT_R8G8_UNORM:
@@ -1172,6 +1167,8 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
 		goto out_unknown; /* TODO */

 	case UTIL_FORMAT_COLORSPACE_SRGB:
+		if (desc->nr_channels != 4 && desc->nr_channels != 1)
+			goto out_unknown;
 		break;

 	default:
@@ -1624,15 +1621,19 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
 		if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
 			ntype = V_028C70_NUMBER_SRGB;
 		else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
-			if (desc->channel[i].normalized)
-				ntype = V_028C70_NUMBER_SNORM;
-			else if (desc->channel[i].pure_integer)
+			if (desc->channel[i].pure_integer) {
 				ntype = V_028C70_NUMBER_SINT;
+			} else {
+				assert(desc->channel[i].normalized);
+				ntype = V_028C70_NUMBER_SNORM;
+			}
 		} else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
-			if (desc->channel[i].normalized)
-				ntype = V_028C70_NUMBER_UNORM;
-			else if (desc->channel[i].pure_integer)
+			if (desc->channel[i].pure_integer) {
 				ntype = V_028C70_NUMBER_UINT;
+			} else {
+				assert(desc->channel[i].normalized);
+				ntype = V_028C70_NUMBER_UNORM;
+			}
 		}
 	}

@@ -2093,16 +2094,31 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
 	first_non_void = util_format_get_first_non_void_channel(pipe_format);
 	if (first_non_void < 0) {
 		num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
-	} else switch (desc->channel[first_non_void].type) {
-	case UTIL_FORMAT_TYPE_FLOAT:
-		num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
-		break;
-	case UTIL_FORMAT_TYPE_SIGNED:
-		num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
-		break;
-	case UTIL_FORMAT_TYPE_UNSIGNED:
-	default:
+	} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+		num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
+	} else {
 		num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
+
+		switch (desc->channel[first_non_void].type) {
+		case UTIL_FORMAT_TYPE_FLOAT:
+			num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
+			break;
+		case UTIL_FORMAT_TYPE_SIGNED:
+			if (desc->channel[first_non_void].normalized)
+				num_format = V_008F14_IMG_NUM_FORMAT_SNORM;
+			else if (desc->channel[first_non_void].pure_integer)
+				num_format = V_008F14_IMG_NUM_FORMAT_SINT;
+			else
+				num_format = V_008F14_IMG_NUM_FORMAT_SSCALED;
+			break;
+		case UTIL_FORMAT_TYPE_UNSIGNED:
+			if (desc->channel[first_non_void].normalized)
+				num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
+			else if (desc->channel[first_non_void].pure_integer)
+				num_format = V_008F14_IMG_NUM_FORMAT_UINT;
+			else
+				num_format = V_008F14_IMG_NUM_FORMAT_USCALED;
+		}
 	}

 	format = si_translate_texformat(ctx->screen, pipe_format, desc, first_non_void);
@@ -2476,10 +2492,20 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
 			num_format = V_008F0C_BUF_NUM_FORMAT_USCALED; /* XXX */
 			break;
 		case UTIL_FORMAT_TYPE_SIGNED:
-			num_format = V_008F0C_BUF_NUM_FORMAT_SNORM;
+			if (desc->channel[first_non_void].normalized)
+				num_format = V_008F0C_BUF_NUM_FORMAT_SNORM;
+			else if (desc->channel[first_non_void].pure_integer)
+				num_format = V_008F0C_BUF_NUM_FORMAT_SINT;
+			else
+				num_format = V_008F0C_BUF_NUM_FORMAT_SSCALED;
 			break;
 		case UTIL_FORMAT_TYPE_UNSIGNED:
-			num_format = V_008F0C_BUF_NUM_FORMAT_UNORM;
+			if (desc->channel[first_non_void].normalized)
+				num_format = V_008F0C_BUF_NUM_FORMAT_UNORM;
+			else if (desc->channel[first_non_void].pure_integer)
+				num_format = V_008F0C_BUF_NUM_FORMAT_UINT;
+			else
+				num_format = V_008F0C_BUF_NUM_FORMAT_USCALED;
 			break;
 		case UTIL_FORMAT_TYPE_FLOAT:
 		default:
@@ -2665,9 +2691,14 @@ void si_init_config(struct r600_context *rctx)
 		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x2a00126a);
 		break;
 	case CHIP_VERDE:
-	default:
 		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x0000124a);
 		break;
+	case CHIP_OLAND:
+		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000082);
+		break;
+	default:
+		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
+		break;
 	}

 	si_pm4_set_state(rctx, init, pm4);
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -524,10 +524,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	struct pipe_index_buffer ib = {};
 	uint32_t cp_coher_cntl;

-	if ((!info->count && (info->indexed || !info->count_from_stream_output)) ||
-	    (info->indexed && !rctx->index_buffer.buffer)) {
+	if (!info->count && (info->indexed || !info->count_from_stream_output))
 		return;
-	}

 	if (!rctx->ps_shader || !rctx->vs_shader)
 		return;
@@ -538,13 +536,14 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	if (info->indexed) {
 		/* Initialize the index buffer struct. */
 		pipe_resource_reference(&ib.buffer, rctx->index_buffer.buffer);
+		ib.user_buffer = rctx->index_buffer.user_buffer;
 		ib.index_size = rctx->index_buffer.index_size;
 		ib.offset = rctx->index_buffer.offset + info->start * ib.index_size;

 		/* Translate or upload, if needed. */
 		r600_translate_index_buffer(rctx, &ib, info->count);

-		if (ib.user_buffer) {
+		if (ib.user_buffer && !ib.buffer) {
 			r600_upload_index_buffer(rctx, &ib, info->count);
 		}

--- a/src/gallium/state_trackers/xorg/xorg_exa.c
+++ b/src/gallium/state_trackers/xorg/xorg_exa.c
@@ -318,7 +318,7 @@ ExaFinishAccess(PixmapPtr pPix, int index)
    if (!priv)
 	return;

-    if (!priv->map_transfer || pPix->devPrivate.ptr == NULL)
+    if (!priv->map_transfer)
 	return;

    exa_debug_printf("ExaFinishAccess %d\n", index);
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -593,10 +593,11 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
        va.offset = bo->va;
        r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
        if (r && va.operation == RADEON_VA_RESULT_ERROR) {
-            fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
+            fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
            fprintf(stderr, "radeon:    size      : %d bytes\n", size);
            fprintf(stderr, "radeon:    alignment : %d bytes\n", desc->alignment);
            fprintf(stderr, "radeon:    domains   : %d\n", args.initial_domain);
+            fprintf(stderr, "radeon:    va        : 0x%016llx\n", (unsigned long long)bo->va);
            radeon_bo_destroy(&bo->base);
            return NULL;
        }
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -383,6 +383,16 @@ static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
    return status;
 }

+/* Nonzero while used plus pending GTT and VRAM each stay under 70% of total. */
+static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
+{
+    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+    const double gart_limit = cs->ws->info.gart_size * 0.7;
+    const double vram_limit = cs->ws->info.vram_size * 0.7;
+    return (cs->csc->used_gart + gtt) < gart_limit &&
+           (cs->csc->used_vram + vram) < vram_limit;
+}
+
 static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
                                      struct radeon_winsys_cs_handle *buf)
 {
@@ -575,6 +585,7 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
    ws->base.cs_validate = radeon_drm_cs_validate;
+    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush;
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -312,6 +312,7 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
    case CHIP_TAHITI:
    case CHIP_PITCAIRN:
    case CHIP_VERDE:
+    case CHIP_OLAND:
        ws->info.chip_class = TAHITI;
        break;
    }
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -123,6 +123,7 @@ enum radeon_family {
    CHIP_TAHITI,
    CHIP_PITCAIRN,
    CHIP_VERDE,
+    CHIP_OLAND,
    CHIP_LAST,
 };

@@ -392,6 +393,16 @@ struct radeon_winsys {
     */
    boolean (*cs_validate)(struct radeon_winsys_cs *cs);

+    /**
+     * Return TRUE if there is enough memory in VRAM and GTT for the relocs
+     * added so far.
+     *
+     * \param cs        A command stream to validate.
+     * \param vram      VRAM memory size pending to be used
+     * \param gtt       GTT memory size pending to be used
+     */
+    boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt);
+
    /**
     * Write a relocated dword to a command buffer.
     *
--- a/src/glsl/link_uniform_blocks.cpp
+++ b/src/glsl/link_uniform_blocks.cpp
@@ -29,7 +29,7 @@
 #include "main/hash_table.h"
 #include "program.h"

-class ubo_visitor : public uniform_field_visitor {
+class ubo_visitor : public program_resource_visitor {
 public:
   ubo_visitor(void *mem_ctx, gl_uniform_buffer_variable *variables,
               unsigned num_variables)
@@ -44,7 +44,7 @@ public:
      this->offset = 0;
      this->buffer_size = 0;
      this->is_array_instance = strchr(name, ']') != NULL;
-      this->uniform_field_visitor::process(type, name);
+      this->program_resource_visitor::process(type, name);
   }

   unsigned index;
@@ -112,7 +112,7 @@ private:
   }
 };

-class count_block_size : public uniform_field_visitor {
+class count_block_size : public program_resource_visitor {
 public:
   count_block_size() : num_active_uniforms(0)
   {
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -52,7 +52,7 @@ values_for_type(const glsl_type *type)
 }

 void
-uniform_field_visitor::process(const glsl_type *type, const char *name)
+program_resource_visitor::process(const glsl_type *type, const char *name)
 {
   assert(type->is_record()
          || (type->is_array() && type->fields.array->is_record())
@@ -65,7 +65,7 @@ uniform_field_visitor::process(const glsl_type *type, const char *name)
 }

 void
-uniform_field_visitor::process(ir_variable *var)
+program_resource_visitor::process(ir_variable *var)
 {
   const glsl_type *t = var->type;

@@ -93,8 +93,8 @@ uniform_field_visitor::process(ir_variable *var)
 }

 void
-uniform_field_visitor::recursion(const glsl_type *t, char **name,
-                                 size_t name_length, bool row_major)
+program_resource_visitor::recursion(const glsl_type *t, char **name,
+                                    size_t name_length, bool row_major)
 {
   /* Records need to have each field processed individually.
    *
@@ -110,7 +110,7 @@ uniform_field_visitor::recursion(const glsl_type *t, char **name,
         if (t->fields.structure[i].type->is_record())
            this->visit_field(&t->fields.structure[i]);

-         /* Append '.field' to the current uniform name. */
+         /* Append '.field' to the current variable name. */
         if (name_length == 0) {
            ralloc_asprintf_rewrite_tail(name, &new_length, "%s", field);
         } else {
@@ -125,7 +125,7 @@ uniform_field_visitor::recursion(const glsl_type *t, char **name,
      for (unsigned i = 0; i < t->length; i++) {
 	 size_t new_length = name_length;

-	 /* Append the subscript to the current uniform name */
+	 /* Append the subscript to the current variable name */
 	 ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);

         recursion(t->fields.array, name, new_length,
@@ -137,7 +137,7 @@ uniform_field_visitor::recursion(const glsl_type *t, char **name,
 }

 void
-uniform_field_visitor::visit_field(const glsl_struct_field *field)
+program_resource_visitor::visit_field(const glsl_struct_field *field)
 {
   (void) field;
   /* empty */
@@ -153,7 +153,7 @@ uniform_field_visitor::visit_field(const glsl_struct_field *field)
 * If the same uniform is added multiple times (i.e., once for each shader
 * target), it will only be accounted once.
 */
-class count_uniform_size : public uniform_field_visitor {
+class count_uniform_size : public program_resource_visitor {
 public:
   count_uniform_size(struct string_to_uint_map *map)
      : num_active_uniforms(0), num_values(0), num_shader_samplers(0),
@@ -171,10 +171,10 @@ public:
   void process(ir_variable *var)
   {
      if (var->is_interface_instance())
-         uniform_field_visitor::process(var->interface_type,
-                                        var->interface_type->name);
+         program_resource_visitor::process(var->interface_type,
+                                           var->interface_type->name);
      else
-         uniform_field_visitor::process(var);
+         program_resource_visitor::process(var);
   }

   /**
@@ -258,7 +258,7 @@ private:
 * the \c gl_uniform_storage and \c gl_constant_value arrays are "big
 * enough."
 */
-class parcel_out_uniform_storage : public uniform_field_visitor {
+class parcel_out_uniform_storage : public program_resource_visitor {
 public:
   parcel_out_uniform_storage(struct string_to_uint_map *map,
 			      struct gl_uniform_storage *uniforms,
--- a/src/glsl/link_varyings.cpp
+++ b/src/glsl/link_varyings.cpp
@@ -35,6 +35,8 @@
 #include "linker.h"
 #include "link_varyings.h"
 #include "main/macros.h"
+#include "program/hash_table.h"
+#include "program.h"


 /**
@@ -154,10 +156,13 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog,

 /**
 * Initialize this object based on a string that was passed to
- * glTransformFeedbackVaryings.  If there is a parse error, the error is
- * reported using linker_error(), and false is returned.
+ * glTransformFeedbackVaryings.
+ *
+ * If the input is malformed, this call still succeeds, but it sets
+ * this->var_name to a malformed name, so tfeedback_decl::find_candidate()
+ * will fail to find any matching variable.
 */
-bool
+void
 tfeedback_decl::init(struct gl_context *ctx, struct gl_shader_program *prog,
                     const void *mem_ctx, const char *input)
 {
@@ -170,12 +175,13 @@ tfeedback_decl::init(struct gl_context *ctx, struct gl_shader_program *prog,
   this->is_clip_distance_mesa = false;
   this->skip_components = 0;
   this->next_buffer_separator = false;
+   this->matched_candidate = NULL;

   if (ctx->Extensions.ARB_transform_feedback3) {
      /* Parse gl_NextBuffer. */
      if (strcmp(input, "gl_NextBuffer") == 0) {
         this->next_buffer_separator = true;
-         return true;
+         return;
      }

      /* Parse gl_SkipComponents. */
@@ -189,21 +195,17 @@ tfeedback_decl::init(struct gl_context *ctx, struct gl_shader_program *prog,
         this->skip_components = 4;

      if (this->skip_components)
-         return true;
+         return;
   }

   /* Parse a declaration. */
-   const char *bracket = strrchr(input, '[');
-
-   if (bracket) {
-      this->var_name = ralloc_strndup(mem_ctx, input, bracket - input);
-      if (sscanf(bracket, "[%u]", &this->array_subscript) != 1) {
-         linker_error(prog, "Cannot parse transform feedback varying %s", input);
-         return false;
-      }
+   const char *base_name_end;
+   long subscript = parse_program_resource_name(input, &base_name_end);
+   this->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input);
+   if (subscript >= 0) {
+      this->array_subscript = subscript;
      this->is_subscripted = true;
   } else {
-      this->var_name = ralloc_strdup(mem_ctx, input);
      this->is_subscripted = false;
   }

@@ -215,8 +217,6 @@ tfeedback_decl::init(struct gl_context *ctx, struct gl_shader_program *prog,
       strcmp(this->var_name, "gl_ClipDistance") == 0) {
      this->is_clip_distance_mesa = true;
   }
-
-   return true;
 }


@@ -240,27 +240,32 @@ tfeedback_decl::is_same(const tfeedback_decl &x, const tfeedback_decl &y)


 /**
- * Assign a location for this tfeedback_decl object based on the location
- * assignment in output_var.
+ * Assign a location for this tfeedback_decl object based on the transform
+ * feedback candidate found by find_candidate.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
 bool
 tfeedback_decl::assign_location(struct gl_context *ctx,
-                                struct gl_shader_program *prog,
-                                ir_variable *output_var)
+                                struct gl_shader_program *prog)
 {
   assert(this->is_varying());

-   if (output_var->type->is_array()) {
+   unsigned fine_location
+      = this->matched_candidate->toplevel_var->location * 4
+      + this->matched_candidate->toplevel_var->location_frac
+      + this->matched_candidate->offset;
+
+   if (this->matched_candidate->type->is_array()) {
      /* Array variable */
      const unsigned matrix_cols =
-         output_var->type->fields.array->matrix_columns;
+         this->matched_candidate->type->fields.array->matrix_columns;
      const unsigned vector_elements =
-         output_var->type->fields.array->vector_elements;
+         this->matched_candidate->type->fields.array->vector_elements;
      unsigned actual_array_size = this->is_clip_distance_mesa ?
-         prog->Vert.ClipDistanceArraySize : output_var->type->array_size();
+         prog->Vert.ClipDistanceArraySize :
+         this->matched_candidate->type->array_size();

      if (this->is_subscripted) {
         /* Check array bounds. */
@@ -271,22 +276,11 @@ tfeedback_decl::assign_location(struct gl_context *ctx,
                         actual_array_size);
            return false;
         }
-         if (this->is_clip_distance_mesa) {
-            this->location =
-               output_var->location + this->array_subscript / 4;
-            this->location_frac = this->array_subscript % 4;
-         } else {
-            unsigned fine_location
-               = output_var->location * 4 + output_var->location_frac;
-            unsigned array_elem_size = vector_elements * matrix_cols;
-            fine_location += array_elem_size * this->array_subscript;
-            this->location = fine_location / 4;
-            this->location_frac = fine_location % 4;
-         }
+         unsigned array_elem_size = this->is_clip_distance_mesa ?
+            1 : vector_elements * matrix_cols;
+         fine_location += array_elem_size * this->array_subscript;
         this->size = 1;
      } else {
-         this->location = output_var->location;
-         this->location_frac = output_var->location_frac;
         this->size = actual_array_size;
      }
      this->vector_elements = vector_elements;
@@ -294,7 +288,7 @@ tfeedback_decl::assign_location(struct gl_context *ctx,
      if (this->is_clip_distance_mesa)
         this->type = GL_FLOAT;
      else
-         this->type = output_var->type->fields.array->gl_type;
+         this->type = this->matched_candidate->type->fields.array->gl_type;
   } else {
      /* Regular variable (scalar, vector, or matrix) */
      if (this->is_subscripted) {
@@ -303,13 +297,13 @@ tfeedback_decl::assign_location(struct gl_context *ctx,
                      this->orig_name, this->var_name);
         return false;
      }
-      this->location = output_var->location;
-      this->location_frac = output_var->location_frac;
      this->size = 1;
-      this->vector_elements = output_var->type->vector_elements;
-      this->matrix_columns = output_var->type->matrix_columns;
-      this->type = output_var->type->gl_type;
+      this->vector_elements = this->matched_candidate->type->vector_elements;
+      this->matrix_columns = this->matched_candidate->type->matrix_columns;
+      this->type = this->matched_candidate->type->gl_type;
   }
+   this->location = fine_location / 4;
+   this->location_frac = fine_location % 4;

   /* From GL_EXT_transform_feedback:
    *   A program will fail to link if:
@@ -404,35 +398,26 @@ tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog,
 }


-ir_variable *
-tfeedback_decl::find_output_var(gl_shader_program *prog,
-                                gl_shader *producer) const
+const tfeedback_candidate *
+tfeedback_decl::find_candidate(gl_shader_program *prog,
+                               hash_table *tfeedback_candidates)
 {
   const char *name = this->is_clip_distance_mesa
      ? "gl_ClipDistanceMESA" : this->var_name;
-   ir_variable *var = producer->symbols->get_variable(name);
-   if (var && var->mode == ir_var_shader_out) {
-      const glsl_type *type = var->type;
-      while (type->base_type == GLSL_TYPE_ARRAY)
-         type = type->fields.array;
-      if (type->base_type == GLSL_TYPE_STRUCT) {
-         linker_error(prog, "Transform feedback of varying structs not "
-                      "implemented yet.");
-         return NULL;
-      }
-      return var;
+   this->matched_candidate = (const tfeedback_candidate *)
+      hash_table_find(tfeedback_candidates, name);
+   if (!this->matched_candidate) {
+      /* From GL_EXT_transform_feedback:
+       *   A program will fail to link if:
+       *
+       *   * any variable name specified in the <varyings> array is not
+       *     declared as an output in the geometry shader (if present) or
+       *     the vertex shader (if no geometry shader is present);
+       */
+      linker_error(prog, "Transform feedback varying %s undeclared.",
+                   this->orig_name);
   }
-
-   /* From GL_EXT_transform_feedback:
-    *   A program will fail to link if:
-    *
-    *   * any variable name specified in the <varyings> array is not
-    *     declared as an output in the geometry shader (if present) or
-    *     the vertex shader (if no geometry shader is present);
-    */
-   linker_error(prog, "Transform feedback varying %s undeclared.",
-                this->orig_name);
-   return NULL;
+   return this->matched_candidate;
 }


@@ -449,8 +434,7 @@ parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog,
                      char **varying_names, tfeedback_decl *decls)
 {
   for (unsigned i = 0; i < num_names; ++i) {
-      if (!decls[i].init(ctx, prog, mem_ctx, varying_names[i]))
-         return false;
+      decls[i].init(ctx, prog, mem_ctx, varying_names[i]);

      if (!decls[i].is_varying())
         continue;
@@ -870,6 +854,80 @@ is_varying_var(GLenum shaderType, const ir_variable *var)
 }


+/**
+ * Visitor class that generates tfeedback_candidate structs describing all
+ * possible targets of transform feedback.
+ *
+ * tfeedback_candidate structs are stored in the hash table
+ * tfeedback_candidates, which is passed to the constructor.  This hash table
+ * maps varying names to instances of the tfeedback_candidate struct.
+ */
+class tfeedback_candidate_generator : public program_resource_visitor
+{
+public:
+   tfeedback_candidate_generator(void *mem_ctx,
+                                 hash_table *tfeedback_candidates)
+      : mem_ctx(mem_ctx),
+        tfeedback_candidates(tfeedback_candidates)
+   {
+   }
+
+   /** Reset the per-variable state, then traverse \c var. */
+   void process(ir_variable *var)
+   {
+      toplevel_var = var;
+      varying_floats = 0;
+
+      if (!var->is_interface_instance()) {
+         program_resource_visitor::process(var);
+         return;
+      }
+      program_resource_visitor::process(var->interface_type,
+                                        var->interface_type->name);
+   }
+
+private:
+   /** Store one candidate, keyed by a copy of \c name, for \c type. */
+   virtual void visit_field(const glsl_type *type, const char *name,
+                            bool row_major)
+   {
+      (void) row_major;
+
+      assert(!type->is_record());
+      assert(!(type->is_array() && type->fields.array->is_record()));
+      assert(!type->is_interface());
+      assert(!(type->is_array() && type->fields.array->is_interface()));
+
+      tfeedback_candidate *const entry
+         = rzalloc(mem_ctx, tfeedback_candidate);
+      entry->offset = varying_floats;
+      entry->type = type;
+      entry->toplevel_var = toplevel_var;
+
+      char *const key = ralloc_strdup(mem_ctx, name);
+      hash_table_insert(tfeedback_candidates, entry, key);
+      varying_floats += type->component_slots();
+   }
+
+   /**
+    * Context passed to rzalloc() and ralloc_strdup() for table entries.
+    */
+   void * const mem_ctx;
+
+   /** Table receiving one tfeedback_candidate per visited field. */
+   hash_table * const tfeedback_candidates;
+
+   /** Toplevel variable most recently handed to process(). */
+   ir_variable *toplevel_var;
+
+   /**
+    * Component slots visited so far within toplevel_var; its value when a
+    * field is visited becomes that field's candidate offset.
+    */
+   unsigned varying_floats;
+};
+
+
 /**
 * Assign locations for all variables that are produced in one pipeline stage
 * (the "producer") and consumed in the next stage (the "consumer").
@@ -902,6 +960,8 @@ assign_varying_locations(struct gl_context *ctx,
   const unsigned producer_base = VERT_RESULT_VAR0;
   const unsigned consumer_base = FRAG_ATTRIB_VAR0;
   varying_matches matches(ctx->Const.DisableVaryingPacking);
+   hash_table *tfeedback_candidates
+      = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare);

   /* Operate in a total of three passes.
    *
@@ -920,6 +980,9 @@ assign_varying_locations(struct gl_context *ctx,
      if ((output_var == NULL) || (output_var->mode != ir_var_shader_out))
 	 continue;

+      tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates);
+      g.process(output_var);
+
      ir_variable *input_var =
 	 consumer ? consumer->symbols->get_variable(output_var->name) : NULL;

@@ -935,15 +998,16 @@ assign_varying_locations(struct gl_context *ctx,
      if (!tfeedback_decls[i].is_varying())
         continue;

-      ir_variable *output_var
-         = tfeedback_decls[i].find_output_var(prog, producer);
+      const tfeedback_candidate *matched_candidate
+         = tfeedback_decls[i].find_candidate(prog, tfeedback_candidates);

-      if (output_var == NULL)
+      if (matched_candidate == NULL) {
+         hash_table_dtor(tfeedback_candidates);
         return false;
-
-      if (output_var->is_unmatched_generic_inout) {
-         matches.record(output_var, NULL);
      }
+
+      if (matched_candidate->toplevel_var->is_unmatched_generic_inout)
+         matches.record(matched_candidate->toplevel_var, NULL);
   }

   const unsigned slots_used = matches.assign_locations();
@@ -953,13 +1017,14 @@ assign_varying_locations(struct gl_context *ctx,
      if (!tfeedback_decls[i].is_varying())
         continue;

-      ir_variable *output_var
-         = tfeedback_decls[i].find_output_var(prog, producer);
-
-      if (!tfeedback_decls[i].assign_location(ctx, prog, output_var))
+      if (!tfeedback_decls[i].assign_location(ctx, prog)) {
+         hash_table_dtor(tfeedback_candidates);
         return false;
+      }
   }

+   hash_table_dtor(tfeedback_candidates);
+
   if (ctx->Const.DisableVaryingPacking) {
      /* Transform feedback code assumes varyings are packed, so if the driver
       * has disabled varying packing, make sure it does not support transform
--- a/src/glsl/link_varyings.h
+++ b/src/glsl/link_varyings.h
@@ -41,6 +41,49 @@ struct gl_shader;
 class ir_variable;


+/**
+ * Data structure describing a varying which is available for use in transform
+ * feedback.
+ *
+ * For example, if the vertex shader contains:
+ *
+ *     struct S {
+ *       vec4 foo;
+ *       float[3] bar;
+ *     };
+ *
+ *     varying S[2] v;
+ *
+ * Then there would be tfeedback_candidate objects corresponding to the
+ * following varyings:
+ *
+ *     v[0].foo
+ *     v[0].bar
+ *     v[1].foo
+ *     v[1].bar
+ */
+struct tfeedback_candidate
+{
+   /**
+    * Toplevel variable containing this varying.  In the above example, this
+    * would point to the declaration of the varying v.
+    */
+   ir_variable *toplevel_var;
+
+   /**
+    * Type of this varying, e.g. the glsl_type for "vec4" or "float[3]" above.
+    */
+   const glsl_type *type;
+
+   /**
+    * Offset within the toplevel variable where this varying occurs (counted
+    * in multiples of the size of a float).  tfeedback_decl::assign_location()
+    * adds it to toplevel_var's location * 4 + location_frac.
+    */
+   unsigned offset;
+};
+
+
 /**
 * Data structure tracking information about a transform feedback declaration
 * during linking.
@@ -48,17 +91,17 @@ class ir_variable;
 class tfeedback_decl
 {
 public:
-   bool init(struct gl_context *ctx, struct gl_shader_program *prog,
+   void init(struct gl_context *ctx, struct gl_shader_program *prog,
             const void *mem_ctx, const char *input);
   static bool is_same(const tfeedback_decl &x, const tfeedback_decl &y);
-   bool assign_location(struct gl_context *ctx, struct gl_shader_program *prog,
-                        ir_variable *output_var);
+   bool assign_location(struct gl_context *ctx,
+                        struct gl_shader_program *prog);
   unsigned get_num_outputs() const;
   bool store(struct gl_context *ctx, struct gl_shader_program *prog,
              struct gl_transform_feedback_info *info, unsigned buffer,
              const unsigned max_outputs) const;
-   ir_variable *find_output_var(gl_shader_program *prog,
-                                gl_shader *producer) const;
+   const tfeedback_candidate *find_candidate(gl_shader_program *prog,
+                                             hash_table *tfeedback_candidates);

   bool is_next_buffer_separator() const
   {
@@ -158,6 +201,12 @@ private:
    * Whether this is gl_NextBuffer from ARB_transform_feedback3.
    */
   bool next_buffer_separator;
+
+   /**
+    * If find_candidate() has been called, pointer to the tfeedback_candidate
+    * data structure that was found.  Otherwise NULL.
+    */
+   const tfeedback_candidate *matched_candidate;
 };


--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -200,6 +200,65 @@ linker_warning(gl_shader_program *prog, const char *fmt, ...)
 }


+/**
+ * Given a string identifying a program resource, break it into a base name
+ * and an optional array index in square brackets.
+ *
+ * If an array index is present, \c out_base_name_end is set to point to the
+ * "[" that precedes the array index, and the array index itself is returned
+ * as a long.
+ *
+ * If no array index is present (or if the array index is negative or
+ * mal-formed), \c out_base_name_end is set to point to the null terminator
+ * at the end of the input string, and -1 is returned.
+ *
+ * Only the final array index is parsed; if the string contains other array
+ * indices (or structure field accesses), they are left in the base name.
+ *
+ * No attempt is made to check that the base name is properly formed;
+ * typically the caller will look up the base name in a hash table, so
+ * ill-formed base names simply turn into hash table lookup failures.
+ */
+long
+parse_program_resource_name(const GLchar *name,
+                            const GLchar **out_base_name_end)
+{
+   /* Section 7.3.1 ("Program Interfaces") of the OpenGL 4.3 spec says:
+    *
+    *     "When an integer array element or block instance number is part of
+    *     the name string, it will be specified in decimal form without a "+"
+    *     or "-" sign or any extra leading zeroes. Additionally, the name
+    *     string will not include white space anywhere in the string."
+    */
+
+   const size_t len = strlen(name);
+   *out_base_name_end = name + len;
+
+   if (len == 0 || name[len-1] != ']')
+      return -1;
+
+   /* Walk backwards over the string looking for a non-digit character.  This
+    * had better be the opening bracket for an array index.
+    *
+    * Initially, i specifies the location of the ']'.  Since the string may
+    * contain only the ']' character, walk backwards very carefully.
+    */
+   unsigned i;
+   for (i = len - 1; (i > 0) && isdigit(name[i-1]); --i)
+      /* empty */ ;
+
+   if ((i == 0) || name[i-1] != '[')
+      return -1;
+
+   long array_index = strtol(&name[i], NULL, 10);
+   if (array_index < 0)
+      return -1;
+
+   *out_base_name_end = name + (i - 1);
+   return array_index;
+}
+
+
 void
 link_invalidate_variable_locations(gl_shader *sh, int input_base,
                                   int output_base)
--- a/src/glsl/linker.h
+++ b/src/glsl/linker.h
@@ -61,38 +61,39 @@ link_uniform_blocks(void *mem_ctx,
                    struct gl_uniform_block **blocks_ret);

 /**
- * Class for processing all of the leaf fields of an uniform
+ * Class for processing all of the leaf fields of a variable that corresponds
+ * to a program resource.
 *
- * Leaves are, roughly speaking, the parts of the uniform that the application
- * could query with \c glGetUniformLocation (or that could be returned by
- * \c glGetActiveUniforms).
+ * The leaf fields are all the parts of the variable that the application
+ * could query using \c glGetProgramResourceIndex (or that could be returned
+ * by \c glGetProgramResourceName).
 *
 * Classes my derive from this class to implement specific functionality.
 * This class only provides the mechanism to iterate over the leaves.  Derived
 * classes must implement \c ::visit_field and may override \c ::process.
 */
-class uniform_field_visitor {
+class program_resource_visitor {
 public:
   /**
-    * Begin processing a uniform
+    * Begin processing a variable
    *
    * Classes that overload this function should call \c ::process from the
-    * base class to start the recursive processing of the uniform.
+    * base class to start the recursive processing of the variable.
    *
-    * \param var  The uniform variable that is to be processed
+    * \param var  The variable that is to be processed
    *
-    * Calls \c ::visit_field for each leaf of the uniform.
+    * Calls \c ::visit_field for each leaf of the variable.
    *
    * \warning
-    * This entry should only be used with uniform blocks in cases where the
-    * row / column ordering of matrices in the block does not matter.  For
-    * example, enumerating the names of members of the block, but not for
-    * determining the offsets of members.
+    * When processing a uniform block, this entry should only be used in cases
+    * where the row / column ordering of matrices in the block does not
+    * matter.  For example, enumerating the names of members of the block, but
+    * not for determining the offsets of members.
    */
   void process(ir_variable *var);

   /**
-    * Begin processing a uniform of a structured type.
+    * Begin processing a variable of a structured type.
    *
    * This flavor of \c process should be used to handle structured types
    * (i.e., structures, interfaces, or arrays there of) that need special
@@ -100,7 +101,7 @@ public:
    * (instead of the instance name) is used for an interface block.
    *
    * \param type  Type that is to be processed, associated with \c name
-    * \param name  Base name of the structured uniform being processed
+    * \param name  Base name of the structured variable being processed
    *
    * \note
    * \c type must be \c GLSL_TYPE_RECORD, \c GLSL_TYPE_INTERFACE, or an array
@@ -110,7 +111,7 @@ public:

 protected:
   /**
-    * Method invoked for each leaf of the uniform
+    * Method invoked for each leaf of the variable
    *
    * \param type  Type of the field.
    * \param name  Fully qualified name of the field.
--- a/src/glsl/program.h
+++ b/src/glsl/program.h
@@ -33,3 +33,7 @@ linker_error(gl_shader_program *prog, const char *fmt, ...)
 extern void
 linker_warning(gl_shader_program *prog, const char *fmt, ...)
   PRINTFLIKE(2, 3);
+
+extern long
+parse_program_resource_name(const GLchar *name,
+                            const GLchar **out_base_name_end);
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -23,6 +23,7 @@

 #include "main/teximage.h"
 #include "main/fbobject.h"
+#include "main/renderbuffer.h"

 #include "glsl/ralloc.h"

@@ -183,10 +184,19 @@ formats_match(GLbitfield buffer_bit, struct intel_renderbuffer *src_irb,
   gl_format src_format = find_miptree(buffer_bit, src_irb)->format;
   gl_format dst_format = find_miptree(buffer_bit, dst_irb)->format;

-   return _mesa_get_srgb_format_linear(src_format) ==
-          _mesa_get_srgb_format_linear(dst_format);
-}
+   gl_format linear_src_format = _mesa_get_srgb_format_linear(src_format);
+   gl_format linear_dst_format = _mesa_get_srgb_format_linear(dst_format);

+   /* Normally, we require the formats to be equal.  However, we also support
+    * blitting from ARGB to XRGB (discarding alpha), and from XRGB to ARGB
+    * (overriding alpha to 1.0 via blending).
+    */
+   return linear_src_format == linear_dst_format ||
+          (linear_src_format == MESA_FORMAT_XRGB8888 &&
+           linear_dst_format == MESA_FORMAT_ARGB8888) ||
+          (linear_src_format == MESA_FORMAT_ARGB8888 &&
+           linear_dst_format == MESA_FORMAT_XRGB8888);
+}

 static bool
 try_blorp_blit(struct intel_context *intel,
@@ -295,6 +305,93 @@ try_blorp_blit(struct intel_context *intel,
   return true;
 }

+bool
+brw_blorp_copytexsubimage(struct intel_context *intel,
+                          struct gl_renderbuffer *src_rb,
+                          struct gl_texture_image *dst_image,
+                          int srcX0, int srcY0,
+                          int dstX0, int dstY0,
+                          int width, int height)
+{
+   struct gl_context *ctx = &intel->ctx;
+   struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb);
+   struct intel_renderbuffer *dst_irb;
+
+   /* BLORP is not supported before Gen6. */
+   if (intel->gen < 6)
+      return false;
+
+   /* Create a fake/wrapper renderbuffer to allow us to use do_blorp_blit(). */
+   dst_irb = intel_create_fake_renderbuffer_wrapper(intel, dst_image);
+   if (!dst_irb)
+      return false;
+
+   struct gl_renderbuffer *dst_rb = &dst_irb->Base.Base;
+
+   /* Unlike BlitFramebuffer, CopyTexSubImage doesn't have a buffer bit.
+    * It's only used by find_miptree() to decide whether to dereference the
+    * separate stencil miptree.  In the case of packed depth/stencil, core
+    * Mesa hands us the depth attachment as src_rb (not stencil), so assume
+    * non-stencil for now.  A buffer bit of 0 works for both color and depth.
+    */
+   GLbitfield buffer_bit = 0;
+
+   if (!formats_match(buffer_bit, src_irb, dst_irb)) {
+      _mesa_delete_renderbuffer(ctx, dst_rb);
+      return false;
+   }
+
+   /* Source clipping shouldn't be necessary, since copytexsubimage (in
+    * src/mesa/main/teximage.c) calls _mesa_clip_copytexsubimage() which
+    * takes care of it.
+    *
+    * Destination clipping shouldn't be necessary since the restrictions on
+    * glCopyTexSubImage prevent the user from specifying a destination rectangle
+    * that falls outside the bounds of the destination texture.
+    * See error_check_subtexture_dimensions().
+    */
+
+   int srcY1 = srcY0 + height;
+   int dstX1 = dstX0 + width;
+   int dstY1 = dstY0 + height;
+
+   /* Sync up the state of window system buffers.  We need to do this before
+    * we go looking for the buffers.
+    */
+   intel_prepare_render(intel);
+
+   /* Account for the fact that in the system framebuffer, the origin is at
+    * the lower left.
+    */
+   bool mirror_y = false;
+   if (_mesa_is_winsys_fbo(ctx->ReadBuffer)) {
+      GLint tmp = src_rb->Height - srcY0;
+      srcY0 = src_rb->Height - srcY1;
+      srcY1 = tmp;
+      mirror_y = true;
+   }
+
+   do_blorp_blit(intel, buffer_bit, src_irb, dst_irb,
+                 srcX0, srcY0, dstX0, dstY0, dstX1, dstY1, false, mirror_y);
+
+   /* If we're copying a packed depth stencil texture, the above do_blorp_blit
+    * copied depth (since buffer_bit != GL_STENCIL_BUFFER_BIT).  Now copy
+    * stencil as well.  There's no need to do a formats_match() check because
+    * the separate stencil buffer is always S8.
+    */
+   src_rb = ctx->ReadBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
+   if (_mesa_get_format_bits(dst_image->TexFormat, GL_STENCIL_BITS) > 0 &&
+       src_rb != NULL) {
+      src_irb = intel_renderbuffer(src_rb);
+      do_blorp_blit(intel, GL_STENCIL_BUFFER_BIT, src_irb, dst_irb,
+                    srcX0, srcY0, dstX0, dstY0, dstX1, dstY1, false, mirror_y);
+   }
+
+   _mesa_delete_renderbuffer(ctx, dst_rb);
+   return true;
+}
+
+
 GLbitfield
 brw_blorp_framebuffer(struct intel_context *intel,
                      GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
@@ -1642,17 +1739,6 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
   src.set(brw, src_mt, src_level, src_layer);
   dst.set(brw, dst_mt, dst_level, dst_layer);

-   /* If we are blitting from sRGB to linear or vice versa, we still want the
-    * blit to be a direct copy, so we need source and destination to use the
-    * same format.  However, we want the destination sRGB/linear state to be
-    * correct (so that sRGB blending is used when doing an MSAA resolve to an
-    * sRGB surface, and linear blending is used when doing an MSAA resolve to
-    * a linear surface).  Since blorp blits don't support any format
-    * conversion (except between sRGB and linear), we can accomplish this by
-    * simply setting up the source to use the same format as the destination.
-    */
-   assert(_mesa_get_srgb_format_linear(src_mt->format) ==
-          _mesa_get_srgb_format_linear(dst_mt->format));
   src.brw_surfaceformat = dst.brw_surfaceformat;

   use_wm_prog = true;
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -278,7 +278,23 @@ brwCreateContext(int api,
   }

   /* WM maximum threads is number of EUs times number of threads per EU. */
-   if (intel->gen >= 7) {
+   assert(intel->gen <= 7);
+
+   if (intel->is_haswell) {
+      if (intel->gt == 1) {
+	 brw->max_wm_threads = 102;
+	 brw->max_vs_threads = 70;
+	 brw->urb.size = 128;
+	 brw->urb.max_vs_entries = 640;
+	 brw->urb.max_gs_entries = 256;
+      } else if (intel->gt == 2) {
+	 brw->max_wm_threads = 204;
+	 brw->max_vs_threads = 280;
+	 brw->urb.size = 256;
+	 brw->urb.max_vs_entries = 1664;
+	 brw->urb.max_gs_entries = 640;
+      }
+   } else if (intel->gen == 7) {
      if (intel->gt == 1) {
 	 brw->max_wm_threads = 48;
 	 brw->max_vs_threads = 36;
@@ -360,6 +376,7 @@ brwCreateContext(int api,

   ctx->Const.NativeIntegers = true;
   ctx->Const.UniformBooleanTrue = 1;
+   ctx->Const.UniformBufferOffsetAlignment = 16;

   ctx->Const.ForceGLSLExtensionsWarn = driQueryOptionb(&intel->optionCache, "force_glsl_extensions_warn");

--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1217,6 +1217,14 @@ brw_blorp_framebuffer(struct intel_context *intel,
                      GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
                      GLbitfield mask, GLenum filter);

+bool
+brw_blorp_copytexsubimage(struct intel_context *intel,
+                          struct gl_renderbuffer *src_rb,
+                          struct gl_texture_image *dst_image,
+                          int srcX0, int srcY0,
+                          int dstX0, int dstY0,
+                          int width, int height);
+
 /* gen6_multisample_state.c */
 void
 gen6_emit_3dstate_multisample(struct brw_context *brw,
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -951,8 +951,8 @@ fs_generator::generate_pack_half_2x16_split(fs_inst *inst,
 {
   assert(intel->gen >= 7);
   assert(dst.type == BRW_REGISTER_TYPE_UD);
-   assert(x.type = BRW_REGISTER_TYPE_F);
-   assert(y.type = BRW_REGISTER_TYPE_F);
+   assert(x.type == BRW_REGISTER_TYPE_F);
+   assert(y.type == BRW_REGISTER_TYPE_F);

   /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
    *
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -225,7 +225,7 @@ void upload_default_color(struct brw_context *brw,
 /* gen6_sf_state.c */
 uint32_t
 get_attr_override(struct brw_vue_map *vue_map, int urb_entry_read_offset,
-                  int fs_attr, bool two_side_color);
+                  int fs_attr, bool two_side_color, uint32_t *max_source_attr);

 #ifdef __cplusplus
 }
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -283,6 +283,25 @@ gen6_blorp_emit_blend_state(struct brw_context *brw,
   blend->blend1.write_disable_b = false;
   blend->blend1.write_disable_a = false;

+   /* When blitting from an XRGB source to an ARGB destination, we need to
+    * interpret the missing channel as 1.0.  Blending can do that for us:
+    * we simply use the RGB values from the fragment shader ("source RGB"),
+    * but smash the alpha channel to 1.
+    */
+   if (_mesa_get_format_bits(params->dst.mt->format, GL_ALPHA_BITS) > 0 &&
+       _mesa_get_format_bits(params->src.mt->format, GL_ALPHA_BITS) == 0) {
+      blend->blend0.blend_enable = 1;
+      blend->blend0.ia_blend_enable = 1;
+
+      blend->blend0.blend_func = BRW_BLENDFUNCTION_ADD;
+      blend->blend0.ia_blend_func = BRW_BLENDFUNCTION_ADD;
+
+      blend->blend0.source_blend_factor = BRW_BLENDFACTOR_SRC_COLOR;
+      blend->blend0.dest_blend_factor = BRW_BLENDFACTOR_ZERO;
+      blend->blend0.ia_source_blend_factor = BRW_BLENDFACTOR_ONE;
+      blend->blend0.ia_dest_blend_factor = BRW_BLENDFACTOR_ZERO;
+   }
+
   return cc_blend_state_offset;
 }

--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -54,9 +54,8 @@
 */
 uint32_t
 get_attr_override(struct brw_vue_map *vue_map, int urb_entry_read_offset,
-                  int fs_attr, bool two_side_color)
+                  int fs_attr, bool two_side_color, uint32_t *max_source_attr)
 {
-   int attr_override, slot;
   int vs_attr = _mesa_frag_attrib_to_vert_result(fs_attr);
   if (vs_attr < 0 || vs_attr == VERT_RESULT_HPOS) {
      /* These attributes will be overwritten by the fragment shader's
@@ -67,7 +66,7 @@ get_attr_override(struct brw_vue_map *vue_map, int urb_entry_read_offset,
   }

   /* Find the VUE slot for this attribute. */
-   slot = vue_map->vert_result_to_slot[vs_attr];
+   int slot = vue_map->vert_result_to_slot[vs_attr];

   /* If there was only a back color written but not front, use back
    * as the color instead of undefined
@@ -89,23 +88,29 @@ get_attr_override(struct brw_vue_map *vue_map, int urb_entry_read_offset,
    * Each increment of urb_entry_read_offset represents a 256-bit value, so
    * it counts for two 128-bit VUE slots.
    */
-   attr_override = slot - 2 * urb_entry_read_offset;
-   assert (attr_override >= 0 && attr_override < 32);
+   int source_attr = slot - 2 * urb_entry_read_offset;
+   assert(source_attr >= 0 && source_attr < 32);

   /* If we are doing two-sided color, and the VUE slot following this one
    * represents a back-facing color, then we need to instruct the SF unit to
    * do back-facing swizzling.
    */
-   if (two_side_color) {
-      if (vue_map->slot_to_vert_result[slot] == VERT_RESULT_COL0 &&
-          vue_map->slot_to_vert_result[slot+1] == VERT_RESULT_BFC0)
-         attr_override |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT);
-      else if (vue_map->slot_to_vert_result[slot] == VERT_RESULT_COL1 &&
-               vue_map->slot_to_vert_result[slot+1] == VERT_RESULT_BFC1)
-         attr_override |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT);
+   bool swizzling = two_side_color &&
+      ((vue_map->slot_to_vert_result[slot] == VERT_RESULT_COL0 &&
+        vue_map->slot_to_vert_result[slot+1] == VERT_RESULT_BFC0) ||
+       (vue_map->slot_to_vert_result[slot] == VERT_RESULT_COL1 &&
+        vue_map->slot_to_vert_result[slot+1] == VERT_RESULT_BFC1));
+
+   /* Update max_source_attr.  If swizzling, the SF will read this slot + 1. */
+   if (*max_source_attr < source_attr + swizzling)
+      *max_source_attr = source_attr + swizzling;
+
+   if (swizzling) {
+      return source_attr |
+         (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT);
   }

-   return attr_override;
+   return source_attr;
 }

 static void
@@ -113,7 +118,6 @@ upload_sf_state(struct brw_context *brw)
 {
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
-   uint32_t urb_entry_read_length;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead);
   /* _NEW_LIGHT */
@@ -130,21 +134,7 @@ upload_sf_state(struct brw_context *brw)
   uint16_t attr_overrides[FRAG_ATTRIB_MAX];
   uint32_t point_sprite_origin;

-   /* CACHE_NEW_VS_PROG */
-   urb_entry_read_length = ((brw->vs.prog_data->vue_map.num_slots + 1) / 2 -
-			    urb_entry_read_offset);
-   if (urb_entry_read_length == 0) {
-      /* Setting the URB entry read length to 0 causes undefined behavior, so
-       * if we have no URB data to read, set it to 1.
-       */
-      urb_entry_read_length = 1;
-   }
-
-   dw1 =
-      GEN6_SF_SWIZZLE_ENABLE |
-      num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT |
-      urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
-      urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
+   dw1 = GEN6_SF_SWIZZLE_ENABLE | num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT;

   dw2 = GEN6_SF_STATISTICS_ENABLE |
         GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
@@ -280,6 +270,7 @@ upload_sf_state(struct brw_context *brw)
   /* Create the mapping from the FS inputs we produce to the VS outputs
    * they source from.
    */
+   uint32_t max_source_attr = 0;
   for (; attr < FRAG_ATTRIB_MAX; attr++) {
      enum glsl_interp_qualifier interp_qualifier =
         brw->fragment_program->InterpQualifier[attr];
@@ -315,12 +306,30 @@ upload_sf_state(struct brw_context *brw)
      attr_overrides[input_index++] =
         get_attr_override(&brw->vs.prog_data->vue_map,
 			   urb_entry_read_offset, attr,
-                           ctx->VertexProgram._TwoSideEnabled);
+                           ctx->VertexProgram._TwoSideEnabled,
+                           &max_source_attr);
   }

   for (; input_index < FRAG_ATTRIB_MAX; input_index++)
      attr_overrides[input_index] = 0;

+   /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
+    * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
+    *
+    * "This field should be set to the minimum length required to read the
+    *  maximum source attribute.  The maximum source attribute is indicated
+    *  by the maximum value of the enabled Attribute # Source Attribute if
+    *  Attribute Swizzle Enable is set, Number of Output Attributes-1 if
+    *  enable is not set.
+    *  read_length = ceiling((max_source_attr + 1) / 2)
+    *
+    *  [errata] Corruption/Hang possible if length programmed larger than
+    *  recommended"
+    */
+   uint32_t urb_entry_read_length = ALIGN(max_source_attr + 1, 2) / 2;
+      dw1 |= urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
+             urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
+
   BEGIN_BATCH(20);
   OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
   OUT_BATCH(dw1);
--- a/src/mesa/drivers/dri/i965/gen7_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c
@@ -34,7 +34,6 @@ upload_sbe_state(struct brw_context *brw)
 {
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
-   uint32_t urb_entry_read_length;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead);
   /* _NEW_LIGHT */
@@ -48,22 +47,8 @@ upload_sbe_state(struct brw_context *brw)
   bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
   uint32_t point_sprite_origin;

-   /* CACHE_NEW_VS_PROG */
-   urb_entry_read_length = ((brw->vs.prog_data->vue_map.num_slots + 1) / 2 -
-			    urb_entry_read_offset);
-   if (urb_entry_read_length == 0) {
-      /* Setting the URB entry read length to 0 causes undefined behavior, so
-       * if we have no URB data to read, set it to 1.
-       */
-      urb_entry_read_length = 1;
-   }
-
   /* FINISHME: Attribute Swizzle Control Mode? */
-   dw1 =
-      GEN7_SBE_SWIZZLE_ENABLE |
-      num_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT |
-      urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
-      urb_entry_read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
+   dw1 = GEN7_SBE_SWIZZLE_ENABLE | num_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT;

   /* _NEW_POINT
    *
@@ -84,6 +69,7 @@ upload_sbe_state(struct brw_context *brw)
   /* Create the mapping from the FS inputs we produce to the VS outputs
    * they source from.
    */
+   uint32_t max_source_attr = 0;
   for (; attr < FRAG_ATTRIB_MAX; attr++) {
      enum glsl_interp_qualifier interp_qualifier =
         brw->fragment_program->InterpQualifier[attr];
@@ -118,9 +104,25 @@ upload_sbe_state(struct brw_context *brw)
      attr_overrides[input_index++] =
         get_attr_override(&brw->vs.prog_data->vue_map,
 			   urb_entry_read_offset, attr,
-                           ctx->VertexProgram._TwoSideEnabled);
+                           ctx->VertexProgram._TwoSideEnabled,
+                           &max_source_attr);
   }

+   /* From the Ivy Bridge PRM, Volume 2, Part 1, documentation for
+    * 3DSTATE_SBE DWord 1 bits 15:11, "Vertex URB Entry Read Length":
+    *
+    * "This field should be set to the minimum length required to read the
+    *  maximum source attribute.  The maximum source attribute is indicated
+    *  by the maximum value of the enabled Attribute # Source Attribute if
+    *  Attribute Swizzle Enable is set, Number of Output Attributes-1 if
+    *  enable is not set.
+    *
+    *  read_length = ceiling((max_source_attr + 1) / 2)"
+    */
+   uint32_t urb_entry_read_length = ALIGN(max_source_attr + 1, 2) / 2;
+   dw1 |= urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
+          urb_entry_read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
+
   for (; input_index < FRAG_ATTRIB_MAX; input_index++)
      attr_overrides[input_index] = 0;

--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -531,6 +531,36 @@ intel_renderbuffer_update_wrapper(struct intel_context *intel,
   return true;
 }

+/**
+ * Create a fake intel_renderbuffer that wraps a gl_texture_image.
+ */
+struct intel_renderbuffer *
+intel_create_fake_renderbuffer_wrapper(struct intel_context *intel,
+                                       struct gl_texture_image *image)
+{
+   struct gl_context *ctx = &intel->ctx;
+   struct intel_renderbuffer *irb;
+   struct gl_renderbuffer *rb;
+
+   irb = CALLOC_STRUCT(intel_renderbuffer);
+   if (!irb) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer");
+      return NULL;
+   }
+
+   rb = &irb->Base.Base;
+
+   _mesa_init_renderbuffer(rb, 0);
+   rb->ClassID = INTEL_RB_CLASS;
+
+   if (!intel_renderbuffer_update_wrapper(intel, irb, image, image->Face)) {
+      intel_delete_renderbuffer(ctx, rb);
+      return NULL;
+   }
+
+   return irb;
+}
+
 void
 intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb)
 {
--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -140,6 +140,10 @@ intel_create_wrapped_renderbuffer(struct gl_context * ctx,
 				  int width, int height,
 				  gl_format format);

+struct intel_renderbuffer *
+intel_create_fake_renderbuffer_wrapper(struct intel_context *intel,
+                                       struct gl_texture_image *image);
+
 extern void
 intel_fbo_init(struct intel_context *intel);

--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -41,6 +41,9 @@
 #include "intel_fbo.h"
 #include "intel_tex.h"
 #include "intel_blit.h"
+#ifndef I915
+#include "brw_context.h"
+#endif

 #define FILE_DEBUG_FLAG DEBUG_TEXTURE

@@ -177,15 +180,28 @@ intelCopyTexSubImage(struct gl_context *ctx, GLuint dims,
                     GLint x, GLint y,
                     GLsizei width, GLsizei height)
 {
-   if (dims == 3 || !intel_copy_texsubimage(intel_context(ctx),
-                               intel_texture_image(texImage),
-                               xoffset, yoffset,
-                               intel_renderbuffer(rb), x, y, width, height)) {
-      fallback_debug("%s - fallback to swrast\n", __FUNCTION__);
-      _mesa_meta_CopyTexSubImage(ctx, dims, texImage,
-                                 xoffset, yoffset, zoffset,
-                                 rb, x, y, width, height);
+   struct intel_context *intel = intel_context(ctx);
+   if (dims != 3) {
+#ifndef I915
+      /* Try BLORP first.  It can handle almost everything. */
+      if (brw_blorp_copytexsubimage(intel, rb, texImage, x, y,
+                                    xoffset, yoffset, width, height))
+         return;
+#endif
+
+      /* Next, try the BLT engine. */
+      if (intel_copy_texsubimage(intel_context(ctx),
+                                 intel_texture_image(texImage),
+                                 xoffset, yoffset,
+                                 intel_renderbuffer(rb), x, y, width, height))
+         return;
   }
+
+   /* Finally, fall back to meta.  This will likely be slow. */
+   fallback_debug("%s - fallback to swrast\n", __FUNCTION__);
+   _mesa_meta_CopyTexSubImage(ctx, dims, texImage,
+                              xoffset, yoffset, zoffset,
+                              rb, x, y, width, height);
 }


--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -2152,13 +2152,6 @@ _mesa_BindBufferRange(GLenum target, GLuint index,
                     (int) size);
         return;
      }
-
-      if (offset + size > bufObj->Size) {
-         _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glBindBufferRange(offset + size %d > buffer size %d)",
-                     (int) (offset + size), (int) (bufObj->Size));
-         return;
-      }
   }

   switch (target) {
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -297,7 +297,7 @@ static const struct extension extension_table[] = {
   { "GL_ATI_texture_float",                       o(ARB_texture_float),                       GL,             2002 },
   { "GL_ATI_texture_mirror_once",                 o(ATI_texture_mirror_once),                 GL,             2006 },
   { "GL_IBM_multimode_draw_arrays",               o(dummy_true),                              GL,             1998 },
-   { "GL_IBM_rasterpos_clip",                      o(dummy_true),                              GL,             1996 },
+   { "GL_IBM_rasterpos_clip",                      o(dummy_true),                              GLL,            1996 },
   { "GL_IBM_texture_mirrored_repeat",             o(dummy_true),                              GLL,            1998 },
   { "GL_INGR_blend_func_separate",                o(EXT_blend_func_separate),                 GLL,            1999 },
   { "GL_MESA_pack_invert",                        o(MESA_pack_invert),                        GL,             2002 },
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -225,9 +225,6 @@ descriptor=[

 # GL_OES_point_sprite
  [ "POINT_SPRITE_NV", "CONTEXT_BOOL(Point.PointSprite), extra_NV_point_sprite_ARB_point_sprite" ],
-
-# GL_ARB_vertex_shader
-  [ "MAX_VARYING_FLOATS_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_vertex_shader" ],
 ]},


@@ -362,6 +359,7 @@ descriptor=[

 # GL_ARB_vertex_shader
  [ "MAX_VERTEX_UNIFORM_COMPONENTS_ARB", "CONTEXT_INT(Const.VertexProgram.MaxUniformComponents), extra_ARB_vertex_shader" ],
+  [ "MAX_VARYING_FLOATS_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_vertex_shader" ],

 # GL_EXT_framebuffer_blit
 # NOTE: GL_DRAW_FRAMEBUFFER_BINDING_EXT == GL_FRAMEBUFFER_BINDING_EXT
--- a/src/mesa/main/renderbuffer.h
+++ b/src/mesa/main/renderbuffer.h
@@ -29,6 +29,10 @@
 #include "glheader.h"
 #include "mtypes.h"

+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct gl_context;
 struct gl_framebuffer;
 struct gl_renderbuffer;
@@ -62,6 +66,8 @@ _mesa_reference_renderbuffer(struct gl_renderbuffer **ptr,
      _mesa_reference_renderbuffer_(ptr, rb);
 }
      
-
+#ifdef __cplusplus
+}
+#endif

 #endif /* RENDERBUFFER_H */
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -929,6 +929,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
   _mesa_propagate_uniforms_to_driver_storage(uni, offset, count);
 }

+
 /**
 * Called via glGetUniformLocation().
 *
@@ -944,73 +945,35 @@ _mesa_get_uniform_location(struct gl_context *ctx,
                           const GLchar *name,
                           unsigned *out_offset)
 {
-   const size_t len = strlen(name);
-   long offset;
-   bool array_lookup;
+   /* Page 80 (page 94 of the PDF) of the OpenGL 2.1 spec says:
+    *
+    *     "The first element of a uniform array is identified using the
+    *     name of the uniform array appended with "[0]". Except if the last
+    *     part of the string name indicates a uniform array, then the
+    *     location of the first element of that array can be retrieved by
+    *     either using the name of the uniform array, or the name of the
+    *     uniform array appended with "[0]"."
+    *
+    * Note: since uniform names are not allowed to use whitespace, and array
+    * indices within uniform names are not allowed to use "+", "-", or leading
+    * zeros, it follows that each uniform has a unique name up to the possible
+    * ambiguity with "[0]" noted above.  Therefore we don't need to worry
+    * about malformed inputs--they will properly fail when we try to look up
+    * the uniform name in shProg->UniformHash.
+    */
+
+   const GLchar *base_name_end;
+   long offset = parse_program_resource_name(name, &base_name_end);
+   bool array_lookup = offset >= 0;
   char *name_copy;

-   /* If the name ends with a ']', assume that it refers to some element of an
-    * array.  Malformed array references will fail the hash table look up
-    * below, so it doesn't matter that they are not caught here.  This code
-    * only wants to catch the "leaf" array references so that arrays of
-    * structures containing arrays will be handled correctly.
-    */
-   if (name[len-1] == ']') {
-      unsigned i;
-
-      /* Walk backwards over the string looking for a non-digit character.
-       * This had better be the opening bracket for an array index.
-       *
-       * Initially, i specifies the location of the ']'.  Since the string may
-       * contain only the ']' charcater, walk backwards very carefully.
-       */
-      for (i = len - 1; (i > 0) && isdigit(name[i-1]); --i)
-	 /* empty */ ;
-
-      /* Page 80 (page 94 of the PDF) of the OpenGL 2.1 spec says:
-       *
-       *     "The first element of a uniform array is identified using the
-       *     name of the uniform array appended with "[0]". Except if the last
-       *     part of the string name indicates a uniform array, then the
-       *     location of the first element of that array can be retrieved by
-       *     either using the name of the uniform array, or the name of the
-       *     uniform array appended with "[0]"."
-       *
-       * Page 79 (page 93 of the PDF) of the OpenGL 2.1 spec says:
-       *
-       *     "name must be a null terminated string, without white space."
-       *
-       * Return an error if there is no opening '[' to match the closing ']'.
-       * An error will also be returned if there is intervening white space
-       * (or other non-digit characters) before the opening '['.
-       */
-      if ((i == 0) || name[i-1] != '[')
-	 return GL_INVALID_INDEX;
-
-      /* Return an error if there are no digits between the opening '[' to
-       * match the closing ']'.
-       */
-      if (i == (len - 1))
-	 return GL_INVALID_INDEX;
-
-      /* Make a new string that is a copy of the old string up to (but not
-       * including) the '[' character.
-       */
-      name_copy = (char *) malloc(i);
-      memcpy(name_copy, name, i - 1);
-      name_copy[i-1] = '\0';
-
-      offset = strtol(&name[i], NULL, 10);
-      if (offset < 0) {
-	 free(name_copy);
-	 return GL_INVALID_INDEX;
-      }
-
-      array_lookup = true;
+   if (array_lookup) {
+      name_copy = (char *) malloc(base_name_end - name + 1);
+      memcpy(name_copy, name, base_name_end - name);
+      name_copy[base_name_end - name] = '\0';
   } else {
      name_copy = (char *) name;
      offset = 0;
-      array_lookup = false;
   }

   unsigned location = 0;
--- a/src/mesa/main/uniforms.h
+++ b/src/mesa/main/uniforms.h
@@ -167,6 +167,10 @@ _mesa_GetActiveUniformsiv(GLuint program,
 void GLAPIENTRY
 _mesa_GetUniformiv(GLhandleARB, GLint, GLint *);

+long
+_mesa_parse_program_resource_name(const GLchar *name,
+                                  const GLchar **out_base_name_end);
+
 unsigned
 _mesa_get_uniform_location(struct gl_context *ctx, struct gl_shader_program *shProg,
 			   const GLchar *name, unsigned *offset);
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2375,7 +2375,7 @@ print_program(struct prog_instruction *mesa_instructions,
   }
 }

-class add_uniform_to_shader : public uniform_field_visitor {
+class add_uniform_to_shader : public program_resource_visitor {
 public:
   add_uniform_to_shader(struct gl_shader_program *shader_program,
 			 struct gl_program_parameter_list *params)
@@ -2387,7 +2387,7 @@ public:
   void process(ir_variable *var)
   {
      this->idx = -1;
-      this->uniform_field_visitor::process(var);
+      this->program_resource_visitor::process(var);

      var->location = this->idx;
   }