Compare commits

44 Commits

mesa-11.2. ... mesa-11.2.

SHA1
bcb7e8b084
0efb7fb803
11654dda56
a715e4f40f
afd4774721
864eba84dd
98380e02d5
7ce73f2608
3b6bda665a
c1763ec83c
b4045c43ae
9bcedb9553
4b83793283
b0ff22cce6
fa6debe633
db8df3ac13
06c15635ef
582ae91e3a
64cd74d3b5
6ac586ad88
2c592ae450
0cf5990385
d8dd8f30bd
9de08603e7
a0268d6cb5
ed74d27034
aa422705dc
f755b71f45
c64875ec68
017f647451
adec0763a9
b290082b45
9edda787f5
d9f03f7fa5
26620402b6
d867628244
1d760368ee
938e6dfdee
41dbfcd1cd
1e9d8fa8a8
580cab2d99
9a5bcf1571
c693ddf731
7155c2441e
@@ -2625,6 +2625,13 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
continue;
}

if (num_attr >= ARRAY_SIZE(to_assign)) {
linker_error(prog, "too many %s (max %u)",
target_index == MESA_SHADER_VERTEX ?
"vertex shader inputs" : "fragment shader outputs",
(unsigned)ARRAY_SIZE(to_assign));
return false;
}
to_assign[num_attr].slots = slots;
to_assign[num_attr].var = var;
num_attr++;

@@ -1006,6 +1006,9 @@ dri2_create_image_khr_pixmap(_EGLDisplay *disp, _EGLContext *ctx,
geometry_cookie = xcb_get_geometry (dri2_dpy->conn, drawable);
buffers_reply = xcb_dri2_get_buffers_reply (dri2_dpy->conn,
buffers_cookie, NULL);
if (buffers_reply == NULL)
return NULL;

buffers = xcb_dri2_get_buffers_buffers (buffers_reply);
if (buffers == NULL) {
return NULL;

@@ -44,7 +44,6 @@
#include "egllog.h"


#define MIN2(A, B) (((A) < (B)) ? (A) : (B))


/**

@@ -40,9 +40,16 @@ extern "C" {

#define _EGL_MAX_EXTENSIONS_LEN 1000

/* Hardcoded, conservative default for EGL_LARGEST_PBUFFER,
* this is used to implement EGL_LARGEST_PBUFFER.
*/
#define _EGL_MAX_PBUFFER_WIDTH 4096
#define _EGL_MAX_PBUFFER_HEIGHT 4096

#define _EGL_VENDOR_STRING "Mesa Project"

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#define MIN2(A, B) (((A) < (B)) ? (A) : (B))

#ifdef __cplusplus
}

@@ -307,6 +307,12 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
if (err != EGL_SUCCESS)
return _eglError(err, func);

/* if EGL_LARGEST_PBUFFER in use, clamp width and height */
if (surf->LargestPbuffer) {
surf->Width = MIN2(surf->Width, _EGL_MAX_PBUFFER_WIDTH);
surf->Height = MIN2(surf->Height, _EGL_MAX_PBUFFER_HEIGHT);
}

return EGL_TRUE;
}

@@ -108,11 +108,11 @@ emit_segment(struct draw_stage *stage, struct prim_header *header,
}


static inline unsigned
static inline bool
stipple_test(int counter, ushort pattern, int factor)
{
int b = (counter / factor) & 0xf;
return (1 << b) & pattern;
return !!((1 << b) & pattern);
}


@@ -126,7 +126,7 @@ stipple_line(struct draw_stage *stage, struct prim_header *header)
const float *pos0 = v0->data[pos];
const float *pos1 = v1->data[pos];
float start = 0;
int state = 0;
bool state = 0;

float x0 = pos0[0];
float x1 = pos1[0];
@@ -143,29 +143,29 @@ stipple_line(struct draw_stage *stage, struct prim_header *header)
stipple->counter = 0;


/* XXX ToDo: intead of iterating pixel-by-pixel, use a look-up table.
/* XXX ToDo: instead of iterating pixel-by-pixel, use a look-up table.
*/
for (i = 0; i < length; i++) {
int result = stipple_test( (int) stipple->counter+i,
(ushort) stipple->pattern, stipple->factor );
bool result = stipple_test((int)stipple->counter + i,
(ushort)stipple->pattern, stipple->factor);
if (result != state) {
/* changing from "off" to "on" or vice versa */
if (state) {
if (start != i) {
if (state) {
if (start != i) {
/* finishing an "on" segment */
emit_segment( stage, header, start / length, i / length );
emit_segment(stage, header, start / length, i / length);
}
}
else {
}
else {
/* starting an "on" segment */
start = (float) i;
}
state = result;
start = (float)i;
}
state = result;
}
}

if (state && start < length)
emit_segment( stage, header, start / length, 1.0 );
emit_segment(stage, header, start / length, 1.0);

stipple->counter += length;
}
@@ -1388,7 +1388,9 @@ static boolean parse_declaration( struct translate_ctx *ctx )
if (str_match_nocase_whole(&cur, "ATOMIC")) {
decl.Declaration.Atomic = 1;
ctx->cur = cur;
} else if (str_match_nocase_whole(&cur, "SHARED")) {
}
} else if (file == TGSI_FILE_MEMORY) {
if (str_match_nocase_whole(&cur, "SHARED")) {
decl.Declaration.Shared = 1;
ctx->cur = cur;
}

@@ -116,6 +116,12 @@ struct lp_rast_plane {

/* one-pixel sized trivial reject offsets for each plane */
uint32_t eo;
/*
* We rely on this struct being 64bit aligned (ideally it would be 128bit
* but that's quite the waste) and therefore on 32bit we need padding
* since otherwise (even with the 64bit number in there) it wouldn't be.
*/
uint32_t pad;
};

/**

@@ -94,6 +94,8 @@ lp_setup_alloc_triangle(struct lp_scene *scene,
unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane);
struct lp_rast_triangle *tri;

STATIC_ASSERT(sizeof(struct lp_rast_plane) % 8 == 0);

*tri_size = (sizeof(struct lp_rast_triangle) +
3 * input_array_sz +
plane_sz);
|
||||
code[1] |= (i->tex.mask & 0xc) << 12;
|
||||
|
||||
if (i->tex.liveOnly)
|
||||
code[1] |= 4;
|
||||
code[1] |= 1 << 2;
|
||||
if (i->tex.derivAll)
|
||||
code[1] |= 1 << 3;
|
||||
|
||||
defId(i->def(0), 2);
|
||||
|
||||
|
@@ -1989,7 +1989,6 @@ Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask)
|
||||
void
|
||||
Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
|
||||
{
|
||||
Value *val;
|
||||
Value *arg[4], *src[8];
|
||||
Value *lod = NULL, *shd = NULL;
|
||||
unsigned int s, c, d;
|
||||
@@ -2032,17 +2031,6 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
|
||||
shd = src[n - 1];
|
||||
}
|
||||
|
||||
if (tgt.isCube()) {
|
||||
for (c = 0; c < 3; ++c)
|
||||
src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
|
||||
val = getScratch();
|
||||
mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
|
||||
mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
|
||||
mkOp1(OP_RCP, TYPE_F32, val, val);
|
||||
for (c = 0; c < 3; ++c)
|
||||
src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
|
||||
}
|
||||
|
||||
for (c = 0, d = 0; c < 4; ++c) {
|
||||
if (dst[c]) {
|
||||
texi->setDef(d++, dst[c]);
|
||||
|
@@ -67,6 +67,7 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
|
||||
tmp = bld.getScratch();
|
||||
|
||||
for (l = 0; l < 4; ++l) {
|
||||
Value *src[3], *val;
|
||||
// mov coordinates from lane l to all lanes
|
||||
bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
|
||||
for (c = 0; c < dim; ++c) {
|
||||
@@ -92,10 +93,25 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
|
||||
add->lanes = 1; /* abused for .ndv */
|
||||
}
|
||||
|
||||
// normalize cube coordinates if necessary
|
||||
if (i->tex.target.isCube()) {
|
||||
for (c = 0; c < 3; ++c)
|
||||
src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
|
||||
val = bld.getScratch();
|
||||
bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
|
||||
bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
|
||||
bld.mkOp1(OP_RCP, TYPE_F32, val, val);
|
||||
for (c = 0; c < 3; ++c)
|
||||
src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
|
||||
} else {
|
||||
for (c = 0; c < dim; ++c)
|
||||
src[c] = crd[c];
|
||||
}
|
||||
|
||||
// texture
|
||||
bld.insert(tex = cloneForward(func, i));
|
||||
for (c = 0; c < dim; ++c)
|
||||
tex->setSrc(c + array, crd[c]);
|
||||
tex->setSrc(c + array, src[c]);
|
||||
bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
|
||||
|
||||
// save results
|
||||
|
@@ -724,6 +724,23 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i)
|
||||
const int dref = arg;
|
||||
const int lod = i->tex.target.isShadow() ? (arg + 1) : arg;
|
||||
|
||||
/* Only normalize in the non-explicit derivatives case.
|
||||
*/
|
||||
if (i->tex.target.isCube() && i->op != OP_TXD) {
|
||||
Value *src[3], *val;
|
||||
int c;
|
||||
for (c = 0; c < 3; ++c)
|
||||
src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), i->getSrc(c));
|
||||
val = bld.getScratch();
|
||||
bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
|
||||
bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
|
||||
bld.mkOp1(OP_RCP, TYPE_F32, val, val);
|
||||
for (c = 0; c < 3; ++c) {
|
||||
i->setSrc(c, bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(),
|
||||
i->getSrc(c), val));
|
||||
}
|
||||
}
|
||||
|
||||
// handle MS, which means looking up the MS params for this texture, and
|
||||
// adjusting the input coordinates to point at the right sample.
|
||||
if (i->tex.target.isMS()) {
|
||||
@@ -934,12 +951,14 @@ NV50LoweringPreSSA::handleTXD(TexInstruction *i)
|
||||
|
||||
handleTEX(i);
|
||||
i->op = OP_TEX; // no need to clone dPdx/dPdy later
|
||||
i->tex.derivAll = true;
|
||||
|
||||
for (c = 0; c < dim; ++c)
|
||||
crd[c] = bld.getScratch();
|
||||
|
||||
bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
|
||||
for (l = 0; l < 4; ++l) {
|
||||
Value *src[3], *val;
|
||||
// mov coordinates from lane l to all lanes
|
||||
for (c = 0; c < dim; ++c)
|
||||
bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero);
|
||||
@@ -949,10 +968,24 @@ NV50LoweringPreSSA::handleTXD(TexInstruction *i)
|
||||
// add dPdy from lane l to lanes dy
|
||||
for (c = 0; c < dim; ++c)
|
||||
bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
|
||||
// normalize cube coordinates if necessary
|
||||
if (i->tex.target.isCube()) {
|
||||
for (c = 0; c < 3; ++c)
|
||||
src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
|
||||
val = bld.getScratch();
|
||||
bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
|
||||
bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
|
||||
bld.mkOp1(OP_RCP, TYPE_F32, val, val);
|
||||
for (c = 0; c < 3; ++c)
|
||||
src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
|
||||
} else {
|
||||
for (c = 0; c < dim; ++c)
|
||||
src[c] = crd[c];
|
||||
}
|
||||
// texture
|
||||
bld.insert(tex = cloneForward(func, i));
|
||||
for (c = 0; c < dim; ++c)
|
||||
tex->setSrc(c, crd[c]);
|
||||
tex->setSrc(c, src[c]);
|
||||
// save results
|
||||
for (c = 0; i->defExists(c); ++c) {
|
||||
Instruction *mov;
|
||||
|
@@ -615,6 +615,24 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
const int lyr = arg - (i->tex.target.isMS() ? 2 : 1);
const int chipset = prog->getTarget()->getChipset();

/* Only normalize in the non-explicit derivatives case. For explicit
* derivatives, this is handled in handleManualTXD.
*/
if (i->tex.target.isCube() && i->dPdx[0].get() == NULL) {
Value *src[3], *val;
int c;
for (c = 0; c < 3; ++c)
src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), i->getSrc(c));
val = bld.getScratch();
bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
bld.mkOp1(OP_RCP, TYPE_F32, val, val);
for (c = 0; c < 3; ++c) {
i->setSrc(c, bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(),
i->getSrc(c), val));
}
}

// Arguments to the TEX instruction are a little insane. Even though the
// encoding is identical between SM20 and SM30, the arguments mean
// different things between Fermi and Kepler+. A lot of arguments are
@@ -728,9 +746,13 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
}

Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(lyr) : NULL;
for (int s = dim; s >= 1; --s)
i->setSrc(s, i->getSrc(s - 1));
i->setSrc(0, arrayIndex);
if (arrayIndex) {
for (int s = dim; s >= 1; --s)
i->setSrc(s, i->getSrc(s - 1));
i->setSrc(0, arrayIndex);
} else {
i->moveSources(0, 1);
}

if (arrayIndex) {
int sat = (i->op == OP_TXF) ? 1 : 0;
@@ -852,7 +874,17 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
Value *zero = bld.loadImm(bld.getSSA(), 0);
int l, c;
const int dim = i->tex.target.getDim() + i->tex.target.isCube();
const int array = i->tex.target.isArray();

// This function is invoked after handleTEX lowering, so we have to expect
// the arguments in the order that the hw wants them. For Fermi, array and
// indirect are both in the leading arg, while for Kepler, array and
// indirect are separate (and both precede the coordinates). Maxwell is
// handled in a separate function.
unsigned array;
if (targ->getChipset() < NVISA_GK104_CHIPSET)
array = i->tex.target.isArray() || i->tex.rIndirectSrc >= 0;
else
array = i->tex.target.isArray() + (i->tex.rIndirectSrc >= 0);

i->op = OP_TEX; // no need to clone dPdx/dPdy later

@@ -861,6 +893,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)

bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
for (l = 0; l < 4; ++l) {
Value *src[3], *val;
// mov coordinates from lane l to all lanes
for (c = 0; c < dim; ++c)
bld.mkQuadop(0x00, crd[c], l, i->getSrc(c + array), zero);
@@ -870,10 +903,24 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
// add dPdy from lane l to lanes dy
for (c = 0; c < dim; ++c)
bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
// normalize cube coordinates
if (i->tex.target.isCube()) {
for (c = 0; c < 3; ++c)
src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
val = bld.getScratch();
bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
bld.mkOp1(OP_RCP, TYPE_F32, val, val);
for (c = 0; c < 3; ++c)
src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
} else {
for (c = 0; c < dim; ++c)
src[c] = crd[c];
}
// texture
bld.insert(tex = cloneForward(func, i));
for (c = 0; c < dim; ++c)
tex->setSrc(c + array, crd[c]);
tex->setSrc(c + array, src[c]);
// save results
for (c = 0; i->defExists(c); ++c) {
Instruction *mov;
@@ -633,8 +633,6 @@ nv50_stream_output_validate(struct nv50_context *nv50)
BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1);
PUSH_DATA (push, ctrl);

nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO);

for (i = 0; i < nv50->num_so_targets; ++i) {
struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]);
struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);
@@ -1180,8 +1180,10 @@ nv50_set_stream_output_targets(struct pipe_context *pipe,
}
nv50->num_so_targets = num_targets;

if (nv50->so_targets_dirty)
if (nv50->so_targets_dirty) {
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO);
nv50->dirty |= NV50_NEW_STRMOUT;
}
}

static void

@@ -294,7 +294,6 @@ nvc0_tfb_validate(struct nvc0_context *nvc0)

if (!(nvc0->dirty & NVC0_NEW_TFB_TARGETS))
return;
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TFB);

for (b = 0; b < nvc0->num_tfbbufs; ++b) {
struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]);

@@ -413,7 +413,7 @@ nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
{
unsigned s, i;

for (s = 0; s < 5; ++s)
for (s = 0; s < 6; ++s)
for (i = 0; i < nvc0_context(pipe)->num_samplers[s]; ++i)
if (nvc0_context(pipe)->samplers[s][i] == hwcso)
nvc0_context(pipe)->samplers[s][i] = NULL;
@@ -1184,8 +1184,10 @@ nvc0_set_transform_feedback_targets(struct pipe_context *pipe,
}
nvc0->num_tfbbufs = num_targets;

if (nvc0->tfbbuf_dirty)
if (nvc0->tfbbuf_dirty) {
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TFB);
nvc0->dirty |= NVC0_NEW_TFB_TARGETS;
}
}

static void

@@ -1203,8 +1203,8 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
x0 = (float)info->src.box.x - x_range * (float)info->dst.box.x;
y0 = (float)info->src.box.y - y_range * (float)info->dst.box.y;

x1 = x0 + 16384.0f * x_range;
y1 = y0 + 16384.0f * y_range;
x1 = x0 + 32768.0f * x_range;
y1 = y0 + 32768.0f * y_range;

x0 *= (float)(1 << nv50_miptree(src)->ms_x);
x1 *= (float)(1 << nv50_miptree(src)->ms_x);
@@ -1302,6 +1302,17 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
}
nvc0->state.num_vtxelts = 2;

if (nvc0->state.prim_restart) {
IMMED_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 0);
nvc0->state.prim_restart = 0;
}

if (nvc0->state.index_bias) {
IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_BASE), 0);
IMMED_NVC0(push, NVC0_3D(VERTEX_ID_BASE), 0);
nvc0->state.index_bias = 0;
}

for (i = 0; i < info->dst.box.depth; ++i, z += dz) {
if (info->dst.box.z + i) {
BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
@@ -1314,14 +1325,14 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
*(vbuf++) = fui(y0);
*(vbuf++) = fui(z);

*(vbuf++) = fui(16384 << nv50_miptree(dst)->ms_x);
*(vbuf++) = fui(32768 << nv50_miptree(dst)->ms_x);
*(vbuf++) = fui(0.0f);
*(vbuf++) = fui(x1);
*(vbuf++) = fui(y0);
*(vbuf++) = fui(z);

*(vbuf++) = fui(0.0f);
*(vbuf++) = fui(16384 << nv50_miptree(dst)->ms_y);
*(vbuf++) = fui(32768 << nv50_miptree(dst)->ms_y);
*(vbuf++) = fui(x0);
*(vbuf++) = fui(y1);
*(vbuf++) = fui(z);
@@ -645,21 +645,21 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
|
||||
if (rviews[i]) {
|
||||
struct r600_texture *rtex =
|
||||
(struct r600_texture*)rviews[i]->base.texture;
|
||||
bool is_buffer = rviews[i]->base.texture->target == PIPE_BUFFER;
|
||||
|
||||
if (rviews[i]->base.texture->target != PIPE_BUFFER) {
|
||||
if (rtex->is_depth && !rtex->is_flushing_texture) {
|
||||
dst->views.compressed_depthtex_mask |= 1 << i;
|
||||
} else {
|
||||
dst->views.compressed_depthtex_mask &= ~(1 << i);
|
||||
}
|
||||
|
||||
/* Track compressed colorbuffers. */
|
||||
if (rtex->cmask.size) {
|
||||
dst->views.compressed_colortex_mask |= 1 << i;
|
||||
} else {
|
||||
dst->views.compressed_colortex_mask &= ~(1 << i);
|
||||
}
|
||||
if (!is_buffer && rtex->is_depth && !rtex->is_flushing_texture) {
|
||||
dst->views.compressed_depthtex_mask |= 1 << i;
|
||||
} else {
|
||||
dst->views.compressed_depthtex_mask &= ~(1 << i);
|
||||
}
|
||||
|
||||
/* Track compressed colorbuffers. */
|
||||
if (!is_buffer && rtex->cmask.size) {
|
||||
dst->views.compressed_colortex_mask |= 1 << i;
|
||||
} else {
|
||||
dst->views.compressed_colortex_mask &= ~(1 << i);
|
||||
}
|
||||
|
||||
/* Changing from array to non-arrays textures and vice versa requires
|
||||
* updating TEX_ARRAY_OVERRIDE in sampler states on R6xx-R7xx. */
|
||||
if (rctx->b.chip_class <= R700 &&
|
||||
|
@@ -598,9 +598,13 @@ bool expr_handler::fold_assoc(alu_node *n) {
|
||||
|
||||
unsigned op = n->bc.op;
|
||||
bool allow_neg = false, cur_neg = false;
|
||||
bool distribute_neg = false;
|
||||
|
||||
switch(op) {
|
||||
case ALU_OP2_ADD:
|
||||
distribute_neg = true;
|
||||
allow_neg = true;
|
||||
break;
|
||||
case ALU_OP2_MUL:
|
||||
case ALU_OP2_MUL_IEEE:
|
||||
allow_neg = true;
|
||||
@@ -632,7 +636,7 @@ bool expr_handler::fold_assoc(alu_node *n) {
|
||||
if (v1->is_const()) {
|
||||
literal arg = v1->get_const_value();
|
||||
apply_alu_src_mod(a->bc, 1, arg);
|
||||
if (cur_neg)
|
||||
if (cur_neg && distribute_neg)
|
||||
arg.f = -arg.f;
|
||||
|
||||
if (a == n)
|
||||
@@ -660,7 +664,7 @@ bool expr_handler::fold_assoc(alu_node *n) {
|
||||
if (v0->is_const()) {
|
||||
literal arg = v0->get_const_value();
|
||||
apply_alu_src_mod(a->bc, 0, arg);
|
||||
if (cur_neg)
|
||||
if (cur_neg && distribute_neg)
|
||||
arg.f = -arg.f;
|
||||
|
||||
if (last_arg == 0) {
|
||||
|
@@ -314,7 +314,8 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
|
||||
}
|
||||
}
|
||||
else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
|
||||
!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
|
||||
!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
|
||||
PIPE_TRANSFER_PERSISTENT)) &&
|
||||
!(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
|
||||
r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) {
|
||||
assert(usage & PIPE_TRANSFER_WRITE);
|
||||
@@ -341,7 +342,8 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
|
||||
}
|
||||
/* Using a staging buffer in GTT for larger reads is much faster. */
|
||||
else if ((usage & PIPE_TRANSFER_READ) &&
|
||||
!(usage & PIPE_TRANSFER_WRITE) &&
|
||||
!(usage & (PIPE_TRANSFER_WRITE |
|
||||
PIPE_TRANSFER_PERSISTENT)) &&
|
||||
rbuffer->domains == RADEON_DOMAIN_VRAM &&
|
||||
r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
|
||||
struct r600_resource *staging;
|
||||
|
@@ -533,8 +533,14 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
|
||||
rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
|
||||
return 0;
|
||||
|
||||
/* Overalign HTILE on Stoney to fix piglit/depthstencil-render-miplevels 585. */
|
||||
if (rscreen->family == CHIP_STONEY)
|
||||
/* Overalign HTILE on P2 configs to work around GPU hangs in
|
||||
* piglit/depthstencil-render-miplevels 585.
|
||||
*
|
||||
* This has been confirmed to help Kabini & Stoney, where the hangs
|
||||
* are always reproducible. I think I have seen the test hang
|
||||
* on Carrizo too, though it was very rare there.
|
||||
*/
|
||||
if (rscreen->chip_class >= CIK && num_pipes < 4)
|
||||
num_pipes = 4;
|
||||
|
||||
switch (num_pipes) {
|
||||
|
@@ -237,6 +237,7 @@ int rvid_get_video_param(struct pipe_screen *screen,
|
||||
case PIPE_VIDEO_CAP_SUPPORTED:
|
||||
switch (codec) {
|
||||
case PIPE_VIDEO_FORMAT_MPEG12:
|
||||
return profile != PIPE_VIDEO_PROFILE_MPEG1;
|
||||
case PIPE_VIDEO_FORMAT_MPEG4:
|
||||
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
|
||||
if (rscreen->family < CHIP_PALM)
|
||||
@@ -257,7 +258,7 @@ int rvid_get_video_param(struct pipe_screen *screen,
|
||||
case PIPE_VIDEO_CAP_MAX_WIDTH:
|
||||
return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
|
||||
case PIPE_VIDEO_CAP_MAX_HEIGHT:
|
||||
return (rscreen->family < CHIP_TONGA) ? 1152 : 2304;
|
||||
return (rscreen->family < CHIP_TONGA) ? 1152 : 4096;
|
||||
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
|
||||
return PIPE_FORMAT_NV12;
|
||||
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
|
||||
|
@@ -303,6 +303,7 @@ static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
|
||||
*/
|
||||
if (samplers->views.views[i] &&
|
||||
samplers->views.views[i]->texture &&
|
||||
samplers->views.views[i]->texture->target != PIPE_BUFFER &&
|
||||
((struct r600_texture*)samplers->views.views[i]->texture)->fmask.size)
|
||||
continue;
|
||||
|
||||
|
@@ -2209,6 +2209,7 @@ img_filter_2d_ewa(const struct sp_sampler_view *sp_sview,
|
||||
const float t[TGSI_QUAD_SIZE],
|
||||
const float p[TGSI_QUAD_SIZE],
|
||||
const uint faces[TGSI_QUAD_SIZE],
|
||||
const int8_t *offset,
|
||||
unsigned level,
|
||||
const float dudx, const float dvdx,
|
||||
const float dudy, const float dvdy,
|
||||
@@ -2268,6 +2269,8 @@ img_filter_2d_ewa(const struct sp_sampler_view *sp_sview,
|
||||
/* F *= formScale; */ /* no need to scale F as we don't use it below here */
|
||||
|
||||
args.level = level;
|
||||
args.offset = offset;
|
||||
|
||||
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
|
||||
/* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
|
||||
* and incrementally update the value of Ax^2+Bxy*Cy^2; when this
|
||||
@@ -2431,6 +2434,8 @@ mip_filter_linear_aniso(const struct sp_sampler_view *sp_sview,
|
||||
const float dvdy = (t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
|
||||
struct img_filter_args args;
|
||||
|
||||
args.offset = filt_args->offset;
|
||||
|
||||
if (filt_args->control == TGSI_SAMPLER_LOD_BIAS ||
|
||||
filt_args->control == TGSI_SAMPLER_LOD_NONE ||
|
||||
/* XXX FIXME */
|
||||
@@ -2503,8 +2508,8 @@ mip_filter_linear_aniso(const struct sp_sampler_view *sp_sview,
|
||||
* seem to be worth the extra running time.
|
||||
*/
|
||||
img_filter_2d_ewa(sp_sview, sp_samp, min_filter, mag_filter,
|
||||
s, t, p, filt_args->faces, level0,
|
||||
dudx, dvdx, dudy, dvdy, rgba);
|
||||
s, t, p, filt_args->faces, filt_args->offset,
|
||||
level0, dudx, dvdx, dudy, dvdy, rgba);
|
||||
}
|
||||
|
||||
if (DEBUG_TEX) {
|
||||
|
@@ -55,7 +55,7 @@ kernel::launch(command_queue &q,
|
||||
const auto reduced_grid_size =
|
||||
map(divides(), grid_size, block_size);
|
||||
void *st = exec.bind(&q, grid_offset);
|
||||
struct pipe_grid_info info;
|
||||
struct pipe_grid_info info = {};
|
||||
|
||||
// The handles are created during exec_context::bind(), so we need make
|
||||
// sure to call exec_context::bind() before retrieving them.
|
||||
|
@@ -140,7 +140,7 @@ static OMX_ERRORTYPE vid_dec_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING nam
|
||||
|
||||
r = omx_base_filter_Constructor(comp, name);
|
||||
if (r)
|
||||
return r;
|
||||
return r;
|
||||
|
||||
priv->profile = PIPE_VIDEO_PROFILE_UNKNOWN;
|
||||
|
||||
@@ -268,7 +268,7 @@ static OMX_ERRORTYPE vid_dec_SetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE i
|
||||
r = checkHeader(param, sizeof(OMX_PARAM_COMPONENTROLETYPE));
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
|
||||
if (!strcmp((char *)role->cRole, OMX_VID_DEC_MPEG2_ROLE)) {
|
||||
priv->profile = PIPE_VIDEO_PROFILE_MPEG2_MAIN;
|
||||
} else if (!strcmp((char *)role->cRole, OMX_VID_DEC_AVC_ROLE)) {
|
||||
@@ -321,7 +321,7 @@ static OMX_ERRORTYPE vid_dec_GetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE i
|
||||
strcpy((char *)role->cRole, OMX_VID_DEC_MPEG2_ROLE);
|
||||
else if (priv->profile == PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH)
|
||||
strcpy((char *)role->cRole, OMX_VID_DEC_AVC_ROLE);
|
||||
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -419,6 +419,7 @@ static OMX_ERRORTYPE vid_dec_DecodeBuffer(omx_base_PortType *port, OMX_BUFFERHEA
|
||||
priv->in_buffers[i] = buf;
|
||||
priv->sizes[i] = buf->nFilledLen;
|
||||
priv->inputs[i] = buf->pBuffer;
|
||||
priv->timestamps[i] = buf->nTimeStamp;
|
||||
|
||||
while (priv->num_in_buffers > (!!(buf->nFlags & OMX_BUFFERFLAG_EOS) ? 0 : 1)) {
|
||||
bool eos = !!(priv->in_buffers[0]->nFlags & OMX_BUFFERFLAG_EOS);
|
||||
@@ -469,12 +470,13 @@ static OMX_ERRORTYPE vid_dec_DecodeBuffer(omx_base_PortType *port, OMX_BUFFERHEA
|
||||
priv->in_buffers[0] = priv->in_buffers[1];
|
||||
priv->sizes[0] = priv->sizes[1] - delta;
|
||||
priv->inputs[0] = priv->inputs[1] + delta;
|
||||
priv->timestamps[0] = priv->timestamps[1];
|
||||
}
|
||||
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
return OMX_ErrorNone;
|
||||
}
|
||||
|
||||
@@ -513,7 +515,7 @@ static void vid_dec_FillOutput(vid_dec_PrivateType *priv, struct pipe_video_buff
|
||||
|
||||
box.width = def->nFrameWidth / 2;
|
||||
box.height = def->nFrameHeight / 2;
|
||||
|
||||
|
||||
src = priv->pipe->transfer_map(priv->pipe, views[1]->texture, 0,
|
||||
PIPE_TRANSFER_READ, &box, &transfer);
|
||||
util_copy_rect(dst, views[1]->texture->format, def->nStride, 0, 0,
|
||||
@@ -526,9 +528,13 @@ static void vid_dec_FrameDecoded(OMX_COMPONENTTYPE *comp, OMX_BUFFERHEADERTYPE*
|
||||
{
|
||||
vid_dec_PrivateType *priv = comp->pComponentPrivate;
|
||||
bool eos = !!(input->nFlags & OMX_BUFFERFLAG_EOS);
|
||||
OMX_TICKS timestamp;
|
||||
|
||||
if (!input->pInputPortPrivate)
|
||||
input->pInputPortPrivate = priv->Flush(priv);
|
||||
if (!input->pInputPortPrivate) {
|
||||
input->pInputPortPrivate = priv->Flush(priv, ×tamp);
|
||||
if (timestamp != OMX_VID_DEC_TIMESTAMP_INVALID)
|
||||
input->nTimeStamp = timestamp;
|
||||
}
|
||||
|
||||
if (input->pInputPortPrivate) {
|
||||
if (output->pInputPortPrivate) {
|
||||
@@ -539,6 +545,7 @@ static void vid_dec_FrameDecoded(OMX_COMPONENTTYPE *comp, OMX_BUFFERHEADERTYPE*
|
||||
vid_dec_FillOutput(priv, input->pInputPortPrivate, output);
|
||||
}
|
||||
output->nFilledLen = output->nAllocLen;
|
||||
output->nTimeStamp = input->nTimeStamp;
|
||||
}
|
||||
|
||||
if (eos && input->pInputPortPrivate)
|
||||
|
@@ -59,6 +59,8 @@
|
||||
#define OMX_VID_DEC_AVC_NAME "OMX.mesa.video_decoder.avc"
|
||||
#define OMX_VID_DEC_AVC_ROLE "video_decoder.avc"
|
||||
|
||||
#define OMX_VID_DEC_TIMESTAMP_INVALID ((OMX_TICKS) -1)
|
||||
|
||||
struct vl_vlc;
|
||||
|
||||
DERIVEDCLASS(vid_dec_PrivateType, omx_base_filter_PrivateType)
|
||||
@@ -69,7 +71,7 @@ DERIVEDCLASS(vid_dec_PrivateType, omx_base_filter_PrivateType)
|
||||
struct pipe_video_codec *codec; \
|
||||
void (*Decode)(vid_dec_PrivateType *priv, struct vl_vlc *vlc, unsigned min_bits_left); \
|
||||
void (*EndFrame)(vid_dec_PrivateType *priv); \
|
||||
struct pipe_video_buffer *(*Flush)(vid_dec_PrivateType *priv); \
|
||||
struct pipe_video_buffer *(*Flush)(vid_dec_PrivateType *priv, OMX_TICKS *timestamp); \
|
||||
struct pipe_video_buffer *target, *shadow; \
|
||||
union { \
|
||||
struct { \
|
||||
@@ -100,6 +102,9 @@ DERIVEDCLASS(vid_dec_PrivateType, omx_base_filter_PrivateType)
|
||||
OMX_BUFFERHEADERTYPE *in_buffers[2]; \
|
||||
const void *inputs[2]; \
|
||||
unsigned sizes[2]; \
|
||||
OMX_TICKS timestamps[2]; \
|
||||
OMX_TICKS timestamp; \
|
||||
bool first_buf_in_frame; \
|
||||
bool frame_finished; \
|
||||
bool frame_started; \
|
||||
unsigned bytes_left; \
|
||||
|
@@ -45,6 +45,7 @@
|
||||
struct dpb_list {
|
||||
struct list_head list;
|
||||
struct pipe_video_buffer *buffer;
|
||||
OMX_TICKS timestamp;
|
||||
unsigned poc;
|
||||
};
|
||||
|
||||
@@ -82,7 +83,7 @@ static const uint8_t Default_8x8_Inter[64] = {
|
||||
|
||||
static void vid_dec_h264_Decode(vid_dec_PrivateType *priv, struct vl_vlc *vlc, unsigned min_bits_left);
|
||||
static void vid_dec_h264_EndFrame(vid_dec_PrivateType *priv);
|
||||
static struct pipe_video_buffer *vid_dec_h264_Flush(vid_dec_PrivateType *priv);
|
||||
static struct pipe_video_buffer *vid_dec_h264_Flush(vid_dec_PrivateType *priv, OMX_TICKS *timestamp);
|
||||
|
||||
void vid_dec_h264_Init(vid_dec_PrivateType *priv)
|
||||
{
|
||||
@@ -91,9 +92,10 @@ void vid_dec_h264_Init(vid_dec_PrivateType *priv)
|
||||
priv->Decode = vid_dec_h264_Decode;
|
||||
priv->EndFrame = vid_dec_h264_EndFrame;
|
||||
priv->Flush = vid_dec_h264_Flush;
|
||||
|
||||
|
||||
LIST_INITHEAD(&priv->codec_data.h264.dpb_list);
|
||||
priv->picture.h264.field_order_cnt[0] = priv->picture.h264.field_order_cnt[1] = INT_MAX;
|
||||
priv->first_buf_in_frame = true;
|
||||
}
|
||||
|
||||
static void vid_dec_h264_BeginFrame(vid_dec_PrivateType *priv)
|
||||
@@ -104,6 +106,9 @@ static void vid_dec_h264_BeginFrame(vid_dec_PrivateType *priv)
|
||||
return;
|
||||
|
||||
vid_dec_NeedTarget(priv);
|
||||
if (priv->first_buf_in_frame)
|
||||
priv->timestamp = priv->timestamps[0];
|
||||
priv->first_buf_in_frame = false;
|
||||
|
||||
priv->picture.h264.num_ref_frames = priv->picture.h264.pps->sps->max_num_ref_frames;
|
||||
|
||||
@@ -127,7 +132,8 @@ static void vid_dec_h264_BeginFrame(vid_dec_PrivateType *priv)
|
||||
priv->frame_started = true;
|
||||
}
|
||||
|
||||
static struct pipe_video_buffer *vid_dec_h264_Flush(vid_dec_PrivateType *priv)
|
||||
static struct pipe_video_buffer *vid_dec_h264_Flush(vid_dec_PrivateType *priv,
|
||||
OMX_TICKS *timestamp)
|
||||
{
|
||||
struct dpb_list *entry, *result = NULL;
|
||||
struct pipe_video_buffer *buf;
|
||||
@@ -146,6 +152,8 @@ static struct pipe_video_buffer *vid_dec_h264_Flush(vid_dec_PrivateType *priv)
|
||||
return NULL;
|
||||
|
||||
buf = result->buffer;
|
||||
if (timestamp)
|
||||
*timestamp = result->timestamp;
|
||||
|
||||
--priv->codec_data.h264.dpb_num;
|
||||
LIST_DEL(&result->list);
|
||||
@@ -159,6 +167,7 @@ static void vid_dec_h264_EndFrame(vid_dec_PrivateType *priv)
|
||||
struct dpb_list *entry;
|
||||
struct pipe_video_buffer *tmp;
|
||||
bool top_field_first;
|
||||
OMX_TICKS timestamp;
|
||||
|
||||
if (!priv->frame_started)
|
||||
return;
|
||||
@@ -181,7 +190,9 @@ static void vid_dec_h264_EndFrame(vid_dec_PrivateType *priv)
|
||||
if (!entry)
|
||||
return;
|
||||
|
||||
priv->first_buf_in_frame = true;
|
||||
entry->buffer = priv->target;
|
||||
entry->timestamp = priv->timestamp;
|
||||
entry->poc = MIN2(priv->picture.h264.field_order_cnt[0], priv->picture.h264.field_order_cnt[1]);
|
||||
LIST_ADDTAIL(&entry->list, &priv->codec_data.h264.dpb_list);
|
||||
++priv->codec_data.h264.dpb_num;
|
||||
@@ -192,7 +203,8 @@ static void vid_dec_h264_EndFrame(vid_dec_PrivateType *priv)
|
||||
return;
|
||||
|
||||
tmp = priv->in_buffers[0]->pInputPortPrivate;
|
||||
priv->in_buffers[0]->pInputPortPrivate = vid_dec_h264_Flush(priv);
|
||||
priv->in_buffers[0]->pInputPortPrivate = vid_dec_h264_Flush(priv, ×tamp);
|
||||
priv->in_buffers[0]->nTimeStamp = timestamp;
|
||||
priv->target = tmp;
|
||||
priv->frame_finished = priv->in_buffers[0]->pInputPortPrivate != NULL;
|
||||
}
|
||||
@@ -829,7 +841,7 @@ static void slice_header(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp,
|
||||
priv->picture.h264.field_order_cnt[0] = expectedPicOrderCnt + priv->codec_data.h264.delta_pic_order_cnt[0];
|
||||
priv->picture.h264.field_order_cnt[1] = priv->picture.h264.field_order_cnt[0] +
|
||||
sps->offset_for_top_to_bottom_field + priv->codec_data.h264.delta_pic_order_cnt[1];
|
||||
|
||||
|
||||
} else if (!priv->picture.h264.bottom_field_flag)
|
||||
priv->picture.h264.field_order_cnt[0] = expectedPicOrderCnt + priv->codec_data.h264.delta_pic_order_cnt[0];
|
||||
else
|
||||
@@ -859,7 +871,7 @@ static void slice_header(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp,
|
||||
if (!priv->picture.h264.field_pic_flag) {
|
||||
priv->picture.h264.field_order_cnt[0] = tempPicOrderCnt;
|
||||
priv->picture.h264.field_order_cnt[1] = tempPicOrderCnt;
|
||||
|
||||
|
||||
} else if (!priv->picture.h264.bottom_field_flag)
|
||||
priv->picture.h264.field_order_cnt[0] = tempPicOrderCnt;
|
||||
else
|
||||
@@ -876,7 +888,7 @@ static void slice_header(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp,
|
||||
|
||||
priv->picture.h264.num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
|
||||
priv->picture.h264.num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
|
||||
|
||||
|
||||
if (slice_type == PIPE_H264_SLICE_TYPE_P ||
|
||||
slice_type == PIPE_H264_SLICE_TYPE_SP ||
|
||||
slice_type == PIPE_H264_SLICE_TYPE_B) {
|
||||
|
@@ -61,7 +61,7 @@ static uint8_t default_non_intra_matrix[64] = {
|
||||
|
||||
static void vid_dec_mpeg12_Decode(vid_dec_PrivateType *priv, struct vl_vlc *vlc, unsigned min_bits_left);
|
||||
static void vid_dec_mpeg12_EndFrame(vid_dec_PrivateType *priv);
|
||||
static struct pipe_video_buffer *vid_dec_mpeg12_Flush(vid_dec_PrivateType *priv);
|
||||
static struct pipe_video_buffer *vid_dec_mpeg12_Flush(vid_dec_PrivateType *priv, OMX_TICKS *timestamp);
|
||||
|
||||
void vid_dec_mpeg12_Init(vid_dec_PrivateType *priv)
|
||||
{
|
||||
@@ -131,10 +131,12 @@ static void vid_dec_mpeg12_EndFrame(vid_dec_PrivateType *priv)
|
||||
priv->in_buffers[0]->pInputPortPrivate = done;
|
||||
}
|
||||
|
||||
static struct pipe_video_buffer *vid_dec_mpeg12_Flush(vid_dec_PrivateType *priv)
|
||||
static struct pipe_video_buffer *vid_dec_mpeg12_Flush(vid_dec_PrivateType *priv, OMX_TICKS *timestamp)
|
||||
{
|
||||
struct pipe_video_buffer *result = priv->picture.mpeg12.ref[1];
|
||||
priv->picture.mpeg12.ref[1] = NULL;
|
||||
if (timestamp)
|
||||
*timestamp = OMX_VID_DEC_TIMESTAMP_INVALID;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@@ -179,7 +179,7 @@ static OMX_ERRORTYPE vid_enc_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING nam
|
||||
if (!screen->get_video_param(screen, PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH,
|
||||
PIPE_VIDEO_ENTRYPOINT_ENCODE, PIPE_VIDEO_CAP_SUPPORTED))
|
||||
return OMX_ErrorBadParameter;
|
||||
|
||||
|
||||
priv->stacked_frames_num = screen->get_video_param(screen,
|
||||
PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH,
|
||||
PIPE_VIDEO_ENTRYPOINT_ENCODE,
|
||||
@@ -242,7 +242,7 @@ static OMX_ERRORTYPE vid_enc_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING nam
|
||||
|
||||
port->Port_AllocateBuffer = vid_enc_AllocateOutBuffer;
|
||||
port->Port_FreeBuffer = vid_enc_FreeOutBuffer;
|
||||
|
||||
|
||||
priv->bitrate.eControlRate = OMX_Video_ControlRateDisable;
|
||||
priv->bitrate.nTargetBitrate = 0;
|
||||
|
||||
@@ -253,7 +253,7 @@ static OMX_ERRORTYPE vid_enc_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING nam
|
||||
priv->profile_level.eProfile = OMX_VIDEO_AVCProfileBaseline;
|
||||
priv->profile_level.eLevel = OMX_VIDEO_AVCLevel42;
|
||||
|
||||
priv->force_pic_type.IntraRefreshVOP = OMX_FALSE;
|
||||
priv->force_pic_type.IntraRefreshVOP = OMX_FALSE;
|
||||
priv->frame_num = 0;
|
||||
priv->pic_order_cnt = 0;
|
||||
priv->restricted_b_frames = debug_get_bool_option("OMX_USE_RESTRICTED_B_FRAMES", FALSE);
|
||||
@@ -380,7 +380,7 @@ static OMX_ERRORTYPE vid_enc_SetParameter(OMX_HANDLETYPE handle, OMX_INDEXTYPE i
|
||||
|
||||
port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_OUTPUTPORT_INDEX];
|
||||
port->sPortParam.nBufferSize = framesize * 512 / (16*16);
|
||||
|
||||
|
||||
priv->frame_rate = def->format.video.xFramerate;
|
||||
|
||||
priv->callbacks->EventHandler(comp, priv->callbackData, OMX_EventPortSettingsChanged,
|
||||
@@ -532,10 +532,10 @@ static OMX_ERRORTYPE vid_enc_SetConfig(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx,
|
||||
vid_enc_PrivateType *priv = comp->pComponentPrivate;
|
||||
OMX_ERRORTYPE r;
|
||||
int i;
|
||||
|
||||
|
||||
if (!config)
|
||||
return OMX_ErrorBadParameter;
|
||||
|
||||
|
||||
switch(idx) {
|
||||
case OMX_IndexConfigVideoIntraVOPRefresh: {
|
||||
OMX_CONFIG_INTRAREFRESHVOPTYPE *type = config;
|
||||
@@ -543,9 +543,9 @@ static OMX_ERRORTYPE vid_enc_SetConfig(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx,
|
||||
r = checkHeader(config, sizeof(OMX_CONFIG_INTRAREFRESHVOPTYPE));
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
|
||||
priv->force_pic_type = *type;
|
||||
|
||||
|
||||
break;
|
||||
}
|
||||
case OMX_IndexConfigCommonScale: {
|
||||
@@ -568,11 +568,11 @@ static OMX_ERRORTYPE vid_enc_SetConfig(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx,
|
||||
priv->scale = *scale;
|
||||
if (priv->scale.xWidth != 0xffffffff && priv->scale.xHeight != 0xffffffff) {
|
||||
struct pipe_video_buffer templat = {};
|
||||
|
||||
|
||||
templat.buffer_format = PIPE_FORMAT_NV12;
|
||||
templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
|
||||
templat.width = priv->scale.xWidth;
|
||||
templat.height = priv->scale.xHeight;
|
||||
templat.width = priv->scale.xWidth;
|
||||
templat.height = priv->scale.xHeight;
|
||||
templat.interlaced = false;
|
||||
for (i = 0; i < OMX_VID_ENC_NUM_SCALING_BUFFERS; ++i) {
|
||||
priv->scale_buffer[i] = priv->s_pipe->create_video_buffer(priv->s_pipe, &templat);
|
||||
@@ -615,7 +615,7 @@ static OMX_ERRORTYPE vid_enc_GetConfig(OMX_HANDLETYPE handle, OMX_INDEXTYPE idx,
|
||||
default:
|
||||
return omx_base_component_GetConfig(handle, idx, config);
|
||||
}
|
||||
|
||||
|
||||
return OMX_ErrorNone;
|
||||
}
|
||||
|
||||
@@ -1010,10 +1010,10 @@ static void enc_ControlPicture(omx_base_PortType *port, struct pipe_h264_enc_pic
|
||||
switch (priv->bitrate.eControlRate) {
|
||||
case OMX_Video_ControlRateVariable:
|
||||
rate_ctrl->rate_ctrl_method = PIPE_H264_ENC_RATE_CONTROL_METHOD_VARIABLE;
|
||||
break;
|
||||
break;
|
||||
case OMX_Video_ControlRateConstant:
|
||||
rate_ctrl->rate_ctrl_method = PIPE_H264_ENC_RATE_CONTROL_METHOD_CONSTANT;
|
||||
break;
|
||||
break;
|
||||
case OMX_Video_ControlRateVariableSkipFrames:
|
||||
rate_ctrl->rate_ctrl_method = PIPE_H264_ENC_RATE_CONTROL_METHOD_VARIABLE_SKIP;
|
||||
break;
|
||||
@@ -1023,8 +1023,8 @@ static void enc_ControlPicture(omx_base_PortType *port, struct pipe_h264_enc_pic
|
||||
default:
|
||||
rate_ctrl->rate_ctrl_method = PIPE_H264_ENC_RATE_CONTROL_METHOD_DISABLE;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
rate_ctrl->frame_rate_den = OMX_VID_ENC_CONTROL_FRAME_RATE_DEN_DEFAULT;
|
||||
rate_ctrl->frame_rate_num = ((priv->frame_rate) >> 16) * rate_ctrl->frame_rate_den;
|
||||
|
||||
@@ -1035,7 +1035,7 @@ static void enc_ControlPicture(omx_base_PortType *port, struct pipe_h264_enc_pic
|
||||
rate_ctrl->target_bitrate = priv->bitrate.nTargetBitrate;
|
||||
else
|
||||
rate_ctrl->target_bitrate = OMX_VID_ENC_BITRATE_MAX;
|
||||
rate_ctrl->peak_bitrate = rate_ctrl->target_bitrate;
|
||||
rate_ctrl->peak_bitrate = rate_ctrl->target_bitrate;
|
||||
if (rate_ctrl->target_bitrate < OMX_VID_ENC_BITRATE_MEDIAN)
|
||||
rate_ctrl->vbv_buffer_size = MIN2((rate_ctrl->target_bitrate * 2.75), OMX_VID_ENC_BITRATE_MEDIAN);
|
||||
else
|
||||
@@ -1051,7 +1051,7 @@ static void enc_ControlPicture(omx_base_PortType *port, struct pipe_h264_enc_pic
|
||||
rate_ctrl->peak_bits_picture_integer = rate_ctrl->target_bits_picture;
|
||||
rate_ctrl->peak_bits_picture_fraction = 0;
|
||||
}
|
||||
|
||||
|
||||
picture->quant_i_frames = priv->quant.nQpI;
|
||||
picture->quant_p_frames = priv->quant.nQpP;
|
||||
picture->quant_b_frames = priv->quant.nQpB;
|
||||
@@ -1069,7 +1069,7 @@ static void enc_HandleTask(omx_base_PortType *port, struct encode_task *task,
|
||||
unsigned size = priv->ports[OMX_BASE_FILTER_OUTPUTPORT_INDEX]->sPortParam.nBufferSize;
|
||||
struct pipe_video_buffer *vbuf = task->buf;
|
||||
struct pipe_h264_enc_picture_desc picture = {};
|
||||
|
||||
|
||||
/* -------------- scale input image --------- */
|
||||
enc_ScaleInput(port, &vbuf, &size);
|
||||
priv->s_pipe->flush(priv->s_pipe, NULL, 0);
|
||||
@@ -1160,7 +1160,7 @@ static OMX_ERRORTYPE vid_enc_EncodeFrame(omx_base_PortType *port, OMX_BUFFERHEAD
|
||||
priv->force_pic_type.IntraRefreshVOP) {
|
||||
enc_ClearBframes(port, inp);
|
||||
picture_type = PIPE_H264_ENC_PICTURE_TYPE_IDR;
|
||||
priv->force_pic_type.IntraRefreshVOP = OMX_FALSE;
|
||||
priv->force_pic_type.IntraRefreshVOP = OMX_FALSE;
|
||||
priv->frame_num = 0;
|
||||
} else if (priv->codec->profile == PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE ||
|
||||
!(priv->pic_order_cnt % OMX_VID_ENC_P_PERIOD_DEFAULT) ||
|
||||
@@ -1169,7 +1169,7 @@ static OMX_ERRORTYPE vid_enc_EncodeFrame(omx_base_PortType *port, OMX_BUFFERHEAD
|
||||
} else {
|
||||
picture_type = PIPE_H264_ENC_PICTURE_TYPE_B;
|
||||
}
|
||||
|
||||
|
||||
task->pic_order_cnt = priv->pic_order_cnt++;
|
||||
|
||||
if (picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
|
||||
@@ -1245,7 +1245,7 @@ static void vid_enc_BufferEncoded(OMX_COMPONENTTYPE *comp, OMX_BUFFERHEADERTYPE*
|
||||
output->pBuffer = priv->t_pipe->transfer_map(priv->t_pipe, outp->bitstream, 0,
|
||||
PIPE_TRANSFER_READ_WRITE,
|
||||
&box, &outp->transfer);
|
||||
|
||||
|
||||
/* ------------- get size of result ----------------- */
|
||||
|
||||
priv->codec->get_feedback(priv->codec, task->feedback, &size);
|
||||
|
@@ -52,6 +52,7 @@
|
||||
#include <unistd.h>
|
||||
|
||||
#define VMW_MAX_DEFAULT_TEXTURE_SIZE (128 * 1024 * 1024)
|
||||
#define VMW_FENCE_TIMEOUT_SECONDS 60
|
||||
|
||||
struct vmw_region
|
||||
{
|
||||
@@ -721,7 +722,7 @@ vmw_ioctl_fence_finish(struct vmw_winsys_screen *vws,
|
||||
memset(&arg, 0, sizeof(arg));
|
||||
|
||||
arg.handle = handle;
|
||||
arg.timeout_us = 10*1000000;
|
||||
arg.timeout_us = VMW_FENCE_TIMEOUT_SECONDS*1000000;
|
||||
arg.lazy = 0;
|
||||
arg.flags = vflags;
|
||||
|
||||
|
@@ -170,6 +170,8 @@ vmw_svga_winsys_surface_unmap(struct svga_winsys_context *swc,
|
||||
*rebind = vsrf->rebind;
|
||||
vsrf->rebind = FALSE;
|
||||
vmw_svga_winsys_buffer_unmap(&vsrf->screen->base, vsrf->buf);
|
||||
} else {
|
||||
*rebind = FALSE;
|
||||
}
|
||||
pipe_mutex_unlock(vsrf->mutex);
|
||||
}
|
||||
|
@@ -2441,8 +2441,10 @@ fs_visitor::opt_sampler_eot()
|
||||
* we have enough space, but it will make sure the dead code eliminator kills
|
||||
* the instruction that this will replace.
|
||||
*/
|
||||
if (tex_inst->header_size != 0)
|
||||
if (tex_inst->header_size != 0) {
|
||||
invalidate_live_intervals();
|
||||
return true;
|
||||
}
|
||||
|
||||
fs_reg send_header = ibld.vgrf(BRW_REGISTER_TYPE_F,
|
||||
load_payload->sources + 1);
|
||||
@@ -2473,6 +2475,7 @@ fs_visitor::opt_sampler_eot()
|
||||
tex_inst->insert_before(cfg->blocks[cfg->num_blocks - 1], new_load_payload);
|
||||
tex_inst->src[0] = send_header;
|
||||
|
||||
invalidate_live_intervals();
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -5187,12 +5190,18 @@ fs_visitor::optimize()
|
||||
void
|
||||
fs_visitor::fixup_3src_null_dest()
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
|
||||
if (inst->is_3src() && inst->dst.is_null()) {
|
||||
inst->dst = fs_reg(VGRF, alloc.allocate(dispatch_width / 8),
|
||||
inst->dst.type);
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (progress)
|
||||
invalidate_live_intervals();
|
||||
}
|
||||
|
||||
void
|
||||
@@ -5228,7 +5237,7 @@ fs_visitor::allocate_registers()
|
||||
* SIMD8. There's probably actually some intermediate point where
|
||||
* SIMD16 with a couple of spills is still better.
|
||||
*/
|
||||
if (dispatch_width == 16) {
|
||||
if (dispatch_width == 16 && min_dispatch_width <= 8) {
|
||||
fail("Failure to register allocate. Reduce number of "
|
||||
"live scalar values to avoid this.");
|
||||
} else {
|
||||
@@ -5470,6 +5479,13 @@ fs_visitor::run_cs()
|
||||
if (shader_time_index >= 0)
|
||||
emit_shader_time_begin();
|
||||
|
||||
if (devinfo->is_haswell && prog_data->total_shared > 0) {
|
||||
/* Move SLM index from g0.0[27:24] to sr0.1[11:8] */
|
||||
const fs_builder abld = bld.exec_all().group(1, 0);
|
||||
abld.MOV(retype(suboffset(brw_sr0_reg(), 1), BRW_REGISTER_TYPE_UW),
|
||||
suboffset(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW), 1));
|
||||
}
|
||||
|
||||
emit_nir_code();
|
||||
|
||||
if (failed)
|
||||
|
@@ -407,6 +407,7 @@ public:
|
||||
bool spilled_any_registers;
|
||||
|
||||
const unsigned dispatch_width; /**< 8 or 16 */
|
||||
unsigned min_dispatch_width;
|
||||
|
||||
int shader_time_index;
|
||||
|
||||
|
@@ -1021,6 +1021,18 @@ fs_visitor::init()
|
||||
unreachable("unhandled shader stage");
|
||||
}
|
||||
|
||||
if (stage == MESA_SHADER_COMPUTE) {
|
||||
const brw_cs_prog_data *cs_prog_data =
|
||||
(const brw_cs_prog_data *) prog_data;
|
||||
unsigned size = cs_prog_data->local_size[0] *
|
||||
cs_prog_data->local_size[1] *
|
||||
cs_prog_data->local_size[2];
|
||||
size = DIV_ROUND_UP(size, devinfo->max_cs_threads);
|
||||
min_dispatch_width = size > 16 ? 32 : (size > 8 ? 16 : 8);
|
||||
} else {
|
||||
min_dispatch_width = 8;
|
||||
}
|
||||
|
||||
this->prog_data = this->stage_prog_data;
|
||||
|
||||
this->failed = false;
|
||||
|
@@ -338,8 +338,6 @@ brw_emit_mi_flush(struct brw_context *brw)
|
||||
}
|
||||
brw_emit_pipe_control_flush(brw, flags);
|
||||
}
|
||||
|
||||
brw_render_cache_set_clear(brw);
|
||||
}
|
||||
|
||||
int
|
||||
|
@@ -736,6 +736,22 @@ brw_notification_reg(void)
|
||||
WRITEMASK_X);
|
||||
}
|
||||
|
||||
static inline struct brw_reg
|
||||
brw_sr0_reg(void)
|
||||
{
|
||||
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
|
||||
BRW_ARF_STATE,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
BRW_REGISTER_TYPE_UD,
|
||||
BRW_VERTICAL_STRIDE_8,
|
||||
BRW_WIDTH_8,
|
||||
BRW_HORIZONTAL_STRIDE_1,
|
||||
BRW_SWIZZLE_XYZW,
|
||||
WRITEMASK_XYZW);
|
||||
}
|
||||
|
||||
static inline struct brw_reg
|
||||
brw_acc_reg(unsigned width)
|
||||
{
|
||||
|
@@ -1033,6 +1033,7 @@ vec4_visitor::opt_register_coalesce()
|
||||
|
||||
if (is_nop_mov) {
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
@@ -685,9 +685,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
||||
case nir_intrinsic_load_instance_id:
|
||||
case nir_intrinsic_load_base_instance:
|
||||
case nir_intrinsic_load_draw_id:
|
||||
case nir_intrinsic_load_invocation_id:
|
||||
case nir_intrinsic_load_tess_level_inner:
|
||||
case nir_intrinsic_load_tess_level_outer: {
|
||||
case nir_intrinsic_load_invocation_id: {
|
||||
gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
|
||||
src_reg val = src_reg(nir_system_values[sv]);
|
||||
assert(val.file != BAD_FILE);
|
||||
|
@@ -402,6 +402,7 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
||||
}
|
||||
} else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) {
|
||||
dst.type = BRW_REGISTER_TYPE_F;
|
||||
unsigned swiz = BRW_SWIZZLE_WZYX;
|
||||
|
||||
/* This is a read of gl_TessLevelOuter[], which lives in the
|
||||
* high 4 DWords of the Patch URB header, in reverse order.
|
||||
@@ -414,6 +415,8 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
||||
dst.writemask = WRITEMASK_XYZ;
|
||||
break;
|
||||
case GL_ISOLINES:
|
||||
/* Isolines are not reversed; swizzle .zw -> .xy */
|
||||
swiz = BRW_SWIZZLE_ZWZW;
|
||||
dst.writemask = WRITEMASK_XY;
|
||||
return;
|
||||
default:
|
||||
@@ -422,7 +425,7 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
||||
|
||||
dst_reg tmp(this, glsl_type::vec4_type);
|
||||
emit_output_urb_read(tmp, 1, src_reg());
|
||||
emit(MOV(dst, swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX)));
|
||||
emit(MOV(dst, swizzle(src_reg(tmp), swiz)));
|
||||
} else {
|
||||
emit_output_urb_read(dst, imm_offset, indirect_offset);
|
||||
}
|
||||
@@ -475,8 +478,15 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
||||
* Patch URB Header at DWords 4-7. However, it's reversed, so
|
||||
* instead of .xyzw we have .wzyx.
|
||||
*/
|
||||
swiz = BRW_SWIZZLE_WZYX;
|
||||
mask = writemask_for_backwards_vector(mask);
|
||||
if (key->tes_primitive_mode == GL_ISOLINES) {
|
||||
/* Isolines .xy should be stored in .zw, in order. */
|
||||
swiz = BRW_SWIZZLE4(0, 0, 0, 1);
|
||||
mask <<= 2;
|
||||
} else {
|
||||
/* Other domains are reversed; store .wzyx instead of .xyzw. */
|
||||
swiz = BRW_SWIZZLE_WZYX;
|
||||
mask = writemask_for_backwards_vector(mask);
|
||||
}
|
||||
}
|
||||
|
||||
emit_urb_write(swizzle(value, swiz), mask,
|
||||
|
@@ -28,6 +28,7 @@
|
||||
*/
|
||||
|
||||
#include "brw_vec4_tes.h"
|
||||
#include "brw_cfg.h"
|
||||
|
||||
namespace brw {
|
||||
|
||||
@@ -53,39 +54,10 @@ vec4_tes_visitor::make_reg_for_system_value(int location, const glsl_type *type)
|
||||
void
|
||||
vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
|
||||
{
|
||||
const struct brw_tes_prog_data *tes_prog_data =
|
||||
(const struct brw_tes_prog_data *) prog_data;
|
||||
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_load_tess_level_outer: {
|
||||
dst_reg dst(this, glsl_type::vec4_type);
|
||||
nir_system_values[SYSTEM_VALUE_TESS_LEVEL_OUTER] = dst;
|
||||
|
||||
dst_reg temp(this, glsl_type::vec4_type);
|
||||
vec4_instruction *read =
|
||||
emit(VEC4_OPCODE_URB_READ, temp, input_read_header);
|
||||
read->offset = 1;
|
||||
read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
|
||||
emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX)));
|
||||
case nir_intrinsic_load_tess_level_outer:
|
||||
case nir_intrinsic_load_tess_level_inner:
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_tess_level_inner: {
|
||||
dst_reg dst(this, glsl_type::vec2_type);
|
||||
nir_system_values[SYSTEM_VALUE_TESS_LEVEL_INNER] = dst;
|
||||
|
||||
/* Set up the message header to reference the proper parts of the URB */
|
||||
dst_reg temp(this, glsl_type::vec4_type);
|
||||
vec4_instruction *read =
|
||||
emit(VEC4_OPCODE_URB_READ, temp, input_read_header);
|
||||
read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
|
||||
if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
|
||||
emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX)));
|
||||
} else {
|
||||
read->offset = 1;
|
||||
emit(MOV(dst, src_reg(temp)));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
vec4_visitor::nir_setup_system_value_intrinsic(instr);
|
||||
}
|
||||
@@ -105,6 +77,25 @@ vec4_tes_visitor::setup_payload()
|
||||
|
||||
reg = setup_uniforms(reg);
|
||||
|
||||
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (inst->src[i].file != ATTR)
|
||||
continue;
|
||||
|
||||
struct brw_reg grf =
|
||||
brw_vec4_grf(reg + inst->src[i].nr / 2, 4 * (inst->src[i].nr % 2));
|
||||
grf = stride(grf, 0, 4, 1);
|
||||
grf.swizzle = inst->src[i].swizzle;
|
||||
grf.type = inst->src[i].type;
|
||||
grf.abs = inst->src[i].abs;
|
||||
grf.negate = inst->src[i].negate;
|
||||
|
||||
inst->src[i] = grf;
|
||||
}
|
||||
}
|
||||
|
||||
reg += 8 * prog_data->urb_read_length;
|
||||
|
||||
this->first_non_payload_grf = reg;
|
||||
}
|
||||
|
||||
@@ -148,12 +139,36 @@ vec4_tes_visitor::emit_urb_write_opcode(bool complete)
|
||||
void
|
||||
vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
||||
{
|
||||
const struct brw_tes_prog_data *tes_prog_data =
|
||||
(const struct brw_tes_prog_data *) prog_data;
|
||||
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_load_tess_coord:
|
||||
/* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
|
||||
emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
|
||||
src_reg(brw_vec8_grf(1, 0))));
|
||||
break;
|
||||
case nir_intrinsic_load_tess_level_outer:
|
||||
if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
|
||||
emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
|
||||
swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
|
||||
BRW_SWIZZLE_ZWZW)));
|
||||
} else {
|
||||
emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
|
||||
swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
|
||||
BRW_SWIZZLE_WZYX)));
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_tess_level_inner:
|
||||
if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
|
||||
emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
|
||||
swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
|
||||
BRW_SWIZZLE_WZYX)));
|
||||
} else {
|
||||
emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
|
||||
src_reg(ATTR, 1, glsl_type::float_type)));
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_primitive_id:
|
||||
emit(TES_OPCODE_GET_PRIMITIVE_ID,
|
||||
get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
|
||||
@@ -169,6 +184,19 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
||||
header = src_reg(this, glsl_type::uvec4_type);
|
||||
emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
|
||||
input_read_header, indirect_offset);
|
||||
} else {
|
||||
/* Arbitrarily only push up to 24 vec4 slots worth of data,
|
||||
* which is 12 registers (since each holds 2 vec4 slots).
|
||||
*/
|
||||
const unsigned max_push_slots = 24;
|
||||
if (imm_offset < max_push_slots) {
|
||||
emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D),
|
||||
src_reg(ATTR, imm_offset, glsl_type::ivec4_type)));
|
||||
prog_data->urb_read_length =
|
||||
MAX2(prog_data->urb_read_length,
|
||||
DIV_ROUND_UP(imm_offset + 1, 2));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
dst_reg temp(this, glsl_type::ivec4_type);
|
||||
|
@@ -140,9 +140,9 @@ copy_image_with_memcpy(struct brw_context *brw,
|
||||
_mesa_get_format_block_size(src_mt->format, &src_bw, &src_bh);
|
||||
|
||||
assert(src_width % src_bw == 0);
|
||||
assert(src_height % src_bw == 0);
|
||||
assert(src_height % src_bh == 0);
|
||||
assert(src_x % src_bw == 0);
|
||||
assert(src_y % src_bw == 0);
|
||||
assert(src_y % src_bh == 0);
|
||||
|
||||
/* If we are on the same miptree, same level, and same slice, then
|
||||
* intel_miptree_map won't let us map it twice. We have to do things a
|
||||
@@ -153,7 +153,7 @@ copy_image_with_memcpy(struct brw_context *brw,
|
||||
|
||||
if (same_slice) {
|
||||
assert(dst_x % src_bw == 0);
|
||||
assert(dst_y % src_bw == 0);
|
||||
assert(dst_y % src_bh == 0);
|
||||
|
||||
map_x1 = MIN2(src_x, dst_x);
|
||||
map_y1 = MIN2(src_y, dst_y);
|
||||
|
@@ -1065,7 +1065,28 @@ brw_render_cache_set_check_flush(struct brw_context *brw, drm_intel_bo *bo)
|
||||
if (!_mesa_set_search(brw->render_cache, bo))
|
||||
return;
|
||||
|
||||
brw_emit_mi_flush(brw);
|
||||
if (brw->gen >= 6) {
|
||||
if (brw->gen == 6) {
|
||||
/* [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
|
||||
* Flush Enable = 1, a PIPE_CONTROL with any non-zero
|
||||
* post-sync-op is required.
|
||||
*/
|
||||
brw_emit_post_sync_nonzero_flush(brw);
|
||||
}
|
||||
|
||||
brw_emit_pipe_control_flush(brw,
|
||||
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
|
||||
PIPE_CONTROL_RENDER_TARGET_FLUSH |
|
||||
PIPE_CONTROL_CS_STALL);
|
||||
|
||||
brw_emit_pipe_control_flush(brw,
|
||||
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
|
||||
PIPE_CONTROL_CONST_CACHE_INVALIDATE);
|
||||
} else {
|
||||
brw_emit_mi_flush(brw);
|
||||
}
|
||||
|
||||
brw_render_cache_set_clear(brw);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -50,7 +50,7 @@ intel_miptree_create_for_teximage(struct brw_context *brw,
|
||||
width <<= 1;
|
||||
if (height != 1)
|
||||
height <<= 1;
|
||||
if (depth != 1)
|
||||
if (intelObj->base.Target == GL_TEXTURE_3D)
|
||||
depth <<= 1;
|
||||
}
|
||||
|
||||
|
@@ -387,6 +387,7 @@ st_update_renderbuffer_surface(struct st_context *st,
|
||||
{
|
||||
struct pipe_context *pipe = st->pipe;
|
||||
struct pipe_resource *resource = strb->texture;
|
||||
struct st_texture_object *stTexObj = NULL;
|
||||
unsigned rtt_width = strb->Base.Width;
|
||||
unsigned rtt_height = strb->Base.Height;
|
||||
unsigned rtt_depth = strb->Base.Depth;
|
||||
@@ -398,9 +399,18 @@ st_update_renderbuffer_surface(struct st_context *st,
|
||||
*/
|
||||
boolean enable_srgb = (st->ctx->Color.sRGBEnabled &&
|
||||
_mesa_get_format_color_encoding(strb->Base.Format) == GL_SRGB);
|
||||
enum pipe_format format = (enable_srgb) ?
|
||||
util_format_srgb(resource->format) :
|
||||
util_format_linear(resource->format);
|
||||
enum pipe_format format = resource->format;
|
||||
|
||||
if (strb->is_rtt) {
|
||||
stTexObj = st_texture_object(strb->Base.TexImage->TexObject);
|
||||
if (stTexObj->surface_based)
|
||||
format = stTexObj->surface_format;
|
||||
}
|
||||
|
||||
format = (enable_srgb) ?
|
||||
util_format_srgb(format) :
|
||||
util_format_linear(format);
|
||||
|
||||
unsigned first_layer, last_layer, level;
|
||||
|
||||
if (resource->target == PIPE_TEXTURE_1D_ARRAY) {
|
||||
@@ -431,8 +441,8 @@ st_update_renderbuffer_surface(struct st_context *st,
|
||||
|
||||
/* Adjust for texture views */
|
||||
if (strb->is_rtt && resource->array_size > 1 &&
|
||||
strb->Base.TexImage->TexObject->Immutable) {
|
||||
struct gl_texture_object *tex = strb->Base.TexImage->TexObject;
|
||||
stTexObj->base.Immutable) {
|
||||
struct gl_texture_object *tex = &stTexObj->base;
|
||||
first_layer += tex->MinLayer;
|
||||
if (!strb->rtt_layered)
|
||||
last_layer += tex->MinLayer;
|
||||
@@ -492,8 +502,6 @@ st_render_texture(struct gl_context *ctx,
|
||||
|
||||
st_update_renderbuffer_surface(st, strb);
|
||||
|
||||
strb->Base.Format = st_pipe_format_to_mesa_format(pt->format);
|
||||
|
||||
/* Invalidate buffer state so that the pipe's framebuffer state
|
||||
* gets updated.
|
||||
* That's where the new renderbuffer (which we just created) gets
|
||||
|
@@ -2886,10 +2886,13 @@ st_finalize_texture(struct gl_context *ctx,
|
||||
/* Need to import images in main memory or held in other textures.
|
||||
*/
|
||||
if (stImage && stObj->pt != stImage->pt) {
|
||||
GLuint depth = stObj->depth0;
|
||||
if (stObj->base.Target == GL_TEXTURE_3D)
|
||||
depth = u_minify(depth, level);
|
||||
if (level == 0 ||
|
||||
(stImage->base.Width == u_minify(stObj->width0, level) &&
|
||||
stImage->base.Height == u_minify(stObj->height0, level) &&
|
||||
stImage->base.Depth == u_minify(stObj->depth0, level))) {
|
||||
stImage->base.Depth == depth)) {
|
||||
/* src image fits expected dest mipmap level size */
|
||||
copy_image_data_to_texture(st, stObj, level, stImage);
|
||||
}
|
||||
|
@@ -900,11 +900,16 @@ void
|
||||
_swrast_render_finish( struct gl_context *ctx )
|
||||
{
|
||||
SWcontext *swrast = SWRAST_CONTEXT(ctx);
|
||||
struct gl_query_object *query = ctx->Query.CurrentOcclusionObject;
|
||||
|
||||
_swrast_flush(ctx);
|
||||
|
||||
if (swrast->Driver.SpanRenderFinish)
|
||||
swrast->Driver.SpanRenderFinish( ctx );
|
||||
|
||||
if (query && (query->Target == GL_ANY_SAMPLES_PASSED ||
|
||||
query->Target == GL_ANY_SAMPLES_PASSED_CONSERVATIVE))
|
||||
query->Result = !!query->Result;
|
||||
}