Switch between memcpy implementations according to src/dest alignment.

Debug off
Quieten depend
2006-02-01 18:42:16 +00:00 · 2006-02-01 18:30:16 +00:00 · 2006-02-01 18:29:56 +00:00 · 2006-02-01 18:29:25 +00:00 · 2006-02-01 18:14:09 +00:00 · 2006-02-01 18:13:23 +00:00
50 changed files with 5662 additions and 2642 deletions
--- a/configs/linux-dri
+++ b/configs/linux-dri
@@ -9,7 +9,7 @@ CONFIG_NAME = linux-dri
 CC = gcc
 CXX = g++

-MKDEP = /usr/X11R6/bin/makedepend
+#MKDEP = /usr/X11R6/bin/makedepend
 #MKDEP = gcc -M
 #MKDEP_OPTIONS = -MF depend

@@ -63,5 +63,7 @@ WINDOW_SYSTEM=dri

 # gamma are missing because they have not been converted to use the new
 # interface.
-DRI_DIRS = i810 i830 i915 mach64 mga r128 r200 r300 radeon s3v \
+DRI_DIRS = i810 i915 mach64 mga r128 r200 r300 radeon s3v \
 	savage sis tdfx trident unichrome ffb
+
+DRI_DIRS = i915 
--- a/progs/demos/Makefile
+++ b/progs/demos/Makefile
@@ -25,6 +25,7 @@ PROGS = \
 	fplight \
 	gamma \
 	gears \
+	gearbox \
 	geartrain \
 	glinfo \
 	gloss \
@@ -73,9 +74,8 @@ PROGS = \

 ##### TARGETS #####

-default: $(PROGS)
+default: readtex.o $(PROGS)

-$(PROGS): readtex.o

 readtex.c: $(TOP)/progs/util/readtex.c
 	cp $< .
--- a/progs/demos/gearbox.c
+++ b/progs/demos/gearbox.c
@@ -0,0 +1,486 @@
+/*
+ * Use glCopyTexSubImage2D to draw animated gears on the sides of a box.
+ *
+ * Brian Paul
+ * 27 January 2006
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <GL/glut.h>
+
+#ifndef M_PI
+#define M_PI 3.14159265
+#endif
+
+static GLint WinWidth = 800, WinHeight = 500;
+static GLint TexWidth, TexHeight;
+static GLuint TexObj = 1;
+static GLenum IntFormat = GL_RGBA;
+
+static GLboolean WireFrame = GL_FALSE;
+
+static GLint T0 = 0;
+static GLint Frames = 0;
+static GLint Win = 0;
+
+static GLfloat ViewRotX = 20.0, ViewRotY = 30.0, ViewRotZ = 0.0;
+static GLint Gear1, Gear2, Gear3;
+static GLfloat GearRot = 0.0;
+static GLfloat CubeRot = 0.0;
+
+
+/**
+  Draw a gear wheel.  You'll probably want to call this function when
+  building a display list since we do a lot of trig here.
+ 
+  Input:  inner_radius - radius of hole at center
+          outer_radius - radius at center of teeth
+          width - width of gear
+          teeth - number of teeth
+          tooth_depth - depth of tooth
+ **/
+static void
+gear(GLfloat inner_radius, GLfloat outer_radius, GLfloat width,
+     GLint teeth, GLfloat tooth_depth)
+{
+  GLint i;
+  GLfloat r0, r1, r2;
+  GLfloat angle, da;
+  GLfloat u, v, len;
+
+  r0 = inner_radius;
+  r1 = outer_radius - tooth_depth / 2.0;
+  r2 = outer_radius + tooth_depth / 2.0;
+
+  da = 2.0 * M_PI / teeth / 4.0;
+
+  glShadeModel(GL_FLAT);
+
+  glNormal3f(0.0, 0.0, 1.0);
+
+  /* draw front face */
+  glBegin(GL_QUAD_STRIP);
+  for (i = 0; i <= teeth; i++) {
+    angle = i * 2.0 * M_PI / teeth;
+    glVertex3f(r0 * cos(angle), r0 * sin(angle), width * 0.5);
+    glVertex3f(r1 * cos(angle), r1 * sin(angle), width * 0.5);
+    if (i < teeth) {
+      glVertex3f(r0 * cos(angle), r0 * sin(angle), width * 0.5);
+      glVertex3f(r1 * cos(angle + 3 * da), r1 * sin(angle + 3 * da), width * 0.5);
+    }
+  }
+  glEnd();
+
+  /* draw front sides of teeth */
+  glBegin(GL_QUADS);
+  da = 2.0 * M_PI / teeth / 4.0;
+  for (i = 0; i < teeth; i++) {
+    angle = i * 2.0 * M_PI / teeth;
+
+    glVertex3f(r1 * cos(angle), r1 * sin(angle), width * 0.5);
+    glVertex3f(r2 * cos(angle + da), r2 * sin(angle + da), width * 0.5);
+    glVertex3f(r2 * cos(angle + 2 * da), r2 * sin(angle + 2 * da), width * 0.5);
+    glVertex3f(r1 * cos(angle + 3 * da), r1 * sin(angle + 3 * da), width * 0.5);
+  }
+  glEnd();
+
+  glNormal3f(0.0, 0.0, -1.0);
+
+  /* draw back face */
+  glBegin(GL_QUAD_STRIP);
+  for (i = 0; i <= teeth; i++) {
+    angle = i * 2.0 * M_PI / teeth;
+    glVertex3f(r1 * cos(angle), r1 * sin(angle), -width * 0.5);
+    glVertex3f(r0 * cos(angle), r0 * sin(angle), -width * 0.5);
+    if (i < teeth) {
+      glVertex3f(r1 * cos(angle + 3 * da), r1 * sin(angle + 3 * da), -width * 0.5);
+      glVertex3f(r0 * cos(angle), r0 * sin(angle), -width * 0.5);
+    }
+  }
+  glEnd();
+
+  /* draw back sides of teeth */
+  glBegin(GL_QUADS);
+  da = 2.0 * M_PI / teeth / 4.0;
+  for (i = 0; i < teeth; i++) {
+    angle = i * 2.0 * M_PI / teeth;
+
+    glVertex3f(r1 * cos(angle + 3 * da), r1 * sin(angle + 3 * da), -width * 0.5);
+    glVertex3f(r2 * cos(angle + 2 * da), r2 * sin(angle + 2 * da), -width * 0.5);
+    glVertex3f(r2 * cos(angle + da), r2 * sin(angle + da), -width * 0.5);
+    glVertex3f(r1 * cos(angle), r1 * sin(angle), -width * 0.5);
+  }
+  glEnd();
+
+  /* draw outward faces of teeth */
+  glBegin(GL_QUAD_STRIP);
+  for (i = 0; i < teeth; i++) {
+    angle = i * 2.0 * M_PI / teeth;
+
+    glVertex3f(r1 * cos(angle), r1 * sin(angle), width * 0.5);
+    glVertex3f(r1 * cos(angle), r1 * sin(angle), -width * 0.5);
+    u = r2 * cos(angle + da) - r1 * cos(angle);
+    v = r2 * sin(angle + da) - r1 * sin(angle);
+    len = sqrt(u * u + v * v);
+    u /= len;
+    v /= len;
+    glNormal3f(v, -u, 0.0);
+    glVertex3f(r2 * cos(angle + da), r2 * sin(angle + da), width * 0.5);
+    glVertex3f(r2 * cos(angle + da), r2 * sin(angle + da), -width * 0.5);
+    glNormal3f(cos(angle), sin(angle), 0.0);
+    glVertex3f(r2 * cos(angle + 2 * da), r2 * sin(angle + 2 * da), width * 0.5);
+    glVertex3f(r2 * cos(angle + 2 * da), r2 * sin(angle + 2 * da), -width * 0.5);
+    u = r1 * cos(angle + 3 * da) - r2 * cos(angle + 2 * da);
+    v = r1 * sin(angle + 3 * da) - r2 * sin(angle + 2 * da);
+    glNormal3f(v, -u, 0.0);
+    glVertex3f(r1 * cos(angle + 3 * da), r1 * sin(angle + 3 * da), width * 0.5);
+    glVertex3f(r1 * cos(angle + 3 * da), r1 * sin(angle + 3 * da), -width * 0.5);
+    glNormal3f(cos(angle), sin(angle), 0.0);
+  }
+
+  glVertex3f(r1 * cos(0), r1 * sin(0), width * 0.5);
+  glVertex3f(r1 * cos(0), r1 * sin(0), -width * 0.5);
+
+  glEnd();
+
+  glShadeModel(GL_SMOOTH);
+
+  /* draw inside radius cylinder */
+  glBegin(GL_QUAD_STRIP);
+  for (i = 0; i <= teeth; i++) {
+    angle = i * 2.0 * M_PI / teeth;
+    glNormal3f(-cos(angle), -sin(angle), 0.0);
+    glVertex3f(r0 * cos(angle), r0 * sin(angle), -width * 0.5);
+    glVertex3f(r0 * cos(angle), r0 * sin(angle), width * 0.5);
+  }
+  glEnd();
+
+}
+
+static void
+cleanup(void)
+{
+   glDeleteTextures(1, &TexObj);
+   glDeleteLists(Gear1, 1);
+   glDeleteLists(Gear2, 1);
+   glDeleteLists(Gear3, 1);
+   glutDestroyWindow(Win);
+}
+
+
+static void
+DrawGears(void)
+{
+   if (WireFrame) {
+      glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
+   }
+
+   glPushMatrix();
+      glRotatef(20/*ViewRotX*/, 1.0, 0.0, 0.0);
+      glRotatef(ViewRotY, 0.0, 1.0, 0.0);
+      glRotatef(ViewRotZ, 0.0, 0.0, 1.0);
+
+      glPushMatrix();
+         glTranslatef(-3.0, -2.0, 0.0);
+         glRotatef(GearRot, 0.0, 0.0, 1.0);
+         glCallList(Gear1);
+      glPopMatrix();
+
+      glPushMatrix();
+         glTranslatef(3.1, -2.0, 0.0);
+         glRotatef(-2.0 * GearRot - 9.0, 0.0, 0.0, 1.0);
+         glCallList(Gear2);
+      glPopMatrix();
+
+      glPushMatrix();
+         glTranslatef(-3.1, 4.2, 0.0);
+         glRotatef(-2.0 * GearRot - 25.0, 0.0, 0.0, 1.0);
+         glCallList(Gear3);
+      glPopMatrix();
+
+  glPopMatrix();
+
+  glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+}
+
+
+static void
+DrawCube(void)
+{
+   static const GLfloat texcoords[4][2] = {
+      { 0, 0 }, { 1, 0 }, { 1, 1 }, { 0, 1 }
+   };
+   static const GLfloat vertices[4][2] = {
+      { -1, -1 }, { 1, -1 }, { 1, 1 }, { -1, 1 }
+   };
+   static const GLfloat xforms[6][4] = {
+      {   0, 0, 1, 0 },
+      {  90, 0, 1, 0 },
+      { 180, 0, 1, 0 },
+      { 270, 0, 1, 0 },
+      {  90, 1, 0, 0 },
+      { -90, 1, 0, 0 }
+   };
+   static const GLfloat mat[4] = { 1.0, 1.0, 0.5, 1.0 };
+   GLint i, j;
+
+   glMaterialfv(GL_FRONT_AND_BACK, GL_AMBIENT_AND_DIFFUSE, mat);
+   glEnable(GL_TEXTURE_2D);
+
+   glPushMatrix();
+      glRotatef(ViewRotX, 1.0, 0.0, 0.0);
+      glRotatef(15, 1, 0, 0);
+      glRotatef(CubeRot, 0, 1, 0);
+      glScalef(4, 4, 4);
+
+      for (i = 0; i < 6; i++) {
+         glPushMatrix();
+            glRotatef(xforms[i][0], xforms[i][1], xforms[i][2], xforms[i][3]);
+            glTranslatef(0, 0, 1.1);
+            glBegin(GL_POLYGON);
+               glNormal3f(0, 0, 1);
+               for (j = 0; j < 4; j++) {
+                  glTexCoord2fv(texcoords[j]);
+                  glVertex2fv(vertices[j]);
+               }
+            glEnd();
+         glPopMatrix();
+      }
+   glPopMatrix();
+
+   glDisable(GL_TEXTURE_2D);
+}
+
+
+static void
+draw(void)
+{
+   float ar;
+
+   glMatrixMode(GL_MODELVIEW);
+   glLoadIdentity();
+   glTranslatef(0.0, 0.0, -40.0);
+
+   glDisable(GL_SCISSOR_TEST);
+   glClear(GL_DEPTH_BUFFER_BIT);
+   glEnable(GL_SCISSOR_TEST);
+
+   /* draw gears */
+   glViewport(0, 0, TexWidth, TexHeight);
+   glScissor(0, 0, TexWidth, TexHeight);
+   glClearColor(0.5, 0.5, 0.8, 0.0);
+   glClearColor(1, 1, 1, 0);
+   glClear(GL_COLOR_BUFFER_BIT);
+
+   glMatrixMode(GL_PROJECTION);
+   glLoadIdentity();
+   glFrustum(-1.0, 1.0, -1.0, 1.0, 5.0, 60.0);
+   glMatrixMode(GL_MODELVIEW);
+
+   DrawGears();
+
+   glCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, TexWidth, TexHeight);
+   
+   /* draw textured cube */
+   glViewport(TexWidth, 0, WinWidth - TexWidth, WinHeight);
+   glScissor(TexWidth, 0, WinWidth - TexWidth, WinHeight);
+   glClearColor(0.5, 0.5, 0.8, 0.0);
+   glClear(GL_COLOR_BUFFER_BIT);
+
+   ar = (float) (WinWidth - TexWidth) / WinHeight;
+   glMatrixMode(GL_PROJECTION);
+   glLoadIdentity();
+   glFrustum(-ar, ar, -1.0, 1.0, 5.0, 60.0);
+   glMatrixMode(GL_MODELVIEW);
+
+   DrawCube();
+
+   /* finish up */
+   glutSwapBuffers();
+
+   Frames++;
+   {
+      GLint t = glutGet(GLUT_ELAPSED_TIME);
+      if (t - T0 >= 5000) {
+         GLfloat seconds = (t - T0) / 1000.0;
+         GLfloat fps = Frames / seconds;
+         printf("%d frames in %6.3f seconds = %6.3f FPS\n", Frames, seconds, fps);
+         T0 = t;
+         Frames = 0;
+      }
+   }
+}
+
+
+static void
+idle(void)
+{
+  static double t0 = -1.;
+  double dt, t = glutGet(GLUT_ELAPSED_TIME) / 1000.0;
+  if (t0 < 0.0)
+    t0 = t;
+  dt = t - t0;
+  t0 = t;
+
+  GearRot += 70.0 * dt;  /* 70 degrees per second */
+  GearRot = fmod(GearRot, 360.0); /* prevents eventual overflow */
+
+  CubeRot += 15.0 * dt;
+
+  glutPostRedisplay();
+}
+
+
+/* toggle wireframe ('w'), rotate view about Z ('z'/'Z'), exit upon ESC */
+static void
+key(unsigned char k, int x, int y)
+{
+   (void) x;
+   (void) y;
+   switch (k) {
+   case 'w':
+      WireFrame = !WireFrame;
+      break;
+   case 'z':
+      ViewRotZ += 5.0;
+      break;
+   case 'Z':
+      ViewRotZ -= 5.0;
+      break;
+   case 27:  /* Escape */
+      cleanup();
+      exit(0);
+      break;
+   default:
+      return;
+   }
+   glutPostRedisplay();
+}
+
+/* change view angle */
+static void
+special(int k, int x, int y)
+{
+   (void) x;
+   (void) y;
+   switch (k) {
+   case GLUT_KEY_UP:
+      ViewRotX += 5.0;
+      break;
+   case GLUT_KEY_DOWN:
+      ViewRotX -= 5.0;
+      break;
+   case GLUT_KEY_LEFT:
+      ViewRotY += 5.0;
+      break;
+   case GLUT_KEY_RIGHT:
+      ViewRotY -= 5.0;
+      break;
+   default:
+      return;
+   }
+   glutPostRedisplay();
+}
+
+
+/* new window size: just record it; draw() sets the viewports each frame */
+static void
+reshape(int width, int height)
+{
+  WinWidth = width;
+  WinHeight = height;
+}
+
+
+static void
+init(int argc, char *argv[])
+{
+  static GLfloat pos[4] = {5.0, 5.0, 10.0, 0.0};
+  static GLfloat red[4] = {0.8, 0.1, 0.0, 1.0};
+  static GLfloat green[4] = {0.0, 0.8, 0.2, 1.0};
+  static GLfloat blue[4] = {0.2, 0.2, 1.0, 1.0};
+  GLint i;
+
+  glLightfv(GL_LIGHT0, GL_POSITION, pos);
+#if 0
+  glEnable(GL_CULL_FACE);
+#endif
+  glEnable(GL_LIGHTING);
+  glEnable(GL_LIGHT0);
+  glEnable(GL_DEPTH_TEST);
+
+  /* make the gears */
+  Gear1 = glGenLists(1);
+  glNewList(Gear1, GL_COMPILE);
+  glMaterialfv(GL_FRONT, GL_AMBIENT_AND_DIFFUSE, red);
+  gear(1.0, 4.0, 1.0, 20, 0.7);
+  glEndList();
+
+  Gear2 = glGenLists(1);
+  glNewList(Gear2, GL_COMPILE);
+  glMaterialfv(GL_FRONT, GL_AMBIENT_AND_DIFFUSE, green);
+  gear(0.5, 2.0, 2.0, 10, 0.7);
+  glEndList();
+
+  Gear3 = glGenLists(1);
+  glNewList(Gear3, GL_COMPILE);
+  glMaterialfv(GL_FRONT, GL_AMBIENT_AND_DIFFUSE, blue);
+  gear(1.3, 2.0, 0.5, 10, 0.7);
+  glEndList();
+
+  glEnable(GL_NORMALIZE);
+
+  /* xxx make size dynamic */
+  TexWidth = 256;
+  TexHeight = 256;
+
+   glBindTexture(GL_TEXTURE_2D, TexObj);
+   glTexImage2D(GL_TEXTURE_2D, 0, IntFormat, TexWidth, TexHeight, 0,
+                GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+   glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);
+
+  for ( i=1; i<argc; i++ ) {
+    if (strcmp(argv[i], "-info")==0) {
+      printf("GL_RENDERER   = %s\n", (char *) glGetString(GL_RENDERER));
+      printf("GL_VERSION    = %s\n", (char *) glGetString(GL_VERSION));
+      printf("GL_VENDOR     = %s\n", (char *) glGetString(GL_VENDOR));
+      printf("GL_EXTENSIONS = %s\n", (char *) glGetString(GL_EXTENSIONS));
+    }
+  }
+}
+
+
+static void 
+visible(int vis)
+{
+  if (vis == GLUT_VISIBLE)
+    glutIdleFunc(idle);
+  else
+    glutIdleFunc(NULL);
+}
+
+
+int
+main(int argc, char *argv[])
+{
+   glutInit(&argc, argv);
+   glutInitDisplayMode(GLUT_RGB | GLUT_DEPTH | GLUT_DOUBLE);
+
+   glutInitWindowSize(WinWidth, WinHeight);
+   Win = glutCreateWindow("gearbox");
+   init(argc, argv);
+
+   glutDisplayFunc(draw);
+   glutReshapeFunc(reshape);
+   glutKeyboardFunc(key);
+   glutSpecialFunc(special);
+   glutVisibilityFunc(visible);
+
+   glutMainLoop();
+   return 0;             /* ANSI C requires main to return int. */
+}
--- a/progs/demos/texdown.c
+++ b/progs/demos/texdown.c
@@ -116,6 +116,12 @@ TypeStr(GLenum type)
   }
 }

+/* On x86, there is a performance cliff for memcpy to texture memory
+ * for sources below 64 byte alignment.  We do our best with this in
+ * the driver, but it is better if the images are correctly aligned to
+ * start with:
+ */
+#define ALIGN (1<<7)

 static void
 MeasureDownloadRate(void)
@@ -128,13 +134,20 @@ MeasureDownloadRate(void)
   int count;
   int i;

-   texImage = (GLubyte *) malloc(bytes);
-   getImage = (GLubyte *) malloc(bytes);
+   texImage = (GLubyte *) malloc(bytes + ALIGN);
+   getImage = (GLubyte *) malloc(bytes + ALIGN);
   if (!texImage || !getImage) {
      DownloadRate = 0.0;
      return;
   }

+   texImage = (GLubyte *)((((unsigned)texImage) + ALIGN) & ~(ALIGN-1));
+   getImage = (GLubyte *)((((unsigned)getImage) + ALIGN) & ~(ALIGN-1));
+
+   for (i = 1; !(((unsigned)texImage) & i); i<<=1)
+      ;
+   printf("texture image alignment: %d bytes\n", i);
+      
   for (i = 0; i < bytes; i++) {
      texImage[i] = i & 0xff;
   }
@@ -178,12 +191,15 @@ MeasureDownloadRate(void)
                      FormatTable[Format].Type, texImage);
      }

+#if 1
      /* draw a tiny polygon to force texture into texram */
      glBegin(GL_TRIANGLES);
      glTexCoord2f(0, 0);     glVertex2f(1, 1);
      glTexCoord2f(1, 0);     glVertex2f(3, 1);
      glTexCoord2f(0.5, 1);   glVertex2f(2, 3);
      glEnd();
+/*       glFinish(); */
+#endif

      t1 = glutGet(GLUT_ELAPSED_TIME) * 0.001;
      time = t1 - t0;
@@ -209,8 +225,8 @@ MeasureDownloadRate(void)
   }
 #endif

-   free(texImage);
-   free(getImage);
+/*    free(texImage); */
+/*    free(getImage); */

   {
      GLint err = glGetError();
--- a/progs/demos/texobj.c
+++ b/progs/demos/texobj.c
@@ -92,7 +92,7 @@ static void idle( void )
   dt = t - t0;
   t0 = t;
   Angle += 120.0*dt;
-   glutPostRedisplay();
+/*    glutPostRedisplay(); */
 }


--- a/progs/tests/Makefile
+++ b/progs/tests/Makefile
@@ -47,6 +47,7 @@ SOURCES = antialias.c \
 	sharedtex.c \
 	stencilwrap.c \
 	stencil_wrap.c \
+	subtexrate.c \
 	tex1d.c \
 	texfilt.c \
 	texline.c \
--- a/progs/tests/subtexrate.c
+++ b/progs/tests/subtexrate.c
@@ -0,0 +1,352 @@
+/*
+ * Measure glTexSubImage and glCopyTexSubImage speed
+ *
+ * Brian Paul
+ * 26 Jan 2006
+ */
+
+#define GL_GLEXT_PROTOTYPES
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <GL/glut.h>
+
+static GLint WinWidth = 1024, WinHeight = 512;
+static GLint TexWidth = 512, TexHeight = 512;
+
+static GLuint TexObj = 1;
+
+static GLenum IntFormat = GL_RGBA;
+static GLenum ReadFormat = GL_BGRA; /* for glReadPixels */
+
+static GLboolean DrawQuad = GL_TRUE;
+
+
+/**
+ * draw teapot image, size TexWidth by TexHeight
+ */
+static void
+DrawTestImage(void)
+{
+   GLfloat ar;
+
+   glViewport(0, 0, TexWidth, TexHeight);
+   glScissor(0, 0, TexWidth, TexHeight);
+   glEnable(GL_SCISSOR_TEST);
+
+   glClearColor(0.5, 0.5, 0.5, 0.0);
+   glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+
+   ar = (float) TexWidth / TexHeight;
+
+   glMatrixMode(GL_PROJECTION);
+   glLoadIdentity();
+   glFrustum(-ar, ar, -1.0, 1.0, 5.0, 25.0);
+   glMatrixMode(GL_MODELVIEW);
+
+   glEnable(GL_LIGHTING);
+   glEnable(GL_LIGHT0);
+   glEnable(GL_DEPTH_TEST);
+   glFrontFace(GL_CW);
+   glPushMatrix();
+   glRotatef(45, 1, 0, 0);
+   glRotatef(45, 0, 1, 0);
+   glutSolidTeapot(2.3);
+   glPopMatrix();
+   glFrontFace(GL_CCW);
+   glDisable(GL_DEPTH_TEST);
+   glDisable(GL_LIGHTING);
+
+   glDisable(GL_SCISSOR_TEST);
+
+   glViewport(0, 0, WinWidth, WinHeight);
+   glFinish();
+}
+
+
+/**
+ * Do glCopyTexSubImage2D call (update texture with framebuffer data)
+ * If doSubRect is true, do the copy in four pieces instead of all at once.
+ */
+static void
+DoCopyTex(GLboolean doSubRect)
+{
+   if (doSubRect) {
+      /* copy in four parts */
+      int w = TexWidth / 2, h = TexHeight / 2;
+      int x0 = 0, y0 = 0;
+      int x1 = w, y1 = h;
+#if 1
+      glCopyTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, x0, y0, w, h);
+      glCopyTexSubImage2D(GL_TEXTURE_2D, 0, x1, y0, x1, y0, w, h);
+      glCopyTexSubImage2D(GL_TEXTURE_2D, 0, x0, y1, x0, y1, w, h);
+      glCopyTexSubImage2D(GL_TEXTURE_2D, 0, x1, y1, x1, y1, w, h);
+#else
+      /* scramble */
+      glCopyTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, x1, y1, w, h);
+      glCopyTexSubImage2D(GL_TEXTURE_2D, 0, x1, y0, x0, y1, w, h);
+      glCopyTexSubImage2D(GL_TEXTURE_2D, 0, x0, y1, x1, y0, w, h);
+      glCopyTexSubImage2D(GL_TEXTURE_2D, 0, x1, y1, x0, y0, w, h);
+#endif
+   }
+   else {
+      glCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, TexWidth, TexHeight);
+   }
+}
+
+
+/**
+ * Do glTexSubImage2D (update texture w/ user data)
+ * If doSubRect, do update in four pieces, else all at once.
+ */
+static void
+SubTex(GLboolean doSubRect, const GLubyte *image)
+{
+   if (doSubRect) {
+      /* four pieces */
+      int w = TexWidth / 2, h = TexHeight / 2;
+      int x0 = 0, y0 = 0;
+      int x1 = w, y1 = h;
+      glPixelStorei(GL_UNPACK_ROW_LENGTH, TexWidth);
+      glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+
+      glPixelStorei(GL_UNPACK_SKIP_ROWS, y0);
+      glPixelStorei(GL_UNPACK_SKIP_PIXELS, x0);
+      glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, w, h,
+                      ReadFormat, GL_UNSIGNED_BYTE, image);
+
+      glPixelStorei(GL_UNPACK_SKIP_ROWS, y0);
+      glPixelStorei(GL_UNPACK_SKIP_PIXELS, x1);
+      glTexSubImage2D(GL_TEXTURE_2D, 0, x1, y0, w, h,
+                      ReadFormat, GL_UNSIGNED_BYTE, image);
+
+      glPixelStorei(GL_UNPACK_SKIP_ROWS, y1);
+      glPixelStorei(GL_UNPACK_SKIP_PIXELS, x0);
+      glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y1, w, h,
+                      ReadFormat, GL_UNSIGNED_BYTE, image);
+
+      glPixelStorei(GL_UNPACK_SKIP_ROWS, y1);
+      glPixelStorei(GL_UNPACK_SKIP_PIXELS, x1);
+      glTexSubImage2D(GL_TEXTURE_2D, 0, x1, y1, w, h,
+                      ReadFormat, GL_UNSIGNED_BYTE, image);
+   }
+   else {
+      /* all at once */
+      glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, TexWidth, TexHeight,
+                      ReadFormat, GL_UNSIGNED_BYTE, image);
+   }
+}
+
+
+/**
+ * Measure gl[Copy]TexSubImage rate.
+ * This actually also includes time to render a quad and SwapBuffers.
+ */
+static void
+RunTest(GLboolean copyTex, GLboolean doSubRect)
+{
+   double t0, t1;
+   int iters = 0;
+   float copyRate, mbRate;
+   float rot = 0.0;
+   int bpp, r, g, b, a;
+   int w, h;
+   GLubyte *image = NULL;
+
+   glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_RED_SIZE, &r);
+   glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_GREEN_SIZE, &g);
+   glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_BLUE_SIZE, &b);
+   glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_ALPHA_SIZE, &a);
+   bpp = (r + g + b + a) / 8;
+
+   if (!copyTex) {
+      /* read image from frame buffer */
+      image = (GLubyte *) malloc(TexWidth * TexHeight * bpp);
+      glPixelStorei(GL_PACK_ALIGNMENT, 1);
+      glReadPixels(0, 0, TexWidth, TexHeight,
+                   ReadFormat, GL_UNSIGNED_BYTE, image);
+   }
+
+   glEnable(GL_TEXTURE_2D);
+   glViewport(WinWidth / 2, 0, WinWidth / 2, WinHeight);
+
+   t0 = glutGet(GLUT_ELAPSED_TIME) / 1000.0;
+
+   do {
+      if (copyTex)
+         /* Framebuffer -> Texture */
+         DoCopyTex(doSubRect);
+      else {
+         /* Main Mem -> Texture */
+         SubTex(doSubRect, image);
+      }
+
+      /* draw textured quad */
+      if (DrawQuad) {
+         glPushMatrix();
+            glRotatef(rot, 0, 0, 1);
+            glTranslatef(1, 0, 0);
+            glBegin(GL_POLYGON);
+               glTexCoord2f(0, 0);  glVertex2f(-1, -1);
+               glTexCoord2f(1, 0);  glVertex2f( 1, -1);
+               glTexCoord2f(1, 1);  glVertex2f( 1,  1);
+               glTexCoord2f(0, 1);  glVertex2f(-1,  1);
+            glEnd();
+         glPopMatrix();
+      }
+
+      iters++;
+      rot += 2.0;
+
+      t1 = glutGet(GLUT_ELAPSED_TIME) / 1000.0;
+      if (DrawQuad) {
+         glutSwapBuffers();
+      }
+   } while (t1 - t0 < 5.0);
+
+   glDisable(GL_TEXTURE_2D);
+   if (image)
+      free(image);
+
+   if (doSubRect) {
+      w = TexWidth / 2;
+      h = TexHeight / 2;
+      iters *= 4;
+   }
+   else {
+      w = TexWidth;
+      h = TexHeight;
+   }
+
+   copyRate = iters / (t1 - t0);
+   mbRate = w * h * bpp * copyRate / (1024 * 1024);
+
+   if (copyTex)
+      printf("glCopyTexSubImage: %d x %d, %d Bpp:\n", w, h, bpp);
+   else
+      printf("glTexSubImage: %d x %d, %d Bpp:\n", w, h, bpp);
+   printf("   %d calls in %.2f = %.2f calls/sec, %.2f MB/s\n",
+          iters, t1-t0, copyRate, mbRate);
+}
+
+
+static void
+Draw(void)
+{
+   glClearColor(0.2, 0.2, 0.8, 0);
+   glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+
+   DrawTestImage();
+   if (!DrawQuad) {
+      glutSwapBuffers();
+   }
+
+/*    RunTest(GL_FALSE, GL_FALSE); */
+/*    RunTest(GL_FALSE, GL_TRUE); */
+   while (1) {
+      RunTest(GL_TRUE, GL_FALSE);
+      RunTest(GL_TRUE, GL_TRUE);
+
+      glutSwapBuffers();
+   }
+
+   printf("exiting\n");
+   exit(0);
+}
+
+
+static void
+Reshape(int width, int height)
+{
+   glViewport(0, 0, width, height);
+   glMatrixMode(GL_PROJECTION);
+   glLoadIdentity();
+   glFrustum(-1.0, 1.0, -1.0, 1.0, 5.0, 25.0);
+   glMatrixMode(GL_MODELVIEW);
+   glLoadIdentity();
+   glTranslatef(0.0, 0.0, -15.0);
+}
+
+
+static void
+Key(unsigned char key, int x, int y)
+{
+   (void) x;
+   (void) y;
+   switch (key) {
+      case 27:
+         exit(0);
+         break;
+   }
+   glutPostRedisplay();
+}
+
+
+static void
+SpecialKey(int key, int x, int y)
+{
+   (void) x;
+   (void) y;
+   switch (key) {
+      case GLUT_KEY_UP:
+         break;
+      case GLUT_KEY_DOWN:
+         break;
+      case GLUT_KEY_LEFT:
+         break;
+      case GLUT_KEY_RIGHT:
+         break;
+   }
+   glutPostRedisplay();
+}
+
+
+static void
+Init(void)
+{
+   /* create initial, empty teximage */
+   glBindTexture(GL_TEXTURE_2D, TexObj);
+   glTexImage2D(GL_TEXTURE_2D, 0, IntFormat, TexWidth, TexHeight, 0,
+                GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+   glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
+}
+
+
+
+static void
+ParseArgs(int argc, char *argv[])
+{
+   int i;
+   for (i = 1; i < argc; i++) {
+      if (strcmp(argv[i], "-nodraw") == 0)
+         DrawQuad = GL_FALSE;
+   }
+}
+
+
+int
+main(int argc, char *argv[])
+{
+   GLint mode = GLUT_RGB | GLUT_ALPHA | GLUT_DOUBLE | GLUT_DEPTH;
+   glutInit(&argc, argv);
+
+   ParseArgs(argc, argv);
+
+   glutInitWindowPosition(0, 0);
+   glutInitWindowSize(WinWidth, WinHeight);
+   glutInitDisplayMode(mode);
+   glutCreateWindow(argv[0]);
+   glutReshapeFunc(Reshape);
+   glutKeyboardFunc(Key);
+   glutSpecialFunc(SpecialKey);
+   glutDisplayFunc(Draw);
+
+   printf("GL_RENDERER: %s\n", (char *) glGetString(GL_RENDERER));
+   Init();
+
+   glutMainLoop();
+   return 0;
+}
--- a/src/mesa/drivers/dri/Makefile.template
+++ b/src/mesa/drivers/dri/Makefile.template
@@ -89,7 +89,7 @@ $(LIB_DIR)/$(LIBNAME): $(LIBNAME)
 depend: $(C_SOURCES) $(ASM_SOURCES) $(SYMLINKS)
 	touch depend
 	$(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDES) $(C_SOURCES) $(ASM_SOURCES) \
-		> /dev/null 
+		>& /dev/null 


 # Emacs tags
--- a/src/mesa/drivers/dri/i915/Makefile
+++ b/src/mesa/drivers/dri/i915/Makefile
@@ -5,33 +5,45 @@ include $(TOP)/configs/current
 LIBNAME = i915_dri.so

 DRIVER_SOURCES = \
+	bufmgr_fake.c \
+	intel_regions.c \
+	intel_mipmap_tree.c \
+	i915_tex_layout.c \
+	intel_tex_image.c \
+	intel_tex_subimage.c \
+	intel_tex_copy.c \
+	intel_tex_validate.c \
+	intel_tex_format.c \
+	intel_tex.c \
+	i915_tex.c \
+	i915_texstate.c \
 	i915_context.c \
 	i915_debug.c \
 	i915_fragprog.c \
 	i915_metaops.c \
 	i915_program.c \
 	i915_state.c \
-	i915_tex.c \
 	i915_texprog.c \
-	i915_texstate.c \
 	i915_vtbl.c \
+	intel_batchbuffer.c \
+	intel_context.c \
+	intel_ioctl.c \
+	intel_pixel.c \
+	intel_screen.c \
+	intel_span.c \
+	intel_state.c \
+	intel_tris.c 
+
+
+DISABLED = \
+	intel_render.c \
 	i830_context.c \
 	i830_metaops.c \
 	i830_state.c \
 	i830_texblend.c \
 	i830_tex.c \
 	i830_texstate.c \
-	i830_vtbl.c \
-	intel_batchbuffer.c \
-	intel_context.c \
-	intel_ioctl.c \
-	intel_pixel.c \
-	intel_render.c \
-	intel_screen.c \
-	intel_span.c \
-	intel_state.c \
-	intel_tex.c \
-	intel_tris.c 
+	i830_vtbl.c 

 C_SOURCES = \
 	$(COMMON_SOURCES) \
--- a/src/mesa/drivers/dri/i915/bufmgr.h
+++ b/src/mesa/drivers/dri/i915/bufmgr.h
@@ -0,0 +1,164 @@
+#ifndef BUFMGR_H
+#define BUFMGR_H
+
+#include "intel_context.h"
+
+/* Note that this is destined to be external to Mesa, so don't use GL
+ * types like GLuint, etc.
+ */
+
+/* The buffer manager context.  Opaque.
+ */
+struct bufmgr;
+struct bm_buffer_list;
+
+struct bufmgr *bm_fake_intel_Attach( struct intel_context *intel );
+
+/* struct bufmgr *bmCreate( ... ); */
+/* struct bufmgr *bmAttach( ... ); */
+
+/* Define an address space.  Doesn't really do anything, but the
+ * information could be used to validate the bmInitPool() requests.
+ */
+void bmInitMemType( struct bufmgr *,
+		    unsigned mem_type,
+		    unsigned long size );
+
+
+/* Create a pool of a given memory type, from a certain offset and a
+ * certain size.  
+ *
+ * Also passed in is a virtual pointer to the start of the pool.  This
+ * is useful in the faked-out version in i915 so that MapBuffer can
+ * return a pointer to a buffer residing in AGP space.  
+ *
+ * Flags passed into a pool are inherited by all buffers allocated in
+ * that pool.  So pools representing the static front,back,depth
+ * buffer allocations should have MEM_AGP|NO_UPLOAD|NO_EVICT|NO_MOVE to match
+ * the behaviour of the legacy allocations.
+ *
+ * Returns -1 for failure, pool number for success.
+ */
+int bmInitPool( struct bufmgr *, 
+		unsigned long low_offset,
+		void *low_virtual,
+		unsigned long size,
+		unsigned flags);
+
+
+/* Flags for validate and other calls.  If both NO_UPLOAD and NO_EVICT
+ * are specified, ValidateBuffers is essentially a query.
+ */
+#define BM_MEM_LOCAL   0x1
+#define BM_MEM_AGP     0x2
+#define BM_MEM_VRAM    0x4	/* not yet used */
+#define BM_WRITE       0x8	/* not yet used */
+#define BM_READ        0x10	/* not yet used */
+#define BM_NO_UPLOAD   0x20
+#define BM_NO_EVICT    0x40
+#define BM_NO_MOVE     0x80	/* not yet used */
+#define BM_NO_ALLOC    0x100	/* legacy "fixed" buffers only */
+
+
+#define BM_MEM_MASK (BM_MEM_LOCAL|BM_MEM_AGP|BM_MEM_VRAM)
+
+
+
+/* Stick closely to ARB_vbo semantics - they're well defined and
+ * understood, and drivers can just pass the calls through without too
+ * much thunking.
+ */
+void bmGenBuffers(struct bufmgr *, unsigned n, unsigned *buffers);
+void bmDeleteBuffers(struct bufmgr *, unsigned n, unsigned *buffers);
+
+
+/* Hook to inform faked buffer manager about fixed-position
+ * front,depth,back buffers.  These may move to a fully memory-managed
+ * scheme, or they may continue to be managed as is.
+ */
+unsigned bmBufferStatic(struct bufmgr *,
+			unsigned buffer,
+			unsigned size,
+			unsigned pool);
+
+
+
+/* The driver has more intimate knowledge of the hardware than a GL
+ * client would, so the flags here are more prescriptive than the usage
+ * values in the ARB_vbo interface:
+ */
+void bmBufferData(struct bufmgr *, 
+		  unsigned buffer, 
+		  unsigned size, 
+		  const void *data, 
+		  unsigned flags );
+
+void bmBufferSubData(struct bufmgr *, 
+		     unsigned buffer, 
+		     unsigned offset, 
+		     unsigned size, 
+		     const void *data );
+
+void *bmMapBuffer( struct bufmgr *,
+		   unsigned buffer, 
+		   unsigned access );
+
+void bmUnmapBuffer( struct bufmgr *,
+		    unsigned buffer );
+
+/* To be called prior to emitting commands to hardware which reference
+ * these buffers.  
+ *
+ * NewBufferList() and AddBuffer() build up a list of buffers to be
+ * validated.  The buffer list provides information on where the
+ * buffers should be placed and whether their contents need to be
+ * preserved on copying.  The offset data elements are return values
+ * from this function telling the driver exactly where the buffers are
+ * currently located.
+ *
+ * ValidateBufferList() performs the actual validation and returns the
+ * buffer pools and offsets within the pools.
+ *
+ * FenceBufferList() must be called to set fences and other
+ * housekeeping before unlocking after a successful call to
+ * ValidateBufferList(). The buffer manager knows how to emit and test
+ * fences directly through the drm and without callbacks to the
+ * driver.
+ */
+struct bm_buffer_list *bmNewBufferList( void );
+
+void bmAddBuffer( struct bm_buffer_list *list,
+		  unsigned buffer,
+		  unsigned flags,
+		  unsigned *pool_return,
+		  unsigned *offset_return );
+
+int bmValidateBufferList( struct bufmgr *, 
+			  struct bm_buffer_list *,
+			  unsigned flags );
+
+unsigned bmFenceBufferList( struct bufmgr *,
+			struct bm_buffer_list * );
+
+void bmFreeBufferList( struct bm_buffer_list * );
+
+
+/* This functionality is used by the buffer manager.  It is not clear
+ * that it needs to be exposed in this way; libdrm will probably
+ * offer equivalent calls.
+ *
+ * For now they can stay, but will likely change/move before final:
+ */
+unsigned bmSetFence( struct bufmgr * );
+int bmTestFence( struct bufmgr *, unsigned fence );
+void bmFinishFence( struct bufmgr *, unsigned fence );
+
+void bmFlushReadCaches( struct bufmgr *bm );
+void bmFlushDrawCache( struct bufmgr *bm );
+
+void bm_fake_NotifyContendedLockTake( struct bufmgr * );
+
+
+#define DBG(...)  do { if (0) _mesa_printf(__VA_ARGS__); } while(0)
+
+#endif
--- a/src/mesa/drivers/dri/i915/bufmgr_fake.c
+++ b/src/mesa/drivers/dri/i915/bufmgr_fake.c
@@ -0,0 +1,843 @@
+/* Fake version of the buffer manager so that we can prototype the
+ * changes in a driver fairly quickly.  Basically wraps the old style
+ * memory management in the new programming interface.
+ *
+ * This version imports code from the via memory manager to more
+ * closely approximate the behaviour of a true memory manager.  In particular,
+ * in this version we do not expect to lose texture memory contents on
+ * context switches.
+ */
+#include "bufmgr.h"
+
+#include "intel_context.h"
+#include "intel_ioctl.h"
+
+#include "hash.h"
+#include "simple_list.h"
+#include "mm.h"
+
+struct _mesa_HashTable;
+
+
+/* Maximum number of buffers to pass to bmValidateBufferList:
+ */
+#define BM_LIST_MAX 32
+#define BM_POOL_MAX 8
+
+
+/* Wrapper around mm.c's mem_block, which understands that you must
+ * wait for fences to expire before memory can be freed.  This is
+ * specific to our use of memcpy for uploads - an upload that was
+ * processed through the command queue wouldn't need to care about
+ * fences.
+ */
+struct block {
+   /* List links: a block is either self-linked (fresh from
+    * alloc_from_pool), on its pool's 'lru' list, or on its pool's
+    * 'freed' list awaiting fence expiry.
+    */
+   struct block *next, *prev;
+   int mem_type;                /* BM_MEM_LOCAL, or the pool's BM_MEM_* bits */
+   struct pool *pool;		/* BM_MEM_AGP */
+   struct mem_block *mem;	/* BM_MEM_AGP */
+   unsigned fence;		/* BM_MEM_AGP, Split to read_fence, write_fence */
+   void *virtual;               /* CPU-visible address of the storage */
+   struct buffer *buf;          /* owning buffer; NULL once on the freed list */
+};
+
+
+/* Client-visible buffer object, looked up by id through bufmgr::hash.
+ */
+struct buffer {
+   unsigned id;			/* debug only */
+   unsigned size;               /* size passed to the allocator, set by bmBufferData/bmBufferStatic */
+   unsigned alignment;          /* passed through to alloc_block() by move_buffers() */
+   unsigned mapped;             /* non-zero between bmMapBuffer and bmUnmapBuffer */
+   unsigned flags;              /* BM_MEM_* / BM_NO_* placement flags */
+   struct block *block;         /* current backing storage, or NULL if none */
+};
+
+/* One contiguous range of card-visible memory managed by mm.c.
+ */
+struct pool {
+   unsigned flags;              /* BM_MEM_* type bits plus BM_NO_* restrictions */
+   struct mem_block *heap;      /* allocator state returned by mmInit() */
+   void *virtual;               /* biased base: virtual + mem->ofs is the CPU address */
+   struct block lru;            /* list head: resident blocks, moved to head when fenced */
+   struct block freed;          /* list head: released blocks whose fence has not passed */
+};
+
+/* Top-level state of the fake buffer manager.
+ */
+struct bufmgr {
+   struct intel_context *intel; /* used to emit/test fences and read the sarea */
+   struct pool pool[BM_POOL_MAX];
+   unsigned nr_pools;           /* number of pool[] entries set up by bmInitPool */
+
+   struct _mesa_HashTable *hash; /* buffer id -> struct buffer */
+
+   unsigned buf_nr;		/* for generating ids */
+};
+
+
+/* List of buffers to validate: 
+ *
+ * Filled by bmAddBuffer() and consumed by bmValidateBufferList() and
+ * bmFenceBufferList().  Holds at most BM_LIST_MAX entries.
+ */
+struct bm_buffer_list {
+   struct {
+      unsigned buffer;          /* buffer id */
+      unsigned *offset_return;  /* where to store the validated offset, or NULL */
+      unsigned *memtype_return; /* where to store the block's mem_type, or NULL */
+   } elem[BM_LIST_MAX];
+
+   unsigned nr;                 /* number of valid entries in elem[] */
+};
+
+
+
+
+/* Carve 'size' bytes out of pool number 'pool_nr'.
+ *
+ * Returns a new, self-linked block describing the allocation, or NULL
+ * if either the bookkeeping struct or the pool space could not be
+ * obtained.  The block is not yet attached to any buffer.
+ */
+static struct block *alloc_from_pool( struct bufmgr *bm,
+                                      unsigned pool_nr,
+                                      unsigned size,
+                                      unsigned align )
+{
+   struct pool *src = &bm->pool[pool_nr];
+   struct block *blk = calloc(1, sizeof *blk);
+
+   if (blk == NULL)
+      return NULL;
+
+   DBG("alloc_from_pool %d sz 0x%x\n", pool_nr, size);
+
+   blk->mem = mmAllocMem(src->heap, size, align, 0);
+   if (blk->mem == NULL) {
+      DBG("\t- failed\n");
+      free(blk);
+      return NULL;
+   }
+
+   /* Self-link so that list operations on the block are always safe.
+    */
+   make_empty_list(blk);
+   blk->mem_type = src->flags & BM_MEM_MASK;
+   blk->pool = src;
+   blk->virtual = src->virtual + blk->mem->ofs;
+
+   DBG("\t- offset 0x%x\n", blk->mem->ofs);
+   return blk;
+}
+
+
+/* Allocate 'size' bytes of ordinary malloc'ed memory and wrap it in a
+ * BM_MEM_LOCAL block.  Returns NULL if either allocation fails.
+ */
+static struct block *alloc_local( unsigned size )
+{
+   struct block *blk = calloc(1, sizeof *blk);
+
+   if (blk == NULL)
+      return NULL;
+
+   DBG("alloc_local 0x%x\n", size);
+
+   blk->mem_type = BM_MEM_LOCAL;
+   blk->virtual = malloc(size);
+   if (blk->virtual != NULL)
+      return blk;
+
+   free(blk);
+   return NULL;
+}
+
+
+
+
+/* Allocate storage of the given size/alignment from any memory type
+ * permitted by 'flags'.
+ *
+ * Pools are tried in order; a pool is skipped if it is marked
+ * BM_NO_ALLOC or shares no BM_MEM_* bit with 'flags'.  If a matching
+ * pool is full the search continues with the next pool rather than
+ * giving up, and if no pool can satisfy the request local (malloc)
+ * memory is used when BM_MEM_LOCAL is allowed.
+ *
+ * Returns the new block, or NULL if nothing could be allocated.
+ */
+static struct block *alloc_block( struct bufmgr *bm,
+				  unsigned size,
+				  unsigned align,
+				  int flags )
+{
+   GLuint i;
+
+   for (i = 0; i < bm->nr_pools; i++) {
+      struct block *block;
+
+      if (bm->pool[i].flags & BM_NO_ALLOC)
+         continue;
+
+      if ((bm->pool[i].flags & flags & BM_MEM_MASK) == 0)
+         continue;
+
+      /* A full pool is not fatal - another pool or local memory may
+       * still be able to hold the buffer.
+       */
+      block = alloc_from_pool(bm, i, size, align);
+      if (block)
+         return block;
+   }
+
+   if (flags & BM_MEM_LOCAL)
+      return alloc_local(size);
+
+   return NULL;
+}
+
+/* Give 'buf' fresh backing storage according to buf->flags and
+ * buf->size.  Any previous value of buf->block is overwritten without
+ * being released.  Returns non-zero on success.
+ */
+static int bmAllocMem( struct bufmgr *bm,
+                       struct buffer *buf )
+{
+   struct block *blk = alloc_block(bm, buf->size, 4, buf->flags);
+
+   buf->block = blk;
+   if (blk != NULL)
+      blk->buf = buf;
+
+   assert(buf->block);
+   return blk != NULL;
+}
+
+
+/* Release the storage described by 'block' (NULL is accepted and
+ * ignored).
+ *
+ * Pool memory may still be in use by the hardware: if the block's
+ * fence has passed it is freed immediately, otherwise it is parked on
+ * the pool's 'freed' list for delayed_free() to reap later.  Local
+ * memory is simply freed.
+ */
+static void free_block( struct bufmgr *bm, struct block *block )
+{
+   if (!block) 
+      return;
+
+   switch (block->mem_type) {
+   case BM_MEM_AGP:
+   case BM_MEM_VRAM:
+      if (bmTestFence(bm, block->fence)) {
+         /* The block may be linked on the pool's lru list (see
+          * bmFenceBufferList); unlink it before the memory goes away
+          * so the list never points at freed storage.  Blocks from
+          * alloc_from_pool() are self-linked, so this is always safe.
+          */
+         remove_from_list(block);
+         mmFreeMem(block->mem);
+         free(block);
+      }
+      else {
+         block->buf = NULL;
+         move_to_tail(&block->pool->freed, block);
+      }
+      break;
+
+   case BM_MEM_LOCAL:
+      free(block->virtual);
+      free(block);
+      break;
+
+   default:
+      free(block);
+      break;
+   }
+}
+
+/* Walk every pool's 'freed' list and really release each block whose
+ * fence has now passed.  Returns the total number of bytes returned
+ * to the pools (zero if nothing could be reclaimed).
+ */
+static int delayed_free( struct bufmgr *bm )
+{
+   struct block *cur, *following;
+   int reclaimed = 0;
+   int pool_nr;
+
+   for (pool_nr = 0; pool_nr < bm->nr_pools; pool_nr++) {
+      struct block *pending = &bm->pool[pool_nr].freed;
+
+      foreach_s(cur, following, pending) {
+         if (!bmTestFence(bm, cur->fence))
+            continue;
+
+         reclaimed += cur->mem->size;
+         remove_from_list(cur);
+         mmFreeMem(cur->mem);
+         free(cur);
+      }
+   }
+
+   return reclaimed;
+}
+
+
+/* Make sure each of the 'nr' buffers has storage in one of the memory
+ * types named by 'flags', allocating or relocating as required.
+ *
+ * Works in two phases so that failure leaves every buffer untouched:
+ * first all new blocks are allocated, then (only if every allocation
+ * succeeded) contents are copied across and the old blocks released.
+ *
+ * 'nr' must not exceed BM_LIST_MAX - newMem[] is sized for that.
+ *
+ * Returns 1 on success, 0 if any allocation failed or a required
+ * upload was forbidden by BM_NO_UPLOAD.
+ */
+static int move_buffers( struct bufmgr *bm, 
+			 struct buffer *buffers[],
+			 int nr,
+			 int flags )
+{
+   struct block *newMem[BM_LIST_MAX];
+   GLint i;
+   GLuint nr_uploads = 0;
+
+   DBG("%s\n", __FUNCTION__);
+
+   memset(newMem, 0, sizeof(newMem));
+
+   /* First do all the allocations (or fail):
+    */ 
+   for (i = 0; i < nr; i++) {    
+      if (!buffers[i]->block) {
+/* 	 if (flags & BM_NO_ALLOC) */
+/* 	    goto cleanup; */
+
+	 /* Buffer has no storage yet: just allocate, nothing to copy. */
+	 newMem[i] = alloc_block(bm, 
+				 buffers[i]->size,
+				 buffers[i]->alignment,
+				 flags & BM_MEM_MASK);
+
+	 if (!newMem[i]) 
+	    goto cleanup;
+	
+      } 
+      else if (!(buffers[i]->block->mem_type & flags)) { 
+	 /* Buffer lives in a memory type the caller did not ask for:
+	  * it has to be relocated, which implies an upload.
+	  */
+	 if (flags & BM_NO_UPLOAD)
+	    goto cleanup;
+
+	 assert(!buffers[i]->mapped);
+
+	 DBG("try to move buffer %d size 0x%x to pools 0x%x\n", 
+		      buffers[i]->id, buffers[i]->size, flags & BM_MEM_MASK);
+
+	 newMem[i] = alloc_block(bm, 
+				 buffers[i]->size,
+				 buffers[i]->alignment,
+				 flags & BM_MEM_MASK);
+
+	 if (!newMem[i]) 
+	    goto cleanup;
+      }
+   }
+
+
+   /* Now copy all the image data and free the old texture memory.
+    */
+   for (i = 0; i < nr; i++) {    
+      if (newMem[i]) {
+	 if (buffers[i]->block) {
+	    /* XXX: To be replaced with DMA, GTT bind, and other
+	     * mechanisms in final version.  Memcpy (or sse_memcpy) is
+	     * probably pretty good for local->agp uploads.
+	     */
+	    _mesa_printf("* %d\n", buffers[i]->size);
+	    memcpy(newMem[i]->virtual,
+		   buffers[i]->block->virtual, 
+		   buffers[i]->size);
+	    
+	    free_block(bm, buffers[i]->block);
+	    nr_uploads++;
+	 }
+
+	 /* Attach the new block and set its back-pointer to the buffer. */
+	 buffers[i]->block = newMem[i];
+	 buffers[i]->block->buf = buffers[i];
+      }
+   }
+
+   /* Tell hardware that its texture and other caches may be invalid: 
+    */
+   if (nr_uploads && (flags & (BM_MEM_AGP|BM_MEM_VRAM)))
+      bmFlushReadCaches(bm);   
+
+   DBG("%s - success\n", __FUNCTION__);
+   return 1;
+
+ cleanup:
+   /* Release any allocations made prior to failure:
+    */
+   for (i = 0; i < nr; i++) {    
+      if (newMem[i]) 
+	 free_block(bm, newMem[i]);
+   }
+   
+   _mesa_printf("%s - fail\n", __FUNCTION__);
+   return 0;   
+}
+
+
+/* Evict the least-recently-used buffer from the first evictable pool
+ * matching 'flags' by moving its contents to local memory.
+ *
+ * Returns the size in bytes of the evicted buffer, or 0 if nothing
+ * was evicted (eviction forbidden, pool empty, oldest block still
+ * fenced, or the move to local memory failed).
+ */
+static unsigned evict_lru( struct bufmgr *bm,
+			   unsigned flags)
+{
+   unsigned i;
+
+   DBG("%s\n", __FUNCTION__);
+
+   if (flags & BM_NO_EVICT)
+      return 0;
+
+   /* XXX: this is broken with >1 active pool - all the first pool
+    * will be evicted before starting on the second.  Actually, maybe
+    * you want that in some situations...
+    */
+   for (i = 0; i < bm->nr_pools; i++) {
+      struct block *block;
+      struct buffer *buf;
+      unsigned size;
+
+      if (!(bm->pool[i].flags & flags & BM_MEM_MASK) ||
+          (bm->pool[i].flags & BM_NO_EVICT))
+         continue;
+
+      block = bm->pool[i].lru.next;
+
+      /* An empty list points back at its own head, which is not a
+       * real block and has no buffer - check before touching buf.
+       */
+      if (block == &bm->pool[i].lru ||
+          !bmTestFence(bm, block->fence))
+         return 0;
+
+      /* move_buffers() frees 'block' part-way through and then keeps
+       * using the buffer pointer array, so hand it a copy of the
+       * pointer that does not live inside the block being freed.
+       */
+      buf = block->buf;
+      size = buf->size;
+
+      if (!move_buffers(bm, &buf, 1, BM_MEM_LOCAL))
+         return 0;
+
+      return size;
+   }
+
+   return 0;
+}
+
+#if 0
+/* Speculatively move texture images which haven't been used in a
+ * while back to local memory.
+ */
+static void viaSwapOutWork( struct bufmgr *bm )
+{
+   unsigned total = 0;
+   unsigned target;
+
+   if (bm->thrashing) {
+      target = 1*1024*1024;
+   }
+   else if (bmIsTexMemLow(bm)) {
+      target = 64*1024;
+   }
+   else {
+      return;
+   }
+
+   while (1) {
+      unsigned size = evict_lru(bm);
+      if (!size)
+         return;
+
+      total += size;
+      if (total >= target)
+         return;
+   }
+}
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+/***********************************************************************
+ * Public functions
+ */
+
+
+/* The initialization functions are skewed in the fake implementation.
+ * This call would be to attach to an existing manager, rather than to
+ * create a local one.
+ *
+ * Creates a zero-initialised buffer manager bound to 'intel' with an
+ * empty id->buffer hash table.  Returns NULL if memory for the
+ * manager or its hash table cannot be allocated.
+ */
+struct bufmgr *bm_fake_intel_Attach( struct intel_context *intel )
+{
+   struct bufmgr *bm = (struct bufmgr *)calloc(1, sizeof(*bm));
+
+   if (!bm)
+      return NULL;
+
+   bm->intel = intel;
+   bm->hash = _mesa_NewHashTable();
+   if (!bm->hash) {
+      free(bm);
+      return NULL;
+   }
+
+   return bm;
+}
+
+
+/* Declare the total size of a memory type.  The fake manager ignores
+ * all three arguments; the function exists only so drivers can be
+ * written against the final interface.
+ */
+void bmInitMemType( struct bufmgr *bm,
+		    unsigned mem_type,
+		    unsigned long size )
+{
+   /* Nothing really to do.  Could store and use to validate
+    * bmInitPool requests.
+    */
+}
+
+
+
+/* Register a range of card-visible memory as a new allocation pool.
+ *
+ * low_offset  - offset of the start of the range
+ * low_virtual - CPU address corresponding to low_offset.  The virtual
+ *               pointer would go away in a true implementation.
+ * size        - size of the range in bytes
+ * flags       - BM_MEM_* type plus any BM_NO_* restrictions
+ *
+ * Returns the index of the new pool, or -1 if the pool table is full
+ * or the heap for the range could not be created.  On failure no pool
+ * slot is consumed.
+ */
+int bmInitPool( struct bufmgr *bm, 
+		unsigned long low_offset,
+		void *low_virtual,
+		unsigned long size,
+		unsigned flags)
+{
+   GLuint i;
+
+   if (bm->nr_pools >= BM_POOL_MAX)
+      return -1;
+
+   i = bm->nr_pools;
+
+   DBG("bmInitPool %d low_offset %x sz %x\n",
+		i, low_offset, size);
+
+   bm->pool[i].heap = mmInit( low_offset, size );
+   if (!bm->pool[i].heap)
+      return -1;
+
+   /* Bias the base so that virtual + mem->ofs yields the CPU address
+    * of an allocation (mem->ofs already includes low_offset).
+    */
+   bm->pool[i].virtual = low_virtual - low_offset;
+   bm->pool[i].flags = flags;
+
+   make_empty_list(&bm->pool[i].lru);
+   make_empty_list(&bm->pool[i].freed);
+
+   /* Only publish the pool once it is fully set up. */
+   bm->nr_pools++;
+
+   return i;
+}
+
+
+
+/* Create 'n' new, storage-less buffer objects and write their ids to
+ * buffers[0..n-1].  New buffers may be placed in AGP, VRAM or local
+ * memory.
+ *
+ * Ids are generated from a counter that is pre-incremented, so a
+ * valid id is never 0; if a buffer object cannot be allocated its
+ * slot in 'buffers' is set to 0.
+ */
+void bmGenBuffers(struct bufmgr *bm, unsigned n, unsigned *buffers)
+{
+   unsigned i;
+
+   for (i = 0; i < n; i++) {
+      struct buffer *buf = calloc(1, sizeof(*buf));
+
+      if (!buf) {
+         buffers[i] = 0;
+         continue;
+      }
+
+      buf->id = ++bm->buf_nr;
+      buf->alignment = 12;	/* page-alignment to fit in with AGP swapping */
+      buf->flags = BM_MEM_AGP|BM_MEM_VRAM|BM_MEM_LOCAL;
+      buffers[i] = buf->id;
+      _mesa_HashInsert(bm->hash, buffers[i], buf);
+   }
+}
+
+
+/* Destroy the 'n' buffers named in 'buffers': release their storage,
+ * drop them from the id table and free the buffer objects.  Ids that
+ * are not in the table are silently skipped.
+ */
+void bmDeleteBuffers(struct bufmgr *bm, unsigned n, unsigned *buffers)
+{
+   unsigned idx;
+
+   for (idx = 0; idx < n; idx++) {
+      const unsigned id = buffers[idx];
+      struct buffer *victim = _mesa_HashLookup(bm->hash, id);
+
+      if (victim == NULL)
+         continue;
+
+      free_block(bm, victim->block);
+      _mesa_HashRemove(bm->hash, id);
+      free(victim);
+   }
+}
+
+
+
+
+/* Hook to inform faked buffer manager about fixed-position
+ * front,depth,back buffers.  These may move to a fully memory-managed
+ * scheme, or they may continue to be managed as is.  It will probably
+ * be useful to pass a fixed offset here one day.
+ *
+ * The buffer must not have storage yet and 'pool' must be flagged
+ * both BM_NO_EVICT and BM_NO_MOVE.  The buffer takes on the pool's
+ * flags and gets 'size' bytes from that pool.  Returns the offset of
+ * the allocation, or 0 if the pool could not satisfy it.
+ */
+unsigned bmBufferStatic(struct bufmgr *bm,
+                        unsigned buffer,
+                        unsigned size,
+                        unsigned pool )
+{
+   struct buffer *buf;
+   struct block *blk;
+
+   buf = (struct buffer *)_mesa_HashLookup( bm->hash, buffer );
+
+   assert(!buf->block);
+   assert(bm->pool[pool].flags & BM_NO_EVICT);
+   assert(bm->pool[pool].flags & BM_NO_MOVE);
+
+   buf->size = size;
+   buf->flags = bm->pool[pool].flags;
+   buf->alignment = 0;
+
+   blk = alloc_from_pool(bm, pool, buf->size, buf->alignment);
+   buf->block = blk;
+   if (blk == NULL)
+      return 0;
+
+   blk->buf = buf;
+   return blk->mem->ofs;
+}
+
+
+#if 0
+/* How wise/useful is this?
+ */
+void bmBufferSetParams( struct bufmgr *bm,
+			unsigned buffer,
+			unsigned flags,
+			unsigned alignment )
+{
+   struct buffer *buf = (struct buffer *)_mesa_HashLookup( bm->hash, buffer );
+   assert(!buf->block);
+   buf->flags = flags;
+   buf->alignment = alignment;
+}
+#endif
+
+
+
+/* Replace the entire contents of 'buffer'.
+ *
+ * The existing storage is reused (updated in place) only when it is
+ * idle and already the right size.  It is released when it is still
+ * fenced by the hardware, when the size changes, or when 'data' is
+ * NULL; in that case new storage is allocated on demand.
+ *
+ * With data == NULL only the size is recorded; storage is allocated
+ * later by whoever first needs it.  If new storage cannot be
+ * allocated the buffer is left without storage and nothing is copied.
+ *
+ * 'flags' is currently unused.
+ */
+void bmBufferData(struct bufmgr *bm, 
+		  unsigned buffer, 
+		  unsigned size, 
+		  const void *data, 
+		  unsigned flags )
+{
+   struct buffer *buf = (struct buffer *)_mesa_HashLookup( bm->hash, buffer );
+
+   DBG("bmBufferData %d sz 0x%x data: %p\n", buffer, size, data);
+
+   assert(!buf->mapped);
+
+   if (buf->block) {
+      /* Any size change drops the block, including growth from zero:
+       * a retained block is written with 'size' bytes below, so it
+       * must have been allocated with exactly that size.
+       */
+      if ((buf->block->mem_type != BM_MEM_LOCAL && !bmTestFence(bm, buf->block->fence)) ||
+          (buf->size != size) ||
+          (data == NULL)) {
+         free_block(bm, buf->block);
+         buf->block = NULL;
+      }
+   }
+   
+   buf->size = size;
+
+   if (data != NULL) {
+      /* Only allocate when there is no block to update in place;
+       * allocating unconditionally would leak the retained block.
+       */
+      if (!buf->block && !bmAllocMem(bm, buf))
+         return;
+
+      memcpy(buf->block->virtual, data, size);
+   }
+}
+
+/* Overwrite 'size' bytes at 'offset' within the buffer, wherever its
+ * storage currently lives.  Storage is allocated first if the buffer
+ * has none, and for non-local storage any outstanding fence is waited
+ * on before the copy.
+ */
+void bmBufferSubData(struct bufmgr *bm, 
+                     unsigned buffer, 
+                     unsigned offset, 
+                     unsigned size, 
+                     const void *data )
+{
+   struct buffer *target = (struct buffer *)_mesa_HashLookup( bm->hash, buffer );
+   struct block *store;
+
+   DBG("bmBufferSubdata %d offset 0x%x sz 0x%x\n", buffer, offset, size);
+
+   if (!target->block)
+      bmAllocMem(bm, target);
+
+   store = target->block;
+
+   if (store->mem_type != BM_MEM_LOCAL)
+      bmFinishFence(bm, store->fence);
+
+   if (size == 0)
+      return;
+
+   memcpy(store->virtual + offset, data, size);
+}
+
+
+/* Map the buffer for CPU access and return the address of its current
+ * storage.  Returns NULL if the buffer is already mapped.  Storage is
+ * allocated if the buffer has none, and for non-local storage the
+ * call waits on the block's fence before returning.
+ *
+ * 'access' is currently unused.
+ */
+void *bmMapBuffer( struct bufmgr *bm,
+                   unsigned buffer, 
+                   unsigned access )
+{
+   struct buffer *target = (struct buffer *)_mesa_HashLookup( bm->hash, buffer );
+   struct block *store;
+
+   DBG("bmMapBuffer %d\n", buffer);
+
+   if (target->mapped)
+      return NULL;
+
+   target->mapped = 1;
+
+   if (!target->block)
+      bmAllocMem(bm, target);
+
+   store = target->block;
+
+   /* Wait on the block's fence before exposing non-local memory to
+    * the CPU:
+    */
+   if (store->mem_type != BM_MEM_LOCAL)
+      bmFinishFence(bm, store->fence);
+
+   return store->virtual;
+}
+
+/* Mark the buffer as no longer mapped.  No other state changes.
+ */
+void bmUnmapBuffer( struct bufmgr *bm, unsigned buffer )
+{
+   struct buffer *target;
+
+   target = (struct buffer *)_mesa_HashLookup( bm->hash, buffer );
+
+   DBG("bmUnmapBuffer %d\n", buffer);
+
+   target->mapped = 0;
+}
+
+
+/* Add a mechanism to tell the manager about some fixed buffers such
+ * as the (fixed) front, back and depth buffers.  Something like this
+ * may be needed even in a finalized version if we keep the static
+ * management of these buffers.
+ * 
+ * These are excluded from the buffer memory management in this file,
+ * but are presented to the driver by the same interface.  In the
+ * future they may become managed.
+ */
+#if 0
+void bm_fake_SetFixedBufferParams( struct bufmgr *bm
+                                   unsigned buffer,
+                                   unsigned offset,
+                                   unsigned size )
+{
+}
+#endif
+
+
+/* Create an empty validation list.  The list is zero-initialised, so
+ * it starts with no entries.  Returns NULL if allocation fails.
+ */
+struct bm_buffer_list *bmNewBufferList( void )
+{
+   struct bm_buffer_list *fresh;
+
+   fresh = calloc(1, sizeof *fresh);
+   DBG("bmNewBufferList\n");
+
+   return fresh;
+}
+
+/* Append one buffer to the validation list, remembering where the
+ * validated memory type and offset should later be written.  The
+ * list must hold fewer than BM_LIST_MAX entries on entry.
+ *
+ * 'flags' is currently unused.
+ */
+void bmAddBuffer( struct bm_buffer_list *list,
+                  unsigned buffer,
+                  unsigned flags,
+                  unsigned *memtype_return,
+                  unsigned *offset_return )
+{
+   unsigned slot;
+
+   assert(list->nr < BM_LIST_MAX);
+
+   slot = list->nr;
+
+   list->elem[slot].buffer = buffer;
+   list->elem[slot].memtype_return = memtype_return;
+   list->elem[slot].offset_return = offset_return;
+
+   DBG("bmAddBuffer nr %d buf %d\n", 
+       slot, buffer);
+
+   list->nr = slot + 1;
+}
+		
+/* Destroy a list created by bmNewBufferList().  Only the list itself
+ * is freed; the buffers it names are untouched.
+ */
+void bmFreeBufferList( struct bm_buffer_list *list )
+{
+   free(list);
+}
+
+
+
+
+/* To be called prior to emitting commands to hardware which reference
+ * these buffers.  Every buffer on the list is given storage in one of
+ * the memory types named by 'flags', evicting other buffers if
+ * necessary.  On success the memory type and offset of each buffer
+ * are written through the pointers registered with bmAddBuffer().
+ *
+ * Returns 1 on success.  Returns 0 if the list is over-long, names an
+ * unknown buffer id, or space could not be found even after
+ * reclaiming freed blocks and evicting.
+ */
+int bmValidateBufferList( struct bufmgr *bm,
+			  struct bm_buffer_list *list,
+			  unsigned flags )
+{
+   struct buffer *bufs[BM_LIST_MAX];
+   unsigned i;
+
+   DBG("%s\n", __FUNCTION__);
+
+   if (list->nr > BM_LIST_MAX)
+      return 0;
+
+   for (i = 0; i < list->nr; i++) {
+      bufs[i] = _mesa_HashLookup(bm->hash, list->elem[i].buffer);
+      if (!bufs[i])
+         return 0;
+   }
+
+   /* The old story: evict one texture after another until allocation
+    * succeeds.  This is a pretty poor strategy but really hard to do
+    * better without more infrastructure...  Which is coming - hooray!
+    *
+    * Each pass must reclaim something (a fenced-out freed block or an
+    * evicted buffer) before retrying, otherwise give up.
+    */
+   while (!move_buffers(bm, bufs, list->nr, flags)) {
+      if (!delayed_free(bm) &&
+          !evict_lru(bm, flags))
+         return 0;
+   }
+
+   for (i = 0; i < list->nr; i++) {
+      struct block *block = bufs[i]->block;
+      /* Only pool allocations have a mem_block; local (malloc'ed)
+       * blocks have no card offset, so report 0 for them.
+       */
+      unsigned offset = block->mem ? block->mem->ofs : 0;
+
+      DBG("%d: buf %d ofs 0x%x\n",
+          i, bufs[i]->id, offset);
+
+      if (list->elem[i].offset_return)
+         list->elem[i].offset_return[0] = offset;
+
+      if (list->elem[i].memtype_return)
+         list->elem[i].memtype_return[0] = block->mem_type;
+   }
+
+   return 1;
+}
+
+
+/* After commands are emitted but before unlocking, this must be
+ * called so that the buffer manager can correctly age the buffers.
+ *
+ * Emits one fence and, for every buffer on the list, moves its block
+ * to the head of its pool's lru list and stamps it with that fence.
+ * Returns the fence, or 0 without emitting anything if the list is
+ * empty.
+ */
+unsigned bmFenceBufferList( struct bufmgr *bm, struct bm_buffer_list *list )
+{
+   unsigned stamp;
+   unsigned idx;
+
+   DBG("%s (%d bufs)\n", __FUNCTION__, list->nr);
+
+   if (list->nr == 0)
+      return 0;
+
+   stamp = bmSetFence( bm );
+
+   for (idx = 0; idx < list->nr; idx++) {
+      struct buffer *used = _mesa_HashLookup(bm->hash, list->elem[idx].buffer);
+      struct block *store = used->block;
+
+      move_to_head(&store->pool->lru, store);
+      store->fence = stamp;
+   }
+
+   return stamp;
+}
+
+
+/* This functionality is used by the buffer manager, not really sure
+ * if we need to be exposing it in this way, probably libdrm will
+ * offer equivalent calls.
+ *
+ * For now they can stay, but will likely change/move before final:
+ */
+unsigned bmSetFence( struct bufmgr *bm )
+{
+   /* Preconditions on the driver context: batch.space must equal
+    * batch.size, and the context must be locked.
+    */
+   assert(bm->intel->batch.space == bm->intel->batch.size);
+   assert(bm->intel->locked);
+
+   /* The value returned by intelEmitIrqLocked() is used as the fence. */
+   return intelEmitIrqLocked( bm->intel );
+}
+
+/* Returns non-zero if 'fence' has passed, i.e. it is not greater than
+ * the sarea's last_dispatch value.
+ *
+ * NOTE(review): a plain <= comparison does not look wrap-around safe
+ * if the fence counter can overflow - confirm.
+ */
+int bmTestFence( struct bufmgr *bm, unsigned fence )
+{
+/*    if (fence % 1024 == 0) */
+/*       _mesa_printf("%d %d\n", fence, bm->intel->sarea->last_dispatch); */
+
+   return fence <= bm->intel->sarea->last_dispatch;
+}
+
+/* Call intelWaitIrq() on 'fence' unless bmTestFence() reports that it
+ * has already passed.
+ */
+void bmFinishFence( struct bufmgr *bm, unsigned fence )
+{
+   if (!bmTestFence(bm, fence))
+      intelWaitIrq( bm->intel, fence );
+}
+
+
+/* There is a need to tell the hardware to flush various caches
+ * before we can start reading and writing video memory.
+ *
+ * TODO: Need a flag value to tell hardware which caches have changed?
+ * Who would we rely on to populate the flag?
+ */
+
+
+/* If new data is uploaded/mapped to video or agp memory, need to
+ * flush the texture and other read caches to ensure the new version
+ * is picked up.  Can be done immediately after the upload (ie. within
+ * ValidateBuffers).
+ */
+void bmFlushReadCaches( struct bufmgr *bm )
+{
+   /* Not implemented in the fake manager: no flush is emitted. */
+}
+
+/* If a buffer which has been written to is going to be evicted, read
+ * by bmGetBufferData or mapped with bmMapBuffer, need to flush the
+ * write cache first.  Probably want to make sure this happens
+ * immediately after the last write and before the fence (how to
+ * tell?).  If we wait until just prior the evict/read/map, would then
+ * have to emit another fence and wait for the hw queue to drain to be
+ * sure the caches had flushed.
+ *
+ * A possible strategy:
+ * - every once in a while, when there is no last_draw_flush_fence outstanding,
+ *     emit a draw-cache flush just prior to the fence.
+ * - note the fence (last_draw_flush_fence)
+ * - note the most recently retired value of last_draw_flush_fence in
+ *      last_retired_draw_flush_fence
+ * - keep track of which fence each buffer is last written to in
+ *      buffer.last_write_fence
+ * - on evict/read/map, check:
+ *      - if buffer.last_write_fence > last_draw_flush_fence {
+ *            emit_flush
+ *            last_draw_flush_fence = emit fence 
+ *        }
+ *        if last_write_fence > last_retired_draw_flush_fence {
+ *            finish_fence(last_draw_flush_fence)
+ *            last_retired_draw_flush_fence = last_draw_fence
+ *        }
+ *   
+ */
+void bmFlushDrawCache( struct bufmgr *bm )
+{
+   /* Not implemented in the fake manager: no flush is emitted. */
+}
+
+/* Specifically ignore texture memory sharing.
+ */
+void bm_fake_NotifyContendedLockTake( struct bufmgr *bm )
+{
+   /* Only logs a message; 'bm' is unused and no buffer manager state
+    * is changed.
+    */
+   fprintf(stderr, "did we just lose texture memory? oh well, never mind\n");
+}
+
+
--- a/src/mesa/drivers/dri/i915/i830_metaops.c
+++ b/src/mesa/drivers/dri/i915/i830_metaops.c
@@ -295,6 +295,7 @@ static void draw_quad(i830ContextPtr i830,
 		      GLfloat s0, GLfloat s1,
 		      GLfloat t0, GLfloat t1 )
 {
+#if 0
   GLuint vertex_size = 8;
   GLuint *vb = intelEmitInlinePrimitiveLocked( &i830->intel, 
 						PRIM3D_TRIFAN, 
@@ -350,6 +351,7 @@ static void draw_quad(i830ContextPtr i830,

 /*    fprintf(stderr, "%s: DV1: %x\n",  */
 /* 	   __FUNCTION__, i830->meta.Buffer[I830_DESTREG_DV1]); */
+#endif
 }

 void 
@@ -440,7 +442,7 @@ i830TryTextureReadPixels( GLcontext *ctx,
   int textureFormat;
   GLenum glTextureFormat;
   int src_offset = i830->meta.Buffer[I830_DESTREG_CBUFADDR2];
-   int destOffset = intelAgpOffsetFromVirtual( &i830->intel, pixels);
+   int destOffset = 0;
   int destFormat, depthFormat, destPitch;
   drm_clip_rect_t tmp;

@@ -594,7 +596,7 @@ i830TryTextureDrawPixels( GLcontext *ctx,
   int textureFormat;
   GLenum glTextureFormat;
   int dst_offset = i830->meta.Buffer[I830_DESTREG_CBUFADDR2];
-   int src_offset = intelAgpOffsetFromVirtual( intel, pixels );
+   int src_offset = 0;

   if (INTEL_DEBUG & DEBUG_PIXEL)
      fprintf(stderr, "%s\n", __FUNCTION__);
--- a/src/mesa/drivers/dri/i915/i830_tex.c
+++ b/src/mesa/drivers/dri/i915/i830_tex.c
@@ -200,7 +200,7 @@ intelTextureObjectPtr i830AllocTexObj( struct gl_texture_object *texObj )

   texObj->DriverData = t;
   t->intel.base.tObj = texObj;
-   t->intel.dirty = I830_UPLOAD_TEX_ALL;
+   t->intel.dirty = ~0;
   make_empty_list( &t->intel.base );

   t->Setup[I830_TEXREG_TM0LI] = 0; /* not used */
@@ -268,7 +268,7 @@ static void i830TexParameter( GLcontext *ctx, GLenum target,
      return;
   }

-   t->intel.dirty = I830_UPLOAD_TEX_ALL;
+   t->intel.dirty = ~0;
 }


--- a/src/mesa/drivers/dri/i915/i830_texstate.c
+++ b/src/mesa/drivers/dri/i915/i830_texstate.c
@@ -246,7 +246,7 @@ static GLboolean i830SetTexImages( i830ContextPtr i830,
   t->Setup[I830_TEXREG_TM0S3] &= ~TM0S3_MAX_MIP_MASK;
   t->Setup[I830_TEXREG_TM0S3] &= ~TM0S3_MIN_MIP_MASK;
   t->Setup[I830_TEXREG_TM0S3] |= ((numLevels - 1)*4) << TM0S3_MIN_MIP_SHIFT;
-   t->intel.dirty = I830_UPLOAD_TEX_ALL;
+   t->intel.dirty = ~0;

   return intelUploadTexImages( &i830->intel, &t->intel, 0 );
 }
@@ -259,11 +259,7 @@ static void i830_import_tex_unit( i830ContextPtr i830,
   if(INTEL_DEBUG&DEBUG_TEXTURE)
      fprintf(stderr, "%s unit(%d)\n", __FUNCTION__, unit);
   
-   if (i830->intel.CurrentTexObj[unit]) 
-      i830->intel.CurrentTexObj[unit]->base.bound &= ~(1U << unit);
-
   i830->intel.CurrentTexObj[unit] = (intelTextureObjectPtr)t;
-   t->intel.base.bound |= (1 << unit);

   I830_STATECHANGE( i830, I830_UPLOAD_TEX(unit) );

@@ -285,7 +281,7 @@ static void i830_import_tex_unit( i830ContextPtr i830,
   i830->state.Tex[unit][I830_TEXREG_CUBE] = t->Setup[I830_TEXREG_CUBE];
   i830->state.Tex[unit][I830_TEXREG_MCS] |= MAP_UNIT(unit);

-   t->intel.dirty &= ~I830_UPLOAD_TEX(unit);
+   t->intel.dirty &= ~(1<<unit); /* This is broken! */
 }


@@ -317,7 +313,7 @@ static GLboolean enable_tex_common( GLcontext *ctx, GLuint unit )
    * time.
    */
   if (i830->intel.CurrentTexObj[unit] != &t->intel || 
-       (t->intel.dirty & I830_UPLOAD_TEX(unit))) {
+       (t->intel.dirty & (1<<unit))) {
      i830_import_tex_unit( i830, t, unit);
   }

@@ -419,14 +415,10 @@ static GLboolean disable_tex( GLcontext *ctx, GLuint unit )
    * one if nothing is enabled.
    */

-   if ( i830->intel.CurrentTexObj[unit] != NULL ) {
-      /* The old texture is no longer bound to this texture unit.
-       * Mark it as such.
-       */
-
-      i830->intel.CurrentTexObj[unit]->base.bound &= ~(1U << 0);
-      i830->intel.CurrentTexObj[unit] = NULL;
-   }
+   /* The old texture is no longer bound to this texture unit.
+    * Mark it as such.
+    */
+   i830->intel.CurrentTexObj[unit] = NULL;

   return GL_TRUE;
 }
--- a/src/mesa/drivers/dri/i915/i830_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i830_vtbl.c
@@ -441,7 +441,6 @@ static void i830_emit_flush( intelContextPtr intel )

 void i830InitVtbl( i830ContextPtr i830 )
 {
-   i830->intel.vtbl.alloc_tex_obj = i830AllocTexObj;
   i830->intel.vtbl.check_vertex_size = i830_check_vertex_size;
   i830->intel.vtbl.clear_with_tris = i830ClearWithTris;
   i830->intel.vtbl.destroy = i830_destroy_context;
--- a/src/mesa/drivers/dri/i915/i915_context.c
+++ b/src/mesa/drivers/dri/i915/i915_context.c
@@ -41,6 +41,9 @@
 #include "utils.h"
 #include "i915_reg.h"

+#include "bufmgr.h"
+#include "intel_regions.h"
+
 /***************************************
 * Mesa's Driver Functions
 ***************************************/
@@ -101,10 +104,13 @@ GLboolean i915CreateContext( const __GLcontextModes *mesaVis,
   struct dd_function_table functions;
   i915ContextPtr i915 = (i915ContextPtr) CALLOC_STRUCT(i915_context);
   intelContextPtr intel = &i915->intel;
+   intelScreenPrivate *intelScreen;
   GLcontext *ctx = &intel->ctx;

   if (!i915) return GL_FALSE;

+   _mesa_printf( "\ntexmem branch (i915)\n\n");
+   
   i915InitVtbl( i915 );

   i915InitDriverFunctions( &functions );
@@ -119,48 +125,71 @@ GLboolean i915CreateContext( const __GLcontextModes *mesaVis,
   ctx->Const.MaxTextureImageUnits = I915_TEX_UNITS;
   ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS;

-   intel->nr_heaps = 1;
-   intel->texture_heaps[0] = 
-      driCreateTextureHeap( 0, intel,
-			    intel->intelScreen->tex.size,
-			    12,
-			    I830_NR_TEX_REGIONS,
-			    intel->sarea->texList,
-			    & intel->sarea->texAge,
-			    & intel->swapped,
-			    sizeof( struct i915_texture_object ),
-			    (destroy_texture_object_t *)intelDestroyTexObj );
+   intel->bm = bm_fake_intel_Attach( intel );

-   /* FIXME: driCalculateMaxTextureLevels assumes that mipmaps are
-    * tightly packed, but they're not in Intel graphics
-    * hardware.
+   bmInitPool(intel->bm,
+              intel->intelScreen->tex.offset, /* low offset */
+              intel->intelScreen->tex.map, /* low virtual */
+              intel->intelScreen->tex.size,
+	      BM_MEM_AGP);
+
+   intelScreen = intel->intelScreen;
+
+   /* These are still static, but create regions for them.  
    */
-   ctx->Const.MaxTextureUnits = 1;
-   driCalculateMaxTextureLevels( intel->texture_heaps,
-				 intel->nr_heaps,
-				 &intel->ctx.Const,
-				 4,
-				 11, /* max 2D texture size is 2048x2048 */
-				 8,  /* 3D texture */
-				 11, /* cube texture. */
-				 11, /* rect texture */
-				 12,
-				 GL_FALSE );
+   intel->front_region = 
+      intel_region_create_static(intel,
+				 BM_MEM_AGP,
+				 intelScreen->front.offset,
+				 intelScreen->front.map,
+				 intelScreen->cpp,
+				 intelScreen->front.pitch,
+				 intelScreen->height);
+
+
+   intel->back_region = 
+      intel_region_create_static(intel,
+				 BM_MEM_AGP,
+				 intelScreen->back.offset,
+				 intelScreen->back.map,
+				 intelScreen->cpp,
+				 intelScreen->back.pitch,
+				 intelScreen->height);
+
+   /* Still assuming front.cpp == depth.cpp
+    */
+   intel->depth_region = 
+      intel_region_create_static(intel,
+				 BM_MEM_AGP,
+				 intelScreen->depth.offset,
+				 intelScreen->depth.map,
+				 intelScreen->cpp,
+				 intelScreen->depth.pitch,
+				 intelScreen->height);
+
+   intelInitBatchBuffer(intel);
+
+   /* Advertise the full hardware capabilities.  The new memory
+    * manager should cope much better with overload situations:
+    */
+   ctx->Const.MaxTextureLevels = 11;
+   ctx->Const.Max3DTextureLevels = 8;
+   ctx->Const.MaxCubeTextureLevels = 11;
+   ctx->Const.MaxTextureRectSize = (1<<11);
   ctx->Const.MaxTextureUnits = I915_TEX_UNITS;

   /* GL_ARB_fragment_program limits - don't think Mesa actually
    * validates programs against these, and in any case one ARB
    * instruction can translate to more than one HW instruction, so
    * we'll still have to check and fallback each time.
-    */
-   
+    */   
   ctx->Const.FragmentProgram.MaxNativeTemps = I915_MAX_TEMPORARY;
   ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* 8 tex, 2 color, fog */
   ctx->Const.FragmentProgram.MaxNativeParameters = I915_MAX_CONSTANT;
   ctx->Const.FragmentProgram.MaxNativeAluInstructions = I915_MAX_ALU_INSN;
   ctx->Const.FragmentProgram.MaxNativeTexInstructions = I915_MAX_TEX_INSN;
   ctx->Const.FragmentProgram.MaxNativeInstructions = (I915_MAX_ALU_INSN + 
-						I915_MAX_TEX_INSN);
+						       I915_MAX_TEX_INSN);
   ctx->Const.FragmentProgram.MaxNativeTexIndirections = I915_MAX_TEX_INDIRECT;
   ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* I don't think we have one */

--- a/src/mesa/drivers/dri/i915/i915_context.h
+++ b/src/mesa/drivers/dri/i915/i915_context.h
@@ -43,6 +43,7 @@
 #define I915_UPLOAD_PROGRAM          0x8
 #define I915_UPLOAD_CONSTANTS        0x10
 #define I915_UPLOAD_FOG              0x20
+#define I915_UPLOAD_INVARIENT        0x40
 #define I915_UPLOAD_TEX(i)           (0x00010000<<(i))
 #define I915_UPLOAD_TEX_ALL          (0x00ff0000)
 #define I915_UPLOAD_TEX_0_SHIFT      16
@@ -163,8 +164,6 @@ struct i915_fragment_program {
   GLuint nr_params;
      

-
-
   /* Helpers for i915_texprog.c:
    */
   GLuint src_texture;		/* Reg containing sampled texture color,
@@ -185,13 +184,6 @@ struct i915_fragment_program {



-struct i915_texture_object
-{
-   struct intel_texture_object intel;
-   GLenum lastTarget;
-   GLboolean refs_border_color;
-   GLuint Setup[I915_TEX_SETUP_SIZE];
-};

 #define I915_TEX_UNITS 8

@@ -220,6 +212,8 @@ struct i915_context

   GLuint last_ReallyEnabled;
   GLuint vertex_fog;
+   GLuint lodbias_ss2[MAX_TEXTURE_UNITS];
+

   struct i915_fragment_program tex_program;
   struct i915_fragment_program *current_program;
@@ -229,10 +223,6 @@ struct i915_context


 typedef struct i915_context *i915ContextPtr;
-typedef struct i915_texture_object *i915TextureObjectPtr;
-
-#define I915_CONTEXT(ctx)	((i915ContextPtr)(ctx))
-


 #define I915_STATECHANGE(i915, flag)					\
@@ -317,7 +307,6 @@ extern void i915_update_fog( GLcontext *ctx );
 */
 extern void i915UpdateTextureState( intelContextPtr intel );
 extern void i915InitTextureFuncs( struct dd_function_table *functions );
-extern intelTextureObjectPtr i915AllocTexObj( struct gl_texture_object *texObj );

 /*======================================================================
 * i915_metaops.c
@@ -346,6 +335,22 @@ i915ClearWithTris( intelContextPtr intel, GLbitfield mask,
 */
 extern void i915ValidateFragmentProgram( i915ContextPtr i915 );
 extern void i915InitFragProgFuncs( struct dd_function_table *functions );
+
+/*======================================================================
+ * Inline conversion functions.  These are better-typed than the
+ * macros used previously:
+ */
+/* Convert a GLcontext pointer to the i915 driver context by an
+ * unchecked cast.
+ * NOTE(review): presumably relies on the GLcontext being the first
+ * member of the driver context - confirm against the struct layout.
+ */
+static inline struct i915_context *
+i915_context( GLcontext *ctx )
+{
+   return (struct i915_context *)ctx;
+}
+
+
+
+#define I915_CONTEXT(ctx)	i915_context(ctx)
+
+
 	
 #endif

--- a/src/mesa/drivers/dri/i915/i915_metaops.c
+++ b/src/mesa/drivers/dri/i915/i915_metaops.c
@@ -387,6 +387,7 @@ static void draw_quad(i915ContextPtr i915,
 		      GLfloat s0, GLfloat s1,
 		      GLfloat t0, GLfloat t1 )
 {
+#if 0
   GLuint vertex_size = 8;
   GLuint *vb = intelEmitInlinePrimitiveLocked( &i915->intel, 
 						PRIM3D_TRIFAN, 
@@ -440,6 +441,7 @@ static void draw_quad(i915ContextPtr i915,
   tmp.v.u0 = s0;
   for (i = 0 ; i < vertex_size ; i++)
      vb[i] = tmp.ui[i];
+#endif
 }

 void 
@@ -447,7 +449,7 @@ i915ClearWithTris(intelContextPtr intel, GLbitfield mask,
 		  GLboolean all,
 		  GLint cx, GLint cy, GLint cw, GLint ch)
 {
-   i915ContextPtr i915 = I915_CONTEXT( intel );
+   i915ContextPtr i915 = i915_context( &intel->ctx );
   __DRIdrawablePrivate *dPriv = intel->driDrawable;
   intelScreenPrivate *screen = intel->intelScreen;
   int x0, y0, x1, y1;
--- a/src/mesa/drivers/dri/i915/i915_tex.c
+++ b/src/mesa/drivers/dri/i915/i915_tex.c
@@ -45,76 +45,10 @@



-
-
-
-/**
- * Allocate space for and load the mesa images into the texture memory block.
- * This will happen before drawing with a new texture, or drawing with a
- * texture after it was swapped out or teximaged again.
- */
-
-intelTextureObjectPtr i915AllocTexObj( struct gl_texture_object *texObj )
-{
-   i915TextureObjectPtr t = CALLOC_STRUCT( i915_texture_object );
-   if ( !t ) 
-      return NULL;
-
-   texObj->DriverData = t;
-   t->intel.base.tObj = texObj;
-   t->intel.dirty = I915_UPLOAD_TEX_ALL;
-   make_empty_list( &t->intel.base );
-   return &t->intel;
-}
-
-
-static void i915TexParameter( GLcontext *ctx, GLenum target,
-			     struct gl_texture_object *tObj,
-			     GLenum pname, const GLfloat *params )
-{
-   i915TextureObjectPtr t = (i915TextureObjectPtr) tObj->DriverData;
- 
-   switch (pname) {
-   case GL_TEXTURE_MIN_FILTER:
-   case GL_TEXTURE_MAG_FILTER:
-   case GL_TEXTURE_MAX_ANISOTROPY_EXT:
-   case GL_TEXTURE_WRAP_S:
-   case GL_TEXTURE_WRAP_T:
-   case GL_TEXTURE_WRAP_R:
-   case GL_TEXTURE_BORDER_COLOR:
-      t->intel.dirty = I915_UPLOAD_TEX_ALL;
-      break;
-
-   case GL_TEXTURE_COMPARE_MODE:
-      t->intel.dirty = I915_UPLOAD_TEX_ALL;
-      break;
-   case GL_TEXTURE_COMPARE_FUNC:
-      t->intel.dirty = I915_UPLOAD_TEX_ALL;
-      break;
-
-   case GL_TEXTURE_BASE_LEVEL:
-   case GL_TEXTURE_MAX_LEVEL:
-   case GL_TEXTURE_MIN_LOD:
-   case GL_TEXTURE_MAX_LOD:
-      /* The i915 and its successors can do a lot of this without
-       * reloading the textures.  A project for someone?
-       */
-      intelFlush( ctx );
-      driSwapOutTextureObject( (driTextureObject *) t );
-      t->intel.dirty = I915_UPLOAD_TEX_ALL;
-      break;
-
-   default:
-      return;
-   }
-}
-
-
 static void i915TexEnv( GLcontext *ctx, GLenum target, 
 			GLenum pname, const GLfloat *param )
 {
   i915ContextPtr i915 = I915_CONTEXT( ctx );
-   GLuint unit = ctx->Texture.CurrentUnit;

   switch (pname) {
   case GL_TEXTURE_ENV_COLOR: 	/* Should be a tracked param */
@@ -139,13 +73,12 @@ static void i915TexEnv( GLcontext *ctx, GLenum target,
      break;

   case GL_TEXTURE_LOD_BIAS: {
-      int b = (int) ((*param) * 16.0);
+      GLuint unit = ctx->Texture.CurrentUnit;
+      GLint b = (int) ((*param) * 16.0);
      if (b > 255) b = 255;
      if (b < -256) b = -256;
      I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit));
-      i915->state.Tex[unit][I915_TEXREG_SS2] &= ~SS2_LOD_BIAS_MASK;
-      i915->state.Tex[unit][I915_TEXREG_SS2] |= 
-	 ((b << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK);
+      i915->lodbias_ss2[unit] = ((b << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK);
      break;
   }

@@ -156,15 +89,8 @@ static void i915TexEnv( GLcontext *ctx, GLenum target,


 static void i915BindTexture( GLcontext *ctx, GLenum target,
-			    struct gl_texture_object *texObj )
+                             struct gl_texture_object *texobj )
 {
-   i915TextureObjectPtr tex = (i915TextureObjectPtr)texObj->DriverData;
-
-   if (tex->lastTarget != texObj->Target) {
-      tex->intel.dirty = I915_UPLOAD_TEX_ALL;
-      tex->lastTarget = texObj->Target;
-   }
-
   /* Need this if image format changes between bound textures.
    * Could try and shortcircuit by checking for differences in
    * state between incoming and outgoing textures:
@@ -178,5 +104,4 @@ void i915InitTextureFuncs( struct dd_function_table *functions )
 {
   functions->BindTexture = i915BindTexture;
   functions->TexEnv = i915TexEnv;
-   functions->TexParameter = i915TexParameter;
 }
--- a/src/mesa/drivers/dri/i915/i915_tex_layout.c
+++ b/src/mesa/drivers/dri/i915/i915_tex_layout.c
@@ -0,0 +1,340 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/* Code to layout images in a mipmap tree for i915 and i945
+ * respectively.
+ */
+
+#include "intel_mipmap_tree.h"
+#include "macros.h"
+
+/* Position of each cube face's base level, as (x, y) in units of the
+ * base face dimension.  Indexed by face number.
+ */
+static const GLint initial_offsets[6][2] = {
+   {0,0}, {0,2}, {1,0}, {1,2}, {1,1}, {1,3}
+};
+
+
+/* Per-face (x, y) step from one mip level to the next, in units of the
+ * smaller level's dimension.  Indexed by face number.
+ */
+static const GLint step_offsets[6][2] = {
+   {0,2}, {0,2}, {-1,2}, {-1,2}, {-1,1}, {-1,1}
+};
+
+static GLuint minify( GLuint d )   /* size of the next mip level */
+{
+   return (d > 1) ? (d >> 1) : 1;   /* halve, but never go below 1 */
+}
+
+/* Lay out the images of 'mt' in the i915 arrangement: sets mt->pitch and
+ * mt->total_height, fills mt->offset[face][level] for first_level..last_level
+ * and, for 3D textures, sets mt->depth_pitch.  Always returns GL_TRUE.
+ */
+GLboolean i915_miptree_layout( struct intel_mipmap_tree *mt )
+{
+   GLint i;
+
+   switch (mt->target) {
+   case GL_TEXTURE_CUBE_MAP: {
+      const GLuint dim = mt->width0;
+      GLuint face;
+
+      /* double pitch for cube layouts */
+      mt->pitch = ((dim * mt->cpp * 2 + 3) & ~3) / mt->cpp;
+      mt->total_height = dim * 4;
+
+      for ( face = 0 ; face < 6 ; face++) {
+	 GLuint x = initial_offsets[face][0] * dim;
+	 GLuint y = initial_offsets[face][1] * dim;
+	 GLuint d = dim;
+
+	 for (i = mt->first_level; i <= mt->last_level; i++) {
+	    mt->offset[face][i].x = x;
+	    mt->offset[face][i].y = y;
+	    mt->offset[face][i].width = d;
+	    mt->offset[face][i].height = d;
+	    mt->offset[face][i].depth = 1;
+	    /* d reaches 0 after the 1x1 level; that is only an error if
+	     * further levels are still to be placed.
+	     */
+	    d >>= 1;
+	    assert(d > 0 || i == mt->last_level);
+	    x += step_offsets[face][0] * d;
+	    y += step_offsets[face][1] * d;
+	 }
+      }
+      break;
+   }
+   case GL_TEXTURE_3D: {
+      GLuint width  = mt->width0;
+      GLuint height = mt->height0;
+      GLuint depth  = mt->depth0;
+
+      /* Calculate the size of a single slice.  Hardware demands a
+       * minimum of 8 mipmaps, some of which might ultimately not be
+       * used:
+       */
+      mt->pitch = ((mt->width0 * mt->cpp + 3) & ~3) / mt->cpp;
+      mt->total_height = 0;
+      /* XXX: fixme! hardware expects/requires 9 levels at minimum.
+       * NOTE(review): the comment above says 8 -- confirm which. */
+      for ( i = mt->first_level ; i <= mt->last_level ; i++ ) {
+	 mt->offset[0][i].x = 0;
+	 mt->offset[0][i].y = mt->total_height;
+	 mt->offset[0][i].width = width;
+	 mt->offset[0][i].height = height;
+	 mt->offset[0][i].depth = depth;
+
+	 mt->total_height += MAX2(2, height);
+	 width  = minify(width);
+	 height = minify(height);
+	 depth  = minify(depth);
+      }
+
+      /* Multiply slice size by texture depth for total size.  It's
+       * remarkable how wasteful of memory the i915 texture layouts
+       * are.  They are largely fixed in the i945.
+       */
+      mt->depth_pitch = mt->total_height * mt->pitch;
+      mt->total_height *= mt->depth0;
+      break;
+   }
+
+   default: {
+      GLuint width  = mt->width0;
+      GLuint height = mt->height0;
+
+      mt->pitch = ((mt->width0 * mt->cpp + 3) & ~3) / mt->cpp;
+      mt->total_height = 0;
+
+      for ( i = mt->first_level ; i <= mt->last_level ; i++ ) {
+	 mt->offset[0][i].x = 0;
+	 mt->offset[0][i].y = mt->total_height;
+	 mt->offset[0][i].height = height;
+	 mt->offset[0][i].width = width;
+	 mt->offset[0][i].depth = 1;
+
+	 if (mt->compressed)
+	    mt->total_height += MAX2(1, height/4);
+	 else
+	    mt->total_height += MAX2(2, height);
+	 width  = minify(width);
+	 height = minify(height);
+      }
+      break;
+   }
+   }
+   DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, 
+		mt->pitch,
+		mt->total_height,
+		mt->cpp,
+		mt->pitch * mt->total_height * mt->cpp );
+
+   return GL_TRUE;
+}
+
+
+/* Lay out the images of 'mt' in the i945 arrangement: sets mt->pitch and
+ * mt->total_height and fills mt->offset[face][level] for
+ * first_level..last_level.  Always returns GL_TRUE.
+ */
+GLboolean i945_miptree_layout( struct intel_mipmap_tree *mt )
+{
+   GLint i;
+
+   switch (mt->target) {
+   case GL_TEXTURE_CUBE_MAP: {
+      const GLuint dim = mt->width0;
+      GLuint face;
+
+      /* Depending on the size of the largest images, pitch can be
+       * determined either by the old-style packing of cubemap faces,
+       * or the final row of 4x4, 2x2 and 1x1 faces below this. 
+       */
+      if (dim > 32)
+	 mt->pitch = ((dim * mt->cpp * 2 + 3) & ~3) / mt->cpp;
+      else
+	 mt->pitch = 14 * 8;
+
+      mt->total_height = dim * 4 + 4;
+
+      for ( face = 0 ; face < 6 ; face++) {
+	 GLuint x = initial_offsets[face][0] * dim;
+	 GLuint y = initial_offsets[face][1] * dim;
+	 GLuint d = dim;
+
+	 if (dim == 4 && face >= 4) {
+	    y = mt->total_height - 4;
+	    x = (face - 4) * 8;
+	 }
+	 else if (dim < 4) {
+	    y = mt->total_height - 4;
+	    x = face * 8;
+	 }
+
+	 for ( i = mt->first_level ; i <= mt->last_level ; i++ ) {
+	    mt->offset[face][i].x = x;
+	    mt->offset[face][i].y = y;
+	    mt->offset[face][i].width = d;
+	    mt->offset[face][i].height = d;
+	    mt->offset[face][i].depth = 1;
+	    /* d reaches 0 after the 1x1 level; that is only an error if
+	     * further levels are still to be placed.
+	     */
+	    d >>= 1;
+	    assert(d > 0 || i == mt->last_level);
+
+	    switch (d) {
+	    case 4:
+	       switch (face) {
+	       case FACE_POS_X:
+	       case FACE_NEG_X:
+		  x += step_offsets[face][0] * d;
+		  y += step_offsets[face][1] * d;
+		  break;
+	       case FACE_POS_Y:
+	       case FACE_NEG_Y:
+		  y += 12;
+		  x -= 8;
+		  break;
+	       case FACE_POS_Z:
+	       case FACE_NEG_Z:
+		  y = mt->total_height - 4;
+		  x = (face - 4) * 8;
+		  break;
+	       }
+	       break;   /* must not fall into the 2x2 placement below */
+
+	    case 2:
+	       y = mt->total_height - 4;
+	       x = 16 + face * 8;
+	       break;
+
+	    case 1:
+	       x += 48;
+	       break;
+
+	    default:
+	       x += step_offsets[face][0] * d;
+	       y += step_offsets[face][1] * d;
+	       break;
+	    }
+	 }
+      }
+      break;
+   }
+   case GL_TEXTURE_3D: {
+      GLuint width  = mt->width0;
+      GLuint height = mt->height0;
+      GLuint depth = mt->depth0;
+      GLuint depth_pack_pitch;
+      GLuint depth_packing = 0;
+
+      mt->pitch = ((mt->width0 * mt->cpp + 3) & ~3) / mt->cpp;
+      mt->total_height = 0;
+      depth_pack_pitch = mt->pitch * mt->cpp;
+
+      for ( i = mt->first_level ; i <= mt->last_level ; i++ ) {
+	 mt->offset[0][i].x = 0;
+	 mt->offset[0][i].y = mt->total_height;
+	 mt->offset[0][i].width = width;
+	 mt->offset[0][i].height = height;
+	 mt->offset[0][i].depth = depth;
+	 mt->total_height += MAX2(2, height) * MAX2((depth >> depth_packing), 1);
+
+	 /* When alignment dominates, can't increase depth packing?
+	  * Or does pitch grow???  What are the alignment constraints,
+	  * anyway?
+	  */
+	 if (depth_pack_pitch > 4) {
+	    depth_packing++;
+	    depth_pack_pitch <<= 2; /* KW: is this right?? */
+	 }
+
+	 width  = minify(width);
+	 height = minify(height);
+	 depth  = minify(depth);
+	 /* XXX: Not sure how 3d textures work on i945 - where did
+	  * t->depth_pitch get set in the old code.  Did it ever work?
+	  * Fix up later.
+	  */
+      }
+      break;
+   }
+
+   default: {
+      GLuint x = 0;
+      GLuint y = 0;
+      GLuint width = mt->width0;
+      GLuint height = mt->height0;
+
+      mt->pitch = ((mt->width0 * mt->cpp + 3) & ~3) / mt->cpp;
+      mt->total_height = 0;
+
+      for ( i = mt->first_level ; i <= mt->last_level ; i++ ) {
+	 mt->offset[0][i].x = x;
+	 mt->offset[0][i].y = y;
+	 mt->offset[0][i].height = height;
+	 mt->offset[0][i].width = width;
+	 mt->offset[0][i].depth = 1;
+
+	 /* LPT change: step right after second mipmap.
+	  */
+	 if (i == 1)
+	    x += mt->pitch / 2;
+	 else {
+	    GLuint img_height;
+
+	    if (mt->compressed)
+	       img_height = MAX2(1, height/4);
+	    else
+	       img_height = MAX2(2, height);
+	    y += img_height;
+	 }
+
+	 /* Because the images are packed better, the final offset
+	  * might not be the maximal one:
+	  */
+	 mt->total_height = MAX2(mt->total_height, y);
+
+	 width  = minify(width);
+	 height = minify(height);
+      }
+      break;
+   }
+   }
+   DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, 
+		mt->pitch,
+		mt->total_height,
+		mt->cpp,
+		mt->pitch * mt->total_height * mt->cpp );
+
+   return GL_TRUE;
+}
+
--- a/src/mesa/drivers/dri/i915/i915_texstate.c
+++ b/src/mesa/drivers/dri/i915/i915_texstate.c
--- a/src/mesa/drivers/dri/i915/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i915_vtbl.c
@@ -37,6 +37,7 @@
 #include "tnl/t_vertex.h"

 #include "intel_batchbuffer.h"
+#include "intel_tex.h"

 #include "i915_reg.h"
 #include "i915_context.h"
@@ -44,7 +45,7 @@
 static void i915_render_start( intelContextPtr intel )
 {
   GLcontext *ctx = &intel->ctx;
-   i915ContextPtr i915 = I915_CONTEXT(intel);
+   i915ContextPtr i915 = i915_context(&intel->ctx);

   if (ctx->FragmentProgram._Active) 
      i915ValidateFragmentProgram( i915 );
@@ -56,7 +57,7 @@ static void i915_render_start( intelContextPtr intel )
 static void i915_reduced_primitive_state( intelContextPtr intel,
 					  GLenum rprim )
 {
-    i915ContextPtr i915 = I915_CONTEXT(intel);
+   i915ContextPtr i915 = i915_context(&intel->ctx);
    GLuint st1 = i915->state.Stipple[I915_STPREG_ST1];

    st1 &= ~ST1_ENABLE;
@@ -88,7 +89,7 @@ static void i915_reduced_primitive_state( intelContextPtr intel,
 static GLboolean i915_check_vertex_size( intelContextPtr intel,
 					 GLuint expected )
 {
-   i915ContextPtr i915 = I915_CONTEXT(intel);
+   i915ContextPtr i915 = i915_context(&intel->ctx);
   int lis2 = i915->current->Ctx[I915_CTXREG_LIS2];
   int lis4 = i915->current->Ctx[I915_CTXREG_LIS4];
   int i, sz = 0;
@@ -218,7 +219,7 @@ do {							\
 */
 static void i915_emit_state( intelContextPtr intel )
 {
-   i915ContextPtr i915 = I915_CONTEXT(intel);
+   i915ContextPtr i915 = i915_context(&intel->ctx);
   struct i915_hw_state *state = i915->current;
   int i;
   GLuint dirty;
@@ -236,6 +237,11 @@ static void i915_emit_state( intelContextPtr intel )
   if (VERBOSE) 
      fprintf(stderr, "%s dirty: %x\n", __FUNCTION__, dirty);

+   if (dirty & I915_UPLOAD_INVARIENT) {
+      if (VERBOSE) fprintf(stderr, "I915_UPLOAD_INVARIENT:\n"); 
+      i915_emit_invarient_state( intel );
+   }
+
   if (dirty & I915_UPLOAD_CTX) {
      if (VERBOSE) fprintf(stderr, "I915_UPLOAD_CTX:\n"); 
      emit( i915, state->Ctx, sizeof(state->Ctx) );
@@ -271,7 +277,9 @@ static void i915_emit_state( intelContextPtr intel )
      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
      for (i = 0 ; i < I915_TEX_UNITS ; i++)
 	 if (dirty & I915_UPLOAD_TEX(i)) {
-	    OUT_BATCH(state->Tex[i][I915_TEXREG_MS2]);
+	    /* Emit zero texture offset, will fixup before firing */
+	    intel_add_texoffset_fixup(intel, i, (GLuint *)batch_ptr); 
+	    batch_ptr += 4;
 	    OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]);
 	    OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]);
 	 }
@@ -314,14 +322,15 @@ static void i915_destroy_context( intelContextPtr intel )

 static void i915_set_draw_offset( intelContextPtr intel, int offset )
 {
-   i915ContextPtr i915 = I915_CONTEXT(intel);
+   i915ContextPtr i915 = i915_context(&intel->ctx);
   I915_STATECHANGE( i915, I915_UPLOAD_BUFFERS );
   i915->state.Buffer[I915_DESTREG_CBUFADDR2] = offset;
 }

 static void i915_lost_hardware( intelContextPtr intel )
 {
-   I915_CONTEXT(intel)->state.emitted = 0;
+   i915ContextPtr i915 = i915_context(&intel->ctx);
+   i915->state.emitted = 0;
 }

 static void i915_emit_flush( intelContextPtr intel )
@@ -337,7 +346,6 @@ static void i915_emit_flush( intelContextPtr intel )

 void i915InitVtbl( i915ContextPtr i915 )
 {
-   i915->intel.vtbl.alloc_tex_obj = i915AllocTexObj;
   i915->intel.vtbl.check_vertex_size = i915_check_vertex_size;
   i915->intel.vtbl.clear_with_tris = i915ClearWithTris;
   i915->intel.vtbl.destroy = i915_destroy_context;
--- a/src/mesa/drivers/dri/i915/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i915/intel_batchbuffer.c
@@ -37,283 +37,10 @@
 #include "intel_batchbuffer.h"
 #include "intel_context.h"

+#include "bufmgr.h"



-/* ================================================================
- * Performance monitoring functions
- */
-
-static void intel_fill_box( intelContextPtr intel,
-			    GLshort x, GLshort y,
-			    GLshort w, GLshort h,
-			    GLubyte r, GLubyte g, GLubyte b )
-{
-   intelEmitFillBlitLocked( intel, 
-			    intel->intelScreen->cpp,
-			    intel->intelScreen->back.pitch,
-			    intel->intelScreen->front.offset,
-			    x, y, w, h,
-			    INTEL_PACKCOLOR(intel->intelScreen->fbFormat,
-					    r,g,b,0xff));
-}
-
-static void intel_draw_performance_boxes( intelContextPtr intel )
-{
-   /* Purple box for page flipping
-    */
-   if ( intel->perf_boxes & I830_BOX_FLIP ) 
-      intel_fill_box( intel, 4, 4, 8, 8, 255, 0, 255 );
-
-   /* Red box if we have to wait for idle at any point
-    */
-   if ( intel->perf_boxes & I830_BOX_WAIT ) 
-      intel_fill_box( intel, 16, 4, 8, 8, 255, 0, 0 );
-
-   /* Blue box: lost context?
-    */
-   if ( intel->perf_boxes & I830_BOX_LOST_CONTEXT ) 
-      intel_fill_box( intel, 28, 4, 8, 8, 0, 0, 255 );
-
-   /* Yellow box for texture swaps
-    */
-   if ( intel->perf_boxes & I830_BOX_TEXTURE_LOAD ) 
-      intel_fill_box( intel, 40, 4, 8, 8, 255, 255, 0 );
-
-   /* Green box if hardware never idles (as far as we can tell)
-    */
-   if ( !(intel->perf_boxes & I830_BOX_RING_EMPTY) ) 
-      intel_fill_box( intel, 64, 4, 8, 8, 0, 255, 0 );
-
-
-   /* Draw bars indicating number of buffers allocated 
-    * (not a great measure, easily confused)
-    */
-#if 0
-   if (intel->dma_used) {
-      int bar = intel->dma_used / 10240;
-      if (bar > 100) bar = 100;
-      if (bar < 1) bar = 1;
-      intel_fill_box( intel, 4, 16, bar, 4, 196, 128, 128 );
-      intel->dma_used = 0;
-   }
-#endif
-
-   intel->perf_boxes = 0;
-}
-
-
-
-
-
-
-static int bad_prim_vertex_nr( int primitive, int nr )
-{
-   switch (primitive & PRIM3D_MASK) {
-   case PRIM3D_POINTLIST:
-      return nr < 1;
-   case PRIM3D_LINELIST:
-      return (nr & 1) || nr == 0;
-   case PRIM3D_LINESTRIP:
-      return nr < 2;
-   case PRIM3D_TRILIST:
-   case PRIM3D_RECTLIST:
-      return nr % 3 || nr == 0;
-   case PRIM3D_POLY:
-   case PRIM3D_TRIFAN:
-   case PRIM3D_TRISTRIP:
-   case PRIM3D_TRISTRIP_RVRSE:
-      return nr < 3;
-   default:
-      return 1;
-   }	
-}
-
-static void intel_flush_inline_primitive( GLcontext *ctx )
-{
-   intelContextPtr intel = INTEL_CONTEXT( ctx );
-   GLuint used = intel->batch.ptr - intel->prim.start_ptr;
-   GLuint vertcount;
-
-   assert(intel->prim.primitive != ~0);
-
-   if (1) {
-      /* Check vertex size against the vertex we're specifying to
-       * hardware.  If it's wrong, ditch the primitive.
-       */ 
-      if (!intel->vtbl.check_vertex_size( intel, intel->vertex_size )) 
-	 goto do_discard;
-
-      vertcount = (used - 4)/ (intel->vertex_size * 4);
-
-      if (!vertcount)
-	 goto do_discard;
-      
-      if (vertcount * intel->vertex_size * 4 != used - 4) {
-	 fprintf(stderr, "vertex size confusion %d %d\n", used, 
-		 intel->vertex_size * vertcount * 4);
-	 goto do_discard;
-      }
-
-      if (bad_prim_vertex_nr( intel->prim.primitive, vertcount )) {
-	 fprintf(stderr, "bad_prim_vertex_nr %x %d\n", intel->prim.primitive,
-		 vertcount);
-	 goto do_discard;
-      }
-   }
-
-   if (used < 8)
-      goto do_discard;
-
-   *(int *)intel->prim.start_ptr = (_3DPRIMITIVE | 
-				    intel->prim.primitive |
-				    (used/4-2));
-
-   goto finished;
-   
- do_discard:
-   intel->batch.ptr -= used;
-   intel->batch.space += used;
-   assert(intel->batch.space >= 0);
-
- finished:
-   intel->prim.primitive = ~0;
-   intel->prim.start_ptr = 0;
-   intel->prim.flush = 0;
-}
-
-
-/* Emit a primitive referencing vertices in a vertex buffer.
- */
-void intelStartInlinePrimitive( intelContextPtr intel, GLuint prim )
-{
-   BATCH_LOCALS;
-
-   if (0)
-      fprintf(stderr, "%s %x\n", __FUNCTION__, prim);
-
-
-   /* Finish any in-progress primitive:
-    */
-   INTEL_FIREVERTICES( intel );
-   
-   /* Emit outstanding state:
-    */
-   intel->vtbl.emit_state( intel );
-   
-   /* Make sure there is some space in this buffer:
-    */
-   if (intel->vertex_size * 10 * sizeof(GLuint) >= intel->batch.space)
-      intelFlushBatch(intel, GL_TRUE); 
-
-
-#if 1
-   if (((int)intel->batch.ptr) & 0x4) {
-      BEGIN_BATCH(1);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-#endif
-
-   /* Emit a slot which will be filled with the inline primitive
-    * command later.
-    */
-   BEGIN_BATCH(2);
-   OUT_BATCH( 0 );
-
-   intel->prim.start_ptr = batch_ptr;
-   intel->prim.primitive = prim;
-   intel->prim.flush = intel_flush_inline_primitive;
-
-   OUT_BATCH( 0 );
-   ADVANCE_BATCH();
-}
-
-
-void intelRestartInlinePrimitive( intelContextPtr intel )
-{
-   GLuint prim = intel->prim.primitive;
-
-   intel_flush_inline_primitive( &intel->ctx );
-   if (1) intelFlushBatch(intel, GL_TRUE); /* GL_TRUE - is critical */
-   intelStartInlinePrimitive( intel, prim );
-}
-
-
-
-void intelWrapInlinePrimitive( intelContextPtr intel )
-{
-   GLuint prim = intel->prim.primitive;
-
-   if (0)
-      fprintf(stderr, "%s\n", __FUNCTION__);
-   intel_flush_inline_primitive( &intel->ctx );
-   intelFlushBatch(intel, GL_TRUE);
-   intelStartInlinePrimitive( intel, prim );
-}
-
-
-/* Emit a primitive with space for inline vertices.
- */
-GLuint *intelEmitInlinePrimitiveLocked(intelContextPtr intel, 
-				       int primitive,
-				       int dwords,
-				       int vertex_size )
-{
-   GLuint *tmp = 0;
-   BATCH_LOCALS;
-
-   if (0)
-      fprintf(stderr, "%s 0x%x %d\n", __FUNCTION__, primitive, dwords);
-
-   /* Emit outstanding state:
-    */
-   intel->vtbl.emit_state( intel );
-
-
-   if (1) {
-      int used = dwords * 4;
-      int vertcount;
-
-      /* Check vertex size against the vertex we're specifying to
-       * hardware.  If it's wrong, ditch the primitive.
-       */ 
-      if (!intel->vtbl.check_vertex_size( intel, vertex_size )) 
-	 goto do_discard;
-
-      vertcount = dwords / vertex_size;
-      
-      if (dwords % vertex_size) {
-	 fprintf(stderr, "did not request a whole number of vertices\n");
-	 goto do_discard;
-      }
-
-      if (bad_prim_vertex_nr( primitive, vertcount )) {
-	 fprintf(stderr, "bad_prim_vertex_nr %x %d\n", primitive, vertcount);
-	 goto do_discard;
-      }
-
-      if (used < 8)
-	 goto do_discard;
-   }
-
-   /* Emit 3D_PRIMITIVE commands:
-    */
-   BEGIN_BATCH(1 + dwords);
-   OUT_BATCH( _3DPRIMITIVE | 
-	      primitive |
-	      (dwords-1) );
-
-   tmp = (GLuint *)batch_ptr;
-   batch_ptr += dwords * 4;
-
-   ADVANCE_BATCH();
-
- do_discard:
-   return tmp;
-}
-
-

 /*
 * Copy the back buffer to the front buffer. 
@@ -333,6 +60,9 @@ void intelCopyBuffer( const __DRIdrawablePrivate *dPriv )

   intelFlush( &intel->ctx );
   LOCK_HARDWARE( intel );
+   intelInstallBatchBuffer(intel);   
+   intelValidateBuffers( intel );
+
   {
      intelScreenPrivate *intelScreen = intel->intelScreen;
      __DRIdrawablePrivate *dPriv = intel->driDrawable;
@@ -360,9 +90,6 @@ void intelCopyBuffer( const __DRIdrawablePrivate *dPriv )
 	 break;
      }

-      if (0)
-	 intel_draw_performance_boxes( intel );
-
      for (i = 0 ; i < nbox; i++, pbox++) 
      {
 	 if (pbox->x1 > pbox->x2 ||
@@ -394,6 +121,8 @@ void intelCopyBuffer( const __DRIdrawablePrivate *dPriv )
      }
   }
   intelFlushBatchLocked( intel, GL_TRUE, GL_TRUE, GL_TRUE );
+   assert(intel->buffer_list == NULL);
+
   UNLOCK_HARDWARE( intel );
 }

@@ -457,6 +186,14 @@ void intelEmitCopyBlitLocked( intelContextPtr intel,
   int dst_x2 = dst_x + w;
   BATCH_LOCALS;

+
+   if (0)
+      _mesa_printf("%s src:0x%x/%d %d,%d dst:0x%x/%d %d,%d sz:%dx%d\n",
+		   __FUNCTION__,
+		   src_offset, src_pitch, src_x, src_y,
+		   dst_offset, dst_pitch, dst_x, dst_y,
+		   w,h);
+
   src_pitch *= cpp;
   dst_pitch *= cpp;

@@ -542,6 +279,8 @@ void intelClearWithBlit(GLcontext *ctx, GLbitfield flags, GLboolean all,

   intelFlush( &intel->ctx );
   LOCK_HARDWARE( intel );
+   intelInstallBatchBuffer(intel);   
+   intelValidateBuffers( intel );
   {
      /* flip top to bottom */
      cy = intel->driDrawable->h-cy1-ch;
@@ -631,58 +370,72 @@ void intelClearWithBlit(GLcontext *ctx, GLbitfield flags, GLboolean all,



-void intelDestroyBatchBuffer( GLcontext *ctx )
+void intelDestroyBatchBuffer( struct intel_context *intel )
 {
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
-
-   if (intel->alloc.ptr) {
-      intelFreeAGP( intel, intel->alloc.ptr );
-      intel->alloc.ptr = 0;
-   }
 }


-void intelInitBatchBuffer( GLcontext *ctx )
+void intelInstallBatchBuffer( struct intel_context *intel )
 {
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   assert(!intel->batch.ptr);

-   if (!intel->intelScreen->allow_batchbuffer || getenv("INTEL_NO_BATCH")) {
-      intel->alloc.size = 8 * 1024;
-      intel->alloc.ptr = malloc( intel->alloc.size );
-      intel->alloc.offset = 0;
-   }
-   else {
-      switch (intel->intelScreen->deviceID) {
-      case PCI_CHIP_I865_G:
-	 /* HW bug?  Seems to crash if batchbuffer crosses 4k boundary.
-	  */
-	 intel->alloc.size = 8 * 1024; 
-	 break;
-      default:
-	 /* This is the smallest amount of memory the kernel deals with.
-	  * We'd ideally like to make this smaller.
-	  */
-	 intel->alloc.size = 1 << intel->intelScreen->logTextureGranularity;
-	 break;
+   intel->alloc.current++;
+   intel->alloc.current %= INTEL_ALLOC_NR;
+
+   DBG("%s: %d\n", __FUNCTION__, intel->alloc.current);
+
+   intel->batch.size = INTEL_ALLOC_SIZE;
+   intel->batch.space = intel->batch.size;
+   intel->batch.start_offset = 0;
+
+   intel->batch.ptr = bmMapBuffer( intel->bm, 
+				   intel->alloc.buffer[intel->alloc.current],
+				   BM_WRITE | BM_MEM_AGP );
+
+
+   assert(!intel->buffer_list);
+   intel->buffer_list = bmNewBufferList();
+      
+   /* Add the batchbuffer 
+    */
+   bmAddBuffer(intel->buffer_list,
+	       intel->alloc.buffer[intel->alloc.current],
+	       BM_READ,
+	       NULL,
+	       &intel->batch.start_offset);
+
+
+   if (0) {
+      static int foo;
+      if (foo++ > 10) {
+	 _mesa_printf("foo\n");
+	 exit(1);
      }
-
-      intel->alloc.ptr = intelAllocateAGP( intel, intel->alloc.size );
-      if (intel->alloc.ptr)
-	 intel->alloc.offset = 
-	    intelAgpOffsetFromVirtual( intel, intel->alloc.ptr );
-   }
-
-   if (!intel->alloc.ptr) {
-      FALLBACK(intel, INTEL_FALLBACK_NO_BATCHBUFFER, 1);
-   }
-   else {
-      intel->prim.flush = 0;
-      intel->vtbl.emit_invarient_state( intel );
-
-      /* Make sure this gets to the hardware, even if we have no cliprects:
-       */
-      LOCK_HARDWARE( intel );
-      intelFlushBatchLocked( intel, GL_TRUE, GL_FALSE, GL_TRUE );
-      UNLOCK_HARDWARE( intel );
   }
 }
+
+/* Generate the INTEL_ALLOC_NR batch buffer objects in intel->alloc.buffer
+ * and request INTEL_ALLOC_SIZE of BM_MEM_AGP storage (no data) for each. */
+void intelInitBatchBuffer( struct intel_context *intel )
+{
+   GLint i;
+
+   DBG("%s: %d\n", __FUNCTION__, intel->alloc.current);
+   bmGenBuffers(intel->bm,
+		INTEL_ALLOC_NR,
+		intel->alloc.buffer);
+
+   for (i = 0; i < INTEL_ALLOC_NR; i++)
+      bmBufferData(intel->bm,
+		   intel->alloc.buffer[i],
+		   INTEL_ALLOC_SIZE,
+		   NULL,
+		   BM_MEM_AGP);
+}
+
+/* Validate intel->buffer_list; a failure is caught only by assert(). */
+void intelValidateBuffers( struct intel_context *intel )
+{
+   if (!bmValidateBufferList(intel->bm, intel->buffer_list, BM_MEM_AGP))
+      assert(0);
+}
--- a/src/mesa/drivers/dri/i915/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i915/intel_batchbuffer.h
@@ -34,10 +34,7 @@

 #define BATCH_LOCALS	GLubyte *batch_ptr;

-/* #define VERBOSE 0 */
-#ifndef VERBOSE
-extern int VERBOSE;
-#endif
+#define VERBOSE 0


 #define BEGIN_BATCH(n)							\
@@ -45,6 +42,7 @@ do {									\
   if (VERBOSE) fprintf(stderr, 					\
 			"BEGIN_BATCH(%d) in %s, %d dwords free\n",	\
 			(n), __FUNCTION__, intel->batch.space/4);	\
+   assert(intel->locked); \
   if (intel->batch.space < (n)*4)					\
      intelFlushBatch(intel, GL_TRUE);					\
   batch_ptr = intel->batch.ptr;					\
@@ -65,12 +63,14 @@ do {								\
   assert(intel->batch.space >= 0);				\
 } while(0)

-extern void intelInitBatchBuffer( GLcontext *ctx );
-extern void intelDestroyBatchBuffer( GLcontext *ctx );
+extern void intelInitBatchBuffer( struct intel_context *intel );
+extern void intelDestroyBatchBuffer( struct intel_context *intel );
+
+void intelInstallBatchBuffer( struct intel_context *intel );
+

 extern void intelStartInlinePrimitive( intelContextPtr intel, GLuint prim );
 extern void intelWrapInlinePrimitive( intelContextPtr intel );
-extern void intelRestartInlinePrimitive( intelContextPtr intel );
 extern GLuint *intelEmitInlinePrimitiveLocked(intelContextPtr intel, 
 					      int primitive, int dwords,
 					      int vertex_size);
@@ -99,25 +99,8 @@ extern void intelEmitFillBlitLocked( intelContextPtr intel,



-static __inline GLuint *intelExtendInlinePrimitive( intelContextPtr intel, 
-						GLuint dwords )
-{
-   GLuint sz = dwords * sizeof(GLuint);
-   GLuint *ptr;
-
-   if (intel->batch.space < sz) {
-      intelWrapInlinePrimitive( intel );
-/*       assert(intel->batch.space >= sz); */
-   }
-
-/*    assert(intel->prim.primitive != ~0); */
-   ptr = (GLuint *)intel->batch.ptr;
-   intel->batch.ptr += sz;
-   intel->batch.space -= sz;
-
-   return ptr;
-}
-
-
+GLuint *intelExtendInlinePrimitive( intelContextPtr intel, 
+				    GLuint dwords );
+void intelValidateBuffers( struct intel_context *intel );

 #endif
--- a/src/mesa/drivers/dri/i915/intel_blit.c
+++ b/src/mesa/drivers/dri/i915/intel_blit.c
--- a/src/mesa/drivers/dri/i915/intel_context.c
+++ b/src/mesa/drivers/dri/i915/intel_context.c
@@ -90,7 +90,7 @@ int prevLockLine;
 * Mesa's Driver Functions
 ***************************************/

-#define DRIVER_DATE                     "20050225"
+#define DRIVER_DATE                     "20060201"

 const GLubyte *intelGetString( GLcontext *ctx, GLenum name )
 {
@@ -197,7 +197,7 @@ const struct dri_extension card_extensions[] =
    { "GL_NV_blend_square",                NULL },
    { "GL_NV_vertex_program",              GL_NV_vertex_program_functions },
    { "GL_NV_vertex_program1_1",           NULL },
-    { "GL_SGIS_generate_mipmap",           NULL },
+/*     { "GL_SGIS_generate_mipmap",           NULL }, */
    { NULL,                                NULL }
 };

@@ -214,7 +214,7 @@ static const struct tnl_pipeline_stage *intel_pipeline[] = {
   &_tnl_point_attenuation_stage,
   &_tnl_arb_vertex_program_stage,
   &_tnl_vertex_program_stage,
-#if 1
+#if 0
   &_intel_render_stage,     /* ADD: unclipped rastersetup-to-dma */
 #endif
   &_tnl_render_stage,
@@ -300,9 +300,6 @@ GLboolean intelInitContext( intelContextPtr intel,
   intel->sarea = saPriv;


-   (void) memset( intel->texture_heaps, 0, sizeof( intel->texture_heaps ) );
-   make_empty_list( & intel->swapped );
-
   ctx->Const.MaxTextureMaxAnisotropy = 2.0;

   ctx->Const.MinLineWidth = 1.0;
@@ -371,7 +368,12 @@ GLboolean intelInitContext( intelContextPtr intel,

   _math_matrix_ctr (&intel->ViewportMatrix);

-   driInitExtensions( ctx, card_extensions, GL_TRUE );
+   /* Disable imaging extension until convolution is working in
+    * teximage paths:
+    */
+   driInitExtensions( ctx, card_extensions, 
+/* 		      GL_TRUE, */
+		      GL_FALSE);

   if (intel->ctx.Mesa_DXTn) {
     _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
@@ -387,7 +389,7 @@ GLboolean intelInitContext( intelContextPtr intel,
 /* 			  DRI_TEXMGR_DO_TEXTURE_RECT ); */


-   intel->prim.flush = intelInitBatchBuffer;
+   intel->prim.flush = NULL;
   intel->prim.primitive = ~0;


@@ -431,21 +433,14 @@ void intelDestroyContext(__DRIcontextPrivate *driContextPriv)
      _swrast_DestroyContext (&intel->ctx);
      intel->Fallback = 0;	/* don't call _swrast_Flush later */

-      intelDestroyBatchBuffer(&intel->ctx);
+      intelDestroyBatchBuffer(intel);
      

      if ( release_texture_heaps ) {
         /* This share group is about to go away, free our private
          * texture object data.
          */
-         int i;
-
-         for ( i = 0 ; i < intel->nr_heaps ; i++ ) {
-	    driDestroyTextureHeap( intel->texture_heaps[ i ] );
-	    intel->texture_heaps[ i ] = NULL;
-         }
-
-	 assert( is_empty_list( & intel->swapped ) );
+	 fprintf(stderr, "do somethign to free texture heaps\n");
      }

      /* free the Mesa context */
@@ -586,7 +581,13 @@ void intelGetLock( intelContextPtr intel, GLuint flags )
   __DRIscreenPrivate *sPriv = intel->driScreen;
   drmI830Sarea * sarea = intel->sarea;
   int me = intel->hHWContext;
-   unsigned   i;
+   static int foo = 0;
+
+/*    _mesa_printf("%s\n", __FUNCTION__); */
+/*    if (foo++ > 1) { */
+/*       _mesa_printf("%s - foo\n", __FUNCTION__); */
+/*       abort(); */
+/*    } */

   drmGetLock(intel->driFd, intel->hHWContext, flags);

@@ -609,15 +610,6 @@ void intelGetLock( intelContextPtr intel, GLuint flags )
      sarea->ctxOwner = me;
   }

-   /* Shared texture managment - if another client has played with
-    * texture space, figure out which if any of our textures have been
-    * ejected, and update our global LRU.
-    */
-
-   for ( i = 0 ; i < intel->nr_heaps ; i++ ) {
-      DRI_AGE_TEXTURES( intel->texture_heaps[ i ] );
-   }
-
   if (dPriv && intel->lastStamp != dPriv->lastStamp) {
      intelWindowMoved( intel );
      intel->lastStamp = dPriv->lastStamp;
--- a/src/mesa/drivers/dri/i915/intel_context.h
+++ b/src/mesa/drivers/dri/i915/intel_context.h
@@ -47,11 +47,9 @@
 #define DV_PF_565  (2<<8)
 #define DV_PF_8888 (3<<8)

-#define INTEL_CONTEXT(ctx)	((intelContextPtr)(ctx))
+struct intel_region;

-typedef struct intel_context intelContext;
 typedef struct intel_context *intelContextPtr;
-typedef struct intel_texture_object *intelTextureObjectPtr;

 typedef void (*intel_tri_func)(intelContextPtr, intelVertex *, intelVertex *,
 							  intelVertex *);
@@ -72,33 +70,53 @@ extern void intelFallback( intelContextPtr intel, GLuint bit, GLboolean mode );
 #define INTEL_TEX_MAXLEVELS 10


+
 struct intel_texture_object
 {
-   driTextureObject    base;	/* the parent class */
+   struct gl_texture_object base; /* The "parent" object */

-   GLuint texelBytes;
-   GLuint age;
-   GLuint Pitch;
-   GLuint Height;
-   GLuint TextureOffset;
-   GLubyte *BufAddr;   
+   /* The mipmap tree must include at least these levels once
+    * validated:
+    */
+   GLuint firstLevel;
+   GLuint lastLevel;

-   GLuint min_level;
-   GLuint max_level;
-   GLuint depth_pitch;
+   /* Offset for firstLevel image:
+    */
+   GLuint textureOffset;

-   struct {
-      const struct gl_texture_image *image;
-      GLuint offset;       /* into BufAddr */
-      GLuint height;
-      GLuint internalFormat;
-   } image[6][INTEL_TEX_MAXLEVELS];
-
-   GLuint dirty;
-   GLuint firstLevel,lastLevel;
+   /* On validation any active images held in main memory or in other
+    * regions will be copied to this region and the old storage freed.
+    */
+   struct intel_mipmap_tree *mt;
 };


+
+/* Driver-side texture image.  Embeds Mesa's gl_texture_image as its
+ * first member ('base'), which is what allows the intel_texture_image()
+ * helper to cast a gl_texture_image pointer to this type.
+ */
+struct intel_texture_image {
+   struct gl_texture_image base;
+
+   /* These aren't stored in gl_texture_image 
+    */
+   GLuint level;
+   GLuint face;
+
+   /* If intelImage->mt != NULL, image data is stored here.
+    * Else if intelImage->base.Data != NULL, image is stored there.
+    * Else there is no image data.
+    */
+   struct intel_mipmap_tree *mt;
+};
+
+
+/* Element type of intel_context::fixup[] (INTEL_MAX_FIXUP entries,
+ * counted by nr_fixups).
+ *
+ * NOTE(review): the code that fills and consumes these entries is not
+ * visible here.  Presumably '*dest' is patched from '*value' plus
+ * 'delta' once buffer offsets are known -- confirm before relying on it.
+ */
+struct intel_reloc {
+   GLuint *value;
+   GLuint delta;
+   GLuint *dest;
+};
+
+#define INTEL_MAX_FIXUP 64
+
 struct intel_context
 {
   GLcontext ctx;		/* the parent class */
@@ -122,14 +140,14 @@ struct intel_context
 			       GLboolean all, 
 			       GLint cx, GLint cy, GLint cw, GLint ch);

-      intelTextureObjectPtr (*alloc_tex_obj)( struct gl_texture_object *tObj );
-
   } vtbl;

   GLint refcount;   
   GLuint Fallback;
   GLuint NewGLState;
   
+   GLuint last_fence;
+
   struct {
      GLuint start_offset;
      GLint size;
@@ -137,15 +155,16 @@ struct intel_context
      GLubyte *ptr;
   } batch;
      
+#define INTEL_ALLOC_NR 64
+#define INTEL_ALLOC_SIZE 4096
+
   struct {
-      void *ptr;
-      GLint size;
-      GLuint offset;
-      GLuint active_buf;
-      GLuint irq_emitted;
+      GLuint buffer[INTEL_ALLOC_NR];
+      GLuint current;
   } alloc;

   struct {
+      GLuint id;
      GLuint primitive;
      GLubyte *start_ptr;      
      void (*flush)( GLcontext * );
@@ -179,14 +198,14 @@ struct intel_context
   GLboolean hw_stencil;
   GLboolean hw_stipple;
   
-   /* Texture object bookkeeping
+   /* AGP memory buffer manager:
    */
-   GLuint                nr_heaps;
-   driTexHeap          * texture_heaps[1];
-   driTextureObject      swapped;
-   GLuint                lastStamp;
+   struct bufmgr *bm;
+   struct bm_buffer_list *buffer_list;
+
+   struct intel_reloc fixup[INTEL_MAX_FIXUP];
+   GLuint nr_fixups;

-   struct intel_texture_object *CurrentTexObj[MAX_TEXTURE_UNITS];

   /* State for intelvb.c and inteltris.c.
    */
@@ -195,7 +214,13 @@ struct intel_context
   GLenum render_primitive;
   GLenum reduced_primitive;
   GLuint vertex_size;
-   char *verts;			/* points to tnl->clipspace.vertex_buf */
+   GLubyte *verts;			/* points to tnl->clipspace.vertex_buf */
+
+
+   struct intel_region *front_region;
+   struct intel_region *back_region;
+   struct intel_region *draw_region;
+   struct intel_region *depth_region;


   /* Fallback rasterization functions 
@@ -212,7 +237,6 @@ struct intel_context
   GLuint numClipRects;		/* cliprects for that buffer */
   drm_clip_rect_t *pClipRects;

-   int dirtyAge;
   int perf_boxes;
   int do_irqs;

@@ -228,6 +252,8 @@ struct intel_context
   __DRIscreenPrivate *driScreen;
   intelScreenPrivate *intelScreen; 
   drmI830Sarea *sarea; 
+   
+   GLuint lastStamp;

   /**
    * Configuration cache
@@ -511,6 +537,38 @@ extern void intel_dump_batchbuffer( long offset,
 */	
 extern void intelInitPixelFuncs( struct dd_function_table *functions );

+GLboolean intel_check_color_per_fragment_ops( const GLcontext *ctx );
+
+GLboolean intel_clip_to_framebuffer( GLcontext *ctx,
+				     const GLframebuffer *buffer,
+				     GLint *x, GLint *y,
+				     GLsizei *width, GLsizei *height );
+
+struct intel_region *intel_readbuf_region( struct intel_context *intel );
+struct intel_region *intel_drawbuf_region( struct intel_context *intel );
+
+
+
+/*======================================================================
+ * Inline conversion functions.  
+ * These are better-typed than the macros used previously:
+ */
+/* GLcontext -> driver context.  A plain pointer cast with no checking;
+ * it relies on 'ctx' being the first member of struct intel_context.
+ */
+static inline struct intel_context *intel_context( GLcontext *ctx )
+{
+   return (struct intel_context *)ctx;
+}
+
+/* gl_texture_object -> driver texture object.  A plain pointer cast with
+ * no checking; it relies on 'base' being the first member of
+ * struct intel_texture_object.
+ */
+static inline struct intel_texture_object *intel_texture_object( struct gl_texture_object *obj )
+{
+   return (struct intel_texture_object *)obj;
+}
+
+/* gl_texture_image -> driver texture image.  A plain pointer cast with
+ * no checking; it relies on 'base' being the first member of
+ * struct intel_texture_image.
+ */
+static inline struct intel_texture_image *intel_texture_image( struct gl_texture_image *img )
+{
+   return (struct intel_texture_image *)img;
+}
+
+#define INTEL_CONTEXT(ctx)	intel_context(ctx)


 #endif
--- a/src/mesa/drivers/dri/i915/intel_ioctl.c
+++ b/src/mesa/drivers/dri/i915/intel_ioctl.c
@@ -38,11 +38,12 @@
 #include "intel_context.h"
 #include "intel_ioctl.h"
 #include "intel_batchbuffer.h"
+#include "intel_regions.h"
 #include "drm.h"
+#include "bufmgr.h"


-
-static int intelEmitIrqLocked( intelContextPtr intel )
+int intelEmitIrqLocked( intelContextPtr intel )
 {
   drmI830IrqEmit ie;
   int ret, seq = 0;
@@ -51,21 +52,23 @@ static int intelEmitIrqLocked( intelContextPtr intel )
 	  (DRM_LOCK_HELD|intel->hHWContext));

   ie.irq_seq = &seq;
-	 
+
+#if 1
   ret = drmCommandWriteRead( intel->driFd, DRM_I830_IRQ_EMIT, 
 			      &ie, sizeof(ie) );
   if ( ret ) {
      fprintf( stderr, "%s: drmI830IrqEmit: %d\n", __FUNCTION__, ret );
      exit(1);
-   }
-   
+   }   
+#endif
+
   if (0)
      fprintf(stderr, "%s -->  %d\n", __FUNCTION__, seq );

   return seq;
 }

-static void intelWaitIrq( intelContextPtr intel, int seq )
+void intelWaitIrq( intelContextPtr intel, int seq )
 {
   drmI830IrqWait iw;
   int ret;
@@ -75,31 +78,20 @@ static void intelWaitIrq( intelContextPtr intel, int seq )

   iw.irq_seq = seq;
 	 
+#if 1
   do {
      ret = drmCommandWrite( intel->driFd, DRM_I830_IRQ_WAIT, &iw, sizeof(iw) );
   } while (ret == -EAGAIN || ret == -EINTR);

   if ( ret ) {
      fprintf( stderr, "%s: drmI830IrqWait: %d\n", __FUNCTION__, ret );
-      if (0)
-	 intel_dump_batchbuffer( intel->alloc.offset,
-				 intel->alloc.ptr,
-				 intel->alloc.size );
      exit(1);
   }
+#endif
 }



-static void age_intel( intelContextPtr intel, int age )
-{
-   GLuint i;
-
-   for (i = 0 ; i < MAX_TEXTURE_UNITS ; i++)
-      if (intel->CurrentTexObj[i]) 
-	 intel->CurrentTexObj[i]->age = age;
-}
-
 void intel_dump_batchbuffer( long offset,
 			     int *ptr,
 			     int count )
@@ -107,34 +99,12 @@ void intel_dump_batchbuffer( long offset,
   int i;
   fprintf(stderr, "\n\n\nSTART BATCH (%d dwords):\n", count);
   for (i = 0; i < count/4; i += 4) 
-      fprintf(stderr, "\t0x%x: 0x%08x 0x%08x 0x%08x 0x%08x\n", 
-	      (unsigned int)offset + i*4, ptr[i], ptr[i+1], ptr[i+2], ptr[i+3]);
+      fprintf(stderr, "\t0x%08x 0x%08x 0x%08x 0x%08x\n", 
+/* 	      (unsigned int)offset + i*4,  */
+	      ptr[i], ptr[i+1], ptr[i+2], ptr[i+3]);
   fprintf(stderr, "END BATCH\n\n\n");
 }

-void intelRefillBatchLocked( intelContextPtr intel, GLboolean allow_unlock )
-{
-   GLuint last_irq = intel->alloc.irq_emitted;
-   GLuint half = intel->alloc.size / 2;
-   GLuint buf = (intel->alloc.active_buf ^= 1);
-
-   intel->alloc.irq_emitted = intelEmitIrqLocked( intel );
-
-   if (last_irq) {
-      if (allow_unlock) UNLOCK_HARDWARE( intel ); 
-      intelWaitIrq( intel, last_irq );
-      if (allow_unlock) LOCK_HARDWARE( intel ); 
-   }
-
-   if (0)
-      fprintf(stderr, "%s: now using half %d\n", __FUNCTION__, buf);
-
-   intel->batch.start_offset = intel->alloc.offset + buf * half;
-   intel->batch.ptr = (GLubyte *)intel->alloc.ptr + buf * half;
-   intel->batch.size = half - 8;
-   intel->batch.space = half - 8;
-   assert(intel->batch.space >= 0);
-}

 #define MI_BATCH_BUFFER_END 	(0xA<<23)

@@ -147,6 +117,9 @@ void intelFlushBatchLocked( intelContextPtr intel,
   drmI830BatchBuffer batch;

   assert(intel->locked);
+   assert(intel->buffer_list);
+   assert(intel->batch.ptr);
+

   if (0)
      fprintf(stderr, "%s used %d of %d offset %x..%x refill %d\n",
@@ -163,17 +136,6 @@ void intelFlushBatchLocked( intelContextPtr intel,
    * single buffer.
    */
   if (intel->numClipRects == 0 && !ignore_cliprects) {
-      
-      /* Without this yeild, an application with no cliprects can hog
-       * the hardware.  Without unlocking, the effect is much worse -
-       * effectively a lock-out of other contexts.
-       */
-      if (allow_unlock) {
-	 UNLOCK_HARDWARE( intel );
-	 sched_yield();
-	 LOCK_HARDWARE( intel );
-      }
-
      /* Note that any state thought to have been emitted actually
       * hasn't:
       */
@@ -191,19 +153,17 @@ void intelFlushBatchLocked( intelContextPtr intel,
      batch.DR4 = ((((GLuint)intel->drawX) & 0xffff) | 
 		   (((GLuint)intel->drawY) << 16));
      
-      if (intel->alloc.offset) {
-	 if ((batch.used & 0x4) == 0) {
-	    ((int *)intel->batch.ptr)[0] = 0;
-	    ((int *)intel->batch.ptr)[1] = MI_BATCH_BUFFER_END;
-	    batch.used += 0x8;
-	    intel->batch.ptr += 0x8;
-	 }
-	 else {
-	    ((int *)intel->batch.ptr)[0] = MI_BATCH_BUFFER_END;
-	    batch.used += 0x4;
-	    intel->batch.ptr += 0x4;
-	 }      
+      if ((batch.used & 0x4) == 0) {
+	 ((int *)intel->batch.ptr)[0] = 0;
+	 ((int *)intel->batch.ptr)[1] = MI_BATCH_BUFFER_END;
+	 batch.used += 0x8;
+	 intel->batch.ptr += 0x8;
      }
+      else {
+	 ((int *)intel->batch.ptr)[0] = MI_BATCH_BUFFER_END;
+	 batch.used += 0x4;
+	 intel->batch.ptr += 0x4;
+      }      

      if (0)
 	 intel_dump_batchbuffer( batch.start,
@@ -214,65 +174,35 @@ void intelFlushBatchLocked( intelContextPtr intel,
 	 fprintf(stderr, "%s: 0x%x..0x%x DR4: %x cliprects: %d\n",
 		 __FUNCTION__, 
 		 batch.start, 
-		 batch.start + batch.used,
+		 batch.start + batch.used * 4,
 		 batch.DR4, batch.num_cliprects);

-      intel->batch.start_offset += batch.used;
-      intel->batch.size -= batch.used;
-
-      if (intel->batch.size < 8) {
-	 refill = GL_TRUE;
-	 intel->batch.space = intel->batch.size = 0;
+#if 1
+      if (drmCommandWrite (intel->driFd, DRM_I830_BATCHBUFFER, &batch, 
+			   sizeof(batch))) {
+	 fprintf(stderr, "DRM_I830_BATCHBUFFER: %d\n",  -errno);
+	 UNLOCK_HARDWARE(intel);
+	 exit(1);
      }
-      else {
-	 intel->batch.size -= 8;
-	 intel->batch.space = intel->batch.size;
-      }
-
-
-      assert(intel->batch.space >= 0);
-      assert(batch.start >= intel->alloc.offset);
-      assert(batch.start < intel->alloc.offset + intel->alloc.size);
-      assert(batch.start + batch.used > intel->alloc.offset);
-      assert(batch.start + batch.used <= 
-	     intel->alloc.offset + intel->alloc.size);
-
-
-      if (intel->alloc.offset) {
-	 if (drmCommandWrite (intel->driFd, DRM_I830_BATCHBUFFER, &batch, 
-			      sizeof(batch))) {
-	    fprintf(stderr, "DRM_I830_BATCHBUFFER: %d\n",  -errno);
-	    UNLOCK_HARDWARE(intel);
-	    exit(1);
-	 }
-      } else {
-	 drmI830CmdBuffer cmd;
-	 cmd.buf = (char *)intel->alloc.ptr + batch.start;
-	 cmd.sz = batch.used;
-	 cmd.DR1 = batch.DR1;
-	 cmd.DR4 = batch.DR4;
-	 cmd.num_cliprects = batch.num_cliprects;
-	 cmd.cliprects = batch.cliprects;
-	 
-	 if (drmCommandWrite (intel->driFd, DRM_I830_CMDBUFFER, &cmd, 
-			      sizeof(cmd))) {
-	    fprintf(stderr, "DRM_I830_CMDBUFFER: %d\n",  -errno);
-	    UNLOCK_HARDWARE(intel);
-	    exit(1);
-	 }
-      }	 
-
+#endif
      
-      age_intel(intel, intel->sarea->last_enqueue);
-
      /* FIXME: use hardware contexts to avoid 'losing' hardware after
       * each buffer flush.
       */
      intel->vtbl.lost_hardware( intel );
   }

-   if (refill)
-      intelRefillBatchLocked( intel, allow_unlock );
+   bmUnmapBuffer( intel->bm,
+		  intel->alloc.buffer[intel->alloc.current] );
+   intel->batch.ptr = NULL;
+   intel->batch.size = 0;
+   intel->batch.space = 0;
+
+   intel->last_fence = bmFenceBufferList(intel->bm, intel->buffer_list);
+   bmFreeBufferList(intel->buffer_list);
+
+
+   intel->buffer_list = NULL;
 }

 void intelFlushBatch( intelContextPtr intel, GLboolean refill )
@@ -281,9 +211,7 @@ void intelFlushBatch( intelContextPtr intel, GLboolean refill )
      intelFlushBatchLocked( intel, GL_FALSE, refill, GL_FALSE );
   } 
   else {
-      LOCK_HARDWARE(intel);
-      intelFlushBatchLocked( intel, GL_FALSE, refill, GL_TRUE );
-      UNLOCK_HARDWARE(intel);
+      assert(intel->batch.size == intel->batch.space);
   }
 }

@@ -291,21 +219,31 @@ void intelFlushBatch( intelContextPtr intel, GLboolean refill )



+static void wait_for_idle_locked( struct intel_context *intel )
+{
+   intelInstallBatchBuffer(intel);

-
-void intelWaitForIdle( intelContextPtr intel )
-{   
-   if (0)
-      fprintf(stderr, "%s\n", __FUNCTION__);
+   bmAddBuffer(intel->buffer_list, intel->draw_region->buffer,
+	       BM_WRITE, NULL, NULL);
+   
+   intelValidateBuffers(intel);

   intel->vtbl.emit_flush( intel );
   intelFlushBatch( intel, GL_TRUE );

-   /* Use an irq to wait for dma idle -- Need to track lost contexts
-    * to shortcircuit consecutive calls to this function:
-    */
-   intelWaitIrq( intel, intel->alloc.irq_emitted );
-   intel->alloc.irq_emitted = 0;
+   bmFinishFence( intel->bm, intel->last_fence );
+}
+
+void intelWaitForIdle( intelContextPtr intel )
+{   
+   if (intel->locked) {
+      wait_for_idle_locked( intel );
+   }
+   else {
+      LOCK_HARDWARE(intel);
+      wait_for_idle_locked( intel );
+      UNLOCK_HARDWARE(intel);
+   }
 }


@@ -396,171 +334,6 @@ void intelClear(GLcontext *ctx, GLbitfield mask, GLboolean all,



-void *intelAllocateAGP( intelContextPtr intel, GLsizei size )
-{
-   int region_offset = 0;
-   drmI830MemAlloc alloc;
-   int ret;
-
-   if (0)
-      fprintf(stderr, "%s: %d bytes\n", __FUNCTION__, size);
-
-   alloc.region = I830_MEM_REGION_AGP;
-   alloc.alignment = 0;
-   alloc.size = size;
-   alloc.region_offset = &region_offset;
-
-   LOCK_HARDWARE(intel);
-
-   /* Make sure the global heap is initialized
-    */
-   if (intel->texture_heaps[0])
-      driAgeTextures( intel->texture_heaps[0] );
-
-
-   ret = drmCommandWriteRead( intel->driFd,
-			      DRM_I830_ALLOC,
-			      &alloc, sizeof(alloc));
-   
-   if (ret) {
-      fprintf(stderr, "%s: DRM_I830_ALLOC ret %d\n", __FUNCTION__, ret);
-      UNLOCK_HARDWARE(intel);
-      return NULL;
-   }
-   
-   if (0)
-      fprintf(stderr, "%s: allocated %d bytes\n", __FUNCTION__, size);
-
-   /* Need to propogate this information (agp memory in use) to our
-    * local texture lru.  The kernel has already updated the global
-    * lru.  An alternative would have been to allocate memory the
-    * usual way and then notify the kernel to pin the allocation.
-    */
-   if (intel->texture_heaps[0])
-      driAgeTextures( intel->texture_heaps[0] );
-
-   UNLOCK_HARDWARE(intel);   
-
-   return (void *)((char *)intel->intelScreen->tex.map + region_offset);
-}
-
-void intelFreeAGP( intelContextPtr intel, void *pointer )
-{
-   int region_offset;
-   drmI830MemFree memfree;
-   int ret;
-
-   region_offset = (char *)pointer - (char *)intel->intelScreen->tex.map;
-
-   if (region_offset < 0 || 
-       region_offset > intel->intelScreen->tex.size) {
-      fprintf(stderr, "offset %d outside range 0..%d\n", region_offset,
-	      intel->intelScreen->tex.size);
-      return;
-   }
-
-   memfree.region = I830_MEM_REGION_AGP;
-   memfree.region_offset = region_offset;
-   
-   ret = drmCommandWrite( intel->driFd,
-			  DRM_I830_FREE,
-			  &memfree, sizeof(memfree));
-   
-   if (ret) 
-      fprintf(stderr, "%s: DRM_I830_FREE ret %d\n", __FUNCTION__, ret);
-}
-
-/* This version of AllocateMemoryMESA allocates only agp memory, and
- * only does so after the point at which the driver has been
- * initialized.
- *
- * Theoretically a valid context isn't required.  However, in this
- * implementation, it is, as I'm using the hardware lock to protect
- * the kernel data structures, and the current context to get the
- * device fd.
- */
-void *intelAllocateMemoryMESA(__DRInativeDisplay *dpy, int scrn,
-			      GLsizei size, GLfloat readfreq,
-			      GLfloat writefreq, GLfloat priority)
-{
-   GET_CURRENT_CONTEXT(ctx);
-
-   if (INTEL_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq, 
-	      writefreq, priority);
-
-   if (getenv("INTEL_NO_ALLOC"))
-      return NULL;
-   
-   if (!ctx || INTEL_CONTEXT(ctx) == 0) 
-      return NULL;
-   
-   return intelAllocateAGP( INTEL_CONTEXT(ctx), size );
-}
-
-
-/* Called via glXFreeMemoryMESA() */
-void intelFreeMemoryMESA(__DRInativeDisplay *dpy, int scrn, GLvoid *pointer)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   if (INTEL_DEBUG & DEBUG_IOCTL) 
-      fprintf(stderr, "%s %p\n", __FUNCTION__, pointer);
-
-   if (!ctx || INTEL_CONTEXT(ctx) == 0) {
-      fprintf(stderr, "%s: no context\n", __FUNCTION__);
-      return;
-   }
-
-   intelFreeAGP( INTEL_CONTEXT(ctx), pointer );
-}
-
-/* Called via glXGetMemoryOffsetMESA() 
- *
- * Returns offset of pointer from the start of agp aperture.
- */
-GLuint intelGetMemoryOffsetMESA(__DRInativeDisplay *dpy, int scrn, 
-				const GLvoid *pointer)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   intelContextPtr intel;
-
-   if (!ctx || !(intel = INTEL_CONTEXT(ctx)) ) {
-      fprintf(stderr, "%s: no context\n", __FUNCTION__);
-      return ~0;
-   }
-
-   if (!intelIsAgpMemory( intel, pointer, 0 ))
-      return ~0;
-
-   return intelAgpOffsetFromVirtual( intel, pointer );
-}
-
-
-GLboolean intelIsAgpMemory( intelContextPtr intel, const GLvoid *pointer,
-			   GLint size )
-{
-   int offset = (char *)pointer - (char *)intel->intelScreen->tex.map;
-   int valid = (size >= 0 &&
-		offset >= 0 &&
-		offset + size < intel->intelScreen->tex.size);
-
-   if (INTEL_DEBUG & DEBUG_IOCTL)
-      fprintf(stderr, "intelIsAgpMemory( %p ) : %d\n", pointer, valid );
-   
-   return valid;
-}
-
-
-GLuint intelAgpOffsetFromVirtual( intelContextPtr intel, const GLvoid *pointer )
-{
-   int offset = (char *)pointer - (char *)intel->intelScreen->tex.map;
-
-   if (offset < 0 || offset > intel->intelScreen->tex.size)
-      return ~0;
-   else
-      return intel->intelScreen->tex.offset + offset;
-}
-



--- a/src/mesa/drivers/dri/i915/intel_ioctl.h
+++ b/src/mesa/drivers/dri/i915/intel_ioctl.h
@@ -46,21 +46,9 @@ extern void intelRefillBatchLocked( intelContextPtr intel, GLboolean allow_unloc
 extern void intelFinish( GLcontext *ctx );
 extern void intelFlush( GLcontext *ctx );

-extern void *intelAllocateAGP( intelContextPtr intel, GLsizei size );
-extern void intelFreeAGP( intelContextPtr intel, void *pointer );

-extern void *intelAllocateMemoryMESA( __DRInativeDisplay *dpy, int scrn, 
-				      GLsizei size, GLfloat readfreq,
-				      GLfloat writefreq, GLfloat priority );
-
-extern void intelFreeMemoryMESA( __DRInativeDisplay *dpy, int scrn, 
-				 GLvoid *pointer );
-
-extern GLuint intelGetMemoryOffsetMESA( __DRInativeDisplay *dpy, int scrn, const GLvoid *pointer );
-extern GLboolean intelIsAgpMemory( intelContextPtr intel, const GLvoid *pointer,
-				  GLint size );
-
-extern GLuint intelAgpOffsetFromVirtual( intelContextPtr intel, const GLvoid *p );
+void intelWaitIrq( intelContextPtr intel, int seq );
+int intelEmitIrqLocked( intelContextPtr intel );


 #endif
--- a/src/mesa/drivers/dri/i915/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i915/intel_mipmap_tree.c
@@ -0,0 +1,250 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "intel_context.h"
+#include "intel_mipmap_tree.h"
+#include "intel_regions.h"
+#include "bufmgr.h"
+#include "enums.h"
+
+/* Map a texture target onto the target stored in a mipmap tree: each of
+ * the six cube-map face targets collapses to GL_TEXTURE_CUBE_MAP_ARB,
+ * and every other target is returned unchanged.
+ */
+static GLenum target_to_target( GLenum target )
+{
+   if (target == GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB ||
+       target == GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB ||
+       target == GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB ||
+       target == GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB ||
+       target == GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB ||
+       target == GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB)
+      return GL_TEXTURE_CUBE_MAP_ARB;
+
+   return target;
+}
+
+/* Allocate a mipmap tree, compute its layout for the current chipset and
+ * allocate the backing region.
+ *
+ * The tree is zero-initialised by calloc(), so fields not assigned below
+ * (including refcount) start at zero.  The layout routine is chosen from
+ * intel->intelScreen->deviceID; unrecognised device IDs are treated as a
+ * layout failure.
+ *
+ * Returns the new tree, or NULL if the tree itself cannot be allocated,
+ * the layout fails, or the region cannot be allocated.  Nothing is
+ * leaked on failure.
+ */
+struct intel_mipmap_tree *intel_miptree_create( struct intel_context *intel,
+						GLenum target,
+						GLenum internal_format,
+						GLuint first_level,
+						GLuint last_level,
+						GLuint width0,
+						GLuint height0,
+						GLuint depth0,
+						GLuint cpp,
+						GLboolean compressed)
+{
+   GLboolean ok;
+   struct intel_mipmap_tree *mt = calloc(1, sizeof(*mt));
+
+   DBG("%s target %s format %s level %d..%d\n", __FUNCTION__,
+		_mesa_lookup_enum_by_nr(target),
+		_mesa_lookup_enum_by_nr(internal_format),
+		first_level,
+		last_level);
+
+   /* Out of memory: bail out before the field stores below dereference
+    * a NULL pointer.
+    */
+   if (!mt)
+      return NULL;
+
+   mt->target = target_to_target(target);
+   mt->internal_format = internal_format;
+   mt->first_level = first_level;
+   mt->last_level = last_level;
+   mt->width0 = width0;
+   mt->height0 = height0;
+   mt->depth0 = depth0;
+   mt->cpp = cpp;
+   mt->compressed = compressed;
+
+   switch (intel->intelScreen->deviceID) {
+   case PCI_CHIP_I945_G:
+      ok = i945_miptree_layout( mt );
+      break;
+   case PCI_CHIP_I915_G:
+   case PCI_CHIP_I915_GM:
+      ok = i915_miptree_layout( mt );
+      break;
+   default:
+      ok = GL_FALSE;
+      break;
+   }
+
+   if (ok)
+      mt->region = intel_region_alloc( intel, 
+				       mt->cpp,
+				       mt->pitch, 
+				       mt->total_height );
+
+   /* mt->region is still NULL (from calloc) when the layout failed, so
+    * this one test covers both layout and region-allocation failure.
+    */
+   if (!mt->region) {
+      free(mt);
+      return NULL;
+   }
+
+   return mt;
+}
+
+
+/* Take an additional reference on 'mt': increments mt->refcount and
+ * returns the same pointer.  'mt' is dereferenced unconditionally, so it
+ * must not be NULL.
+ */
+struct intel_mipmap_tree *intel_miptree_reference( struct intel_mipmap_tree *mt )
+{
+   mt->refcount++;
+   return mt;
+}
+
+/* Drop one reference to 'mt', destroying it when the last holder lets go.
+ *
+ * intel_miptree_create() leaves refcount at zero (the tree comes from
+ * calloc and the field is never assigned), so refcount holds the number
+ * of references taken through intel_miptree_reference() in addition to
+ * the creator's own.  A release that finds the count already at zero is
+ * therefore the final one: it releases the region and frees the tree.
+ * Any other release only decrements the count.
+ *
+ * The earlier test, "if (--mt->refcount)", freed the tree whenever the
+ * decremented count was non-zero, i.e. while other holders still
+ * existed as soon as three or more shared the tree.
+ *
+ * NOTE(review): if creation is ever changed to start refcount at 1, this
+ * test must become "if (--mt->refcount == 0)".
+ *
+ * 'mt' must not be NULL.
+ */
+void intel_miptree_release( struct intel_context *intel,
+			   struct intel_mipmap_tree *mt )
+{
+   if (mt->refcount != 0) {
+      /* Other holders remain; just give up our reference. */
+      mt->refcount--;
+      return;
+   }
+
+   intel_region_release(intel, mt->region);
+   free(mt);
+}
+
+
+
+
+/* Decide whether 'image' can be pulled into the unified mipmap tree 'mt'
+ * at slot (face, level).  This mirrors the texture completeness test in
+ * a lot of ways.
+ *
+ * Returns GL_TRUE only when the image has no border, its internal format
+ * and compressed flag equal the tree's, and its width/height/depth equal
+ * the values recorded in mt->offset[face][level].
+ *
+ * (Author's open question: not sure whether gl_texture_image is the
+ * right thing to pass here.)
+ */
+GLboolean intel_miptree_match_image( struct intel_mipmap_tree *mt,
+				     struct gl_texture_image *image,
+				     GLuint face,
+				     GLuint level )
+{
+   GLboolean same_format;
+   GLboolean same_size;
+
+   DBG("%s %d %d/%d %d/%d\n", __FUNCTION__,
+		image->Border,
+		image->InternalFormat, mt->internal_format,
+		image->IsCompressed, mt->compressed);
+
+   /* Bordered images never go into a mipmap tree. */
+   if (image->Border)
+      return GL_FALSE;
+
+   same_format = (image->InternalFormat == mt->internal_format &&
+		  image->IsCompressed == mt->compressed);
+   if (!same_format)
+      return GL_FALSE;
+
+   DBG("%s: %d/%d %d/%d %d/%d\n", __FUNCTION__,
+		image->Width, mt->offset[face][level].width,
+		image->Height, mt->offset[face][level].height,
+		image->Depth, mt->offset[face][level].depth);
+
+   /* Compare against the base level image adjusted for minification.
+    * This also rejects images not present in the tree, changed
+    * targets, etc.
+    */
+   same_size = (image->Width == mt->offset[face][level].width &&
+		image->Height == mt->offset[face][level].height &&
+		image->Depth == mt->offset[face][level].depth);
+   if (!same_size)
+      return GL_FALSE;
+
+   DBG("%s: success\n", __FUNCTION__);
+   return GL_TRUE;
+}
+
+
+/* Offset of the (face, level) image from the start of the tree's region:
+ * (x + y * pitch) * cpp, using the x/y recorded in mt->offset[face][level].
+ */
+GLuint intel_miptree_image_offset(struct intel_mipmap_tree *mt,
+				  GLuint face,
+				  GLuint level)
+{
+   const GLuint texel_index = (mt->offset[face][level].x +
+			       mt->offset[face][level].y * mt->pitch);
+
+   return texel_index * mt->cpp;
+}
+
+
+
+
+/* Map the tree's region and return a pointer to the start of the
+ * (face, level) image inside it.  When 'stride' is non-NULL it receives
+ * mt->pitch * mt->cpp.  Undo with intel_miptree_image_unmap().
+ */
+GLubyte *intel_miptree_image_map(struct intel_context *intel,
+				 struct intel_mipmap_tree *mt,
+				 GLuint face,
+				 GLuint level,
+				 GLuint *stride)
+{
+   GLuint image_offset;
+
+   DBG("%s \n", __FUNCTION__);
+
+   if (stride != NULL)
+      *stride = mt->pitch * mt->cpp;
+
+   image_offset = intel_miptree_image_offset(mt, face, level);
+
+   return intel_region_map(intel, mt->region) + image_offset;
+}
+
+/* Counterpart of intel_miptree_image_map(): unmaps the tree's region via
+ * intel_region_unmap().  No face/level is needed because the mapping is
+ * of the whole region.
+ */
+void intel_miptree_image_unmap(struct intel_context *intel, 
+			       struct intel_mipmap_tree *mt)
+{
+   DBG("%s\n", __FUNCTION__);
+   intel_region_unmap(intel, mt->region);
+}
+
+
+
+/* Upload data for a particular image.
+ *
+ * Forwards 'src' (with row pitch 'src_pitch') to intel_region_data(),
+ * targeting the x/y position recorded for the (face, level) image in
+ * dst->region and using that image's recorded width and height.  The
+ * source origin is always (0, 0).
+ *
+ * TODO: 3D textures
+ */
+void intel_miptree_image_data(struct intel_context *intel, 
+			      struct intel_mipmap_tree *dst,
+			      GLuint face,
+			      GLuint level,
+			      void *src, GLuint src_pitch )
+{
+   DBG("%s\n", __FUNCTION__);
+   intel_region_data(intel,
+		     dst->region,
+		     dst->offset[face][level].x,
+		     dst->offset[face][level].y,
+		     src,
+		     src_pitch,
+		     0, 0,	/* source x,y */
+		     dst->offset[face][level].width,
+		     dst->offset[face][level].height);      
+}
+			  
+/* Copy mipmap image between trees
+ *
+ * Copies the (face, level) image from 'src' to the same slot in 'dst'
+ * with intel_region_copy(), using each tree's own recorded x/y position
+ * and the source image's width and height.  Asserts that both trees
+ * record the same width and height for that slot; depth is not checked.
+ */
+void intel_miptree_image_copy( struct intel_context *intel,
+			       struct intel_mipmap_tree *dst,
+			       GLuint face, GLuint level,
+			       struct intel_mipmap_tree *src )
+{
+   DBG("%s\n", __FUNCTION__);
+   assert(src->offset[face][level].width == 
+	  dst->offset[face][level].width);
+
+   assert(src->offset[face][level].height == 
+	  dst->offset[face][level].height);
+
+   intel_region_copy(intel,
+		     dst->region,
+		     dst->offset[face][level].x,
+		     dst->offset[face][level].y,
+		     src->region,
+		     src->offset[face][level].x,
+		     src->offset[face][level].y,
+		     src->offset[face][level].width,
+		     src->offset[face][level].height);
+		     
+}
--- a/src/mesa/drivers/dri/i915/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i915/intel_mipmap_tree.h
@@ -0,0 +1,166 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_MIPMAP_TREE_H
+#define INTEL_MIPMAP_TREE_H
+
+#include "intel_regions.h"
+
+/* A layer on top of the intel_regions code which adds:
+ *
+ * - Code to size and layout a region to hold a set of mipmaps.
+ * - Query to determine if a new image fits in an existing tree.
+ * - More refcounting 
+ *     - maybe able to remove refcounting from intel_region?
+ * - ?
+ *
+ * The fixed mipmap layout of intel hardware where one offset
+ * specifies the position of all images in a mipmap hierarchy
+ * complicates the implementation of GL texture image commands,
+ * compared to hardware where each image is specified with an
+ * independent offset.
+ *
+ * In an ideal world, each texture object would be associated with a
+ * single bufmgr buffer or 2d intel_region, and all the images within
+ * the texture object would slot into the tree as they arrive.  The
+ * reality can be a little messier, as images can arrive from the user
+ * with sizes that don't fit in the existing tree, or in an order
+ * where the tree layout cannot be guessed immediately.  
+ * 
+ * This structure encodes an idealized mipmap tree.  The GL image
+ * commands build these where possible, otherwise store the images in
+ * temporary system buffers.
+ */
+
+
+struct intel_mipmap_offset {
+   GLuint x; 
+   GLuint y;
+   GLuint width;
+   GLuint height;
+   GLuint depth;		/* how will this work? */
+};
+
+struct intel_mipmap_tree {
+   /* Effectively the key:
+    */
+   GLenum target;
+   GLenum internal_format;
+
+   GLuint first_level;
+   GLuint last_level;
+
+   GLuint width0, height0, depth0;
+   GLuint cpp;
+   GLboolean compressed;
+
+   /* Derived from the above:
+    */   
+   GLuint pitch;
+   GLuint depth_pitch;		/* per-image on i945? */
+   GLuint total_height;
+   struct intel_mipmap_offset offset[MAX_FACES][MAX_TEXTURE_LEVELS];
+
+   /* The data is held here:
+    */
+   struct intel_region *region;
+
+   /* These are also refcounted:
+    */
+   GLuint refcount;
+};
+
+
+
+struct intel_mipmap_tree *intel_miptree_create( struct intel_context *intel,
+						GLenum target,
+						GLenum internal_format,
+						GLuint first_level,
+						GLuint last_level,
+						GLuint width0,
+						GLuint height0,
+						GLuint depth0,
+						GLuint cpp,
+						GLboolean compressed);
+
+struct intel_mipmap_tree *intel_miptree_reference( struct intel_mipmap_tree * );
+
+void intel_miptree_release( struct intel_context *intel,
+			    struct intel_mipmap_tree *mt );
+
+/* Check if an image fits an existing mipmap tree layout
+ */
+GLboolean intel_miptree_match_image( struct intel_mipmap_tree *mt, 
+				     struct gl_texture_image *image,
+				     GLuint face,
+				     GLuint level );
+
+/* Return a pointer to an image within a tree.  Return image stride as
+ * well.
+ */
+GLubyte *intel_miptree_image_map( struct intel_context *intel,
+				  struct intel_mipmap_tree *mt,
+				  GLuint face,
+				  GLuint level,
+				  GLuint *stride );
+
+void intel_miptree_image_unmap( struct intel_context *intel,
+				struct intel_mipmap_tree *mt );
+
+
+/* Return the linear offset of an image relative to the start of the
+ * tree:
+ */
+GLuint intel_miptree_image_offset( struct intel_mipmap_tree *mt,
+				   GLuint face,
+				   GLuint level );
+
+
+
+/* Upload an image into a tree
+ */
+void intel_miptree_image_data(struct intel_context *intel, 
+			      struct intel_mipmap_tree *dst,
+			      GLuint face,
+			      GLuint level,
+			      void *src, GLuint src_pitch );
+
+/* Copy an image between two trees
+ */
+void intel_miptree_image_copy( struct intel_context *intel,
+			       struct intel_mipmap_tree *dst,
+			       GLuint face, GLuint level,
+			       struct intel_mipmap_tree *src );
+
+/* i915_mipmap_tree.c:
+ */
+GLboolean i915_miptree_layout( struct intel_mipmap_tree *mt );
+GLboolean i945_miptree_layout( struct intel_mipmap_tree *mt );
+
+
+
+#endif
--- a/src/mesa/drivers/dri/i915/intel_pixel.c
+++ b/src/mesa/drivers/dri/i915/intel_pixel.c
@@ -35,11 +35,11 @@
 #include "intel_context.h"
 #include "intel_ioctl.h"
 #include "intel_batchbuffer.h"
-
-
+#include "intel_regions.h"
+#include "bufmgr.h"

 static GLboolean
-check_color( const GLcontext *ctx, GLenum type, GLenum format,
+check_color( GLcontext *ctx, GLenum type, GLenum format,
 	     const struct gl_pixelstore_attrib *packing,
 	     const void *pixels, GLint sz, GLint pitch )
 {
@@ -72,38 +72,28 @@ check_color( const GLcontext *ctx, GLenum type, GLenum format,
   return GL_FALSE;
 }

-static GLboolean
-check_color_per_fragment_ops( const GLcontext *ctx )
+GLboolean intel_check_color_per_fragment_ops( const GLcontext *ctx )
 {
-   int result;
-   result = (!(     ctx->Color.AlphaEnabled || 
-		    ctx->Depth.Test ||
-		    ctx->Fog.Enabled ||
-		    ctx->Scissor.Enabled ||
-		    ctx->Stencil.Enabled ||
-		    !ctx->Color.ColorMask[0] ||
-		    !ctx->Color.ColorMask[1] ||
-		    !ctx->Color.ColorMask[2] ||
-		    !ctx->Color.ColorMask[3] ||
-		    ctx->Color.ColorLogicOpEnabled ||
-		    ctx->Texture._EnabledUnits
-           ) &&
-	   ctx->Current.RasterPosValid);
-   
-   return result;
+   return !(ctx->Color.AlphaEnabled || 
+	    ctx->Depth.Test ||
+	    ctx->Fog.Enabled ||
+	    ctx->Scissor.Enabled ||
+	    ctx->Stencil.Enabled ||
+	    !ctx->Color.ColorMask[0] ||
+	    !ctx->Color.ColorMask[1] ||
+	    !ctx->Color.ColorMask[2] ||
+	    !ctx->Color.ColorMask[3] ||
+	    ctx->Color.ColorLogicOpEnabled ||
+	    ctx->Texture._EnabledUnits);
 }



-static GLboolean
-clip_pixelrect( const GLcontext *ctx,
-		const GLframebuffer *buffer,
-		GLint *x, GLint *y,
-		GLsizei *width, GLsizei *height,
-		GLint *size )
+GLboolean intel_clip_to_framebuffer( GLcontext *ctx,
+				     const GLframebuffer *buffer,
+				     GLint *x, GLint *y,
+				     GLsizei *width, GLsizei *height )
 {
-   intelContextPtr intel = INTEL_CONTEXT(ctx);
-
   /* left clipping */
   if (*x < buffer->_Xmin) {
      *width -= (buffer->_Xmin - *x);
@@ -130,9 +120,6 @@ clip_pixelrect( const GLcontext *ctx,
   if (*height <= 0)
      return GL_FALSE;

-   *size = ((*y + *height - 1) * intel->intelScreen->front.pitch +
-	    (*x + *width - 1) * intel->intelScreen->cpp);
-
   return GL_TRUE;
 }

@@ -152,8 +139,7 @@ intelTryReadPixels( GLcontext *ctx,

   /* Only accelerate reading to agp buffers.
    */
-   if ( !intelIsAgpMemory(intel, pixels, 
-			pitch * height * intel->intelScreen->cpp ) ) {
+   if ( 1 ) {
      if (INTEL_DEBUG & DEBUG_PIXEL)
 	 fprintf(stderr, "%s: dest not agp\n", __FUNCTION__);
      return GL_FALSE;
@@ -194,12 +180,11 @@ intelTryReadPixels( GLcontext *ctx,
      int nbox = dPriv->numClipRects;
      int src_offset = intel->drawOffset;
      int src_pitch = intel->intelScreen->front.pitch;
-      int dst_offset = intelAgpOffsetFromVirtual( intel, pixels);
+      int dst_offset = 0;
      drm_clip_rect_t *box = dPriv->pClipRects;
      int i;

-      if (!clip_pixelrect(ctx, ctx->ReadBuffer, &x, &y, &width, &height,
-			  &size)) {
+      if (!intel_clip_to_framebuffer(ctx, ctx->ReadBuffer, &x, &y, &width, &height)) {
 	 UNLOCK_HARDWARE( intel );
 	 if (INTEL_DEBUG & DEBUG_PIXEL)
 	    fprintf(stderr, "%s totally clipped -- nothing to do\n",
@@ -276,8 +261,7 @@ static void do_draw_pix( GLcontext *ctx,
   drm_clip_rect_t *box = dPriv->pClipRects;
   int nbox = dPriv->numClipRects;
   int i;
-   int size;
-   int src_offset = intelAgpOffsetFromVirtual( intel, pixels);
+   int src_offset = 0;
   int src_pitch = pitch;

   if (INTEL_DEBUG & DEBUG_PIXEL)
@@ -289,9 +273,8 @@ static void do_draw_pix( GLcontext *ctx,
   {
      y -= height;			/* cope with pixel zoom */
   
-      if (!clip_pixelrect(ctx, ctx->DrawBuffer,
-			  &x, &y, &width, &height,
-			  &size)) {
+      if (!intel_clip_to_framebuffer(ctx, ctx->DrawBuffer,
+			  &x, &y, &width, &height)) {
 	 UNLOCK_HARDWARE( intel );
 	 return;
      }
@@ -367,7 +350,7 @@ intelTryDrawPixels( GLcontext *ctx,

      /* Can't do conversions on agp reads/draws. 
       */
-      if ( !intelIsAgpMemory( intel, pixels, size ) ) {
+      if ( 1 ) {
 	 if (INTEL_DEBUG & DEBUG_PIXEL)
 	    fprintf(stderr, "%s: not agp memory\n", __FUNCTION__);
 	 return GL_FALSE;
@@ -376,9 +359,11 @@ intelTryDrawPixels( GLcontext *ctx,
      if (!check_color(ctx, type, format, unpack, pixels, size, pitch)) {
 	 return GL_FALSE;
      }
-      if (!check_color_per_fragment_ops(ctx)) {
+      if (!intel_check_color_per_fragment_ops(ctx)) {
 	 return GL_FALSE;
      }
+      if (!ctx->Current.RasterPosValid)
+	 return GL_FALSE;

      if (ctx->Pixel.ZoomX != 1.0F ||
 	  ctx->Pixel.ZoomY != -1.0F)
@@ -389,7 +374,7 @@ intelTryDrawPixels( GLcontext *ctx,
      return GL_FALSE;
   }

-   if ( intelIsAgpMemory(intel, pixels, size) )
+   if ( 0 )
   {
      do_draw_pix( ctx, x, y, width, height, pitch, pixels,
 		   dest );
@@ -424,6 +409,41 @@ intelDrawPixels( GLcontext *ctx,
 }


+struct intel_region *intel_drawbuf_region( struct intel_context *intel )	/* region backing the draw buffer, or NULL */
+{
+   switch (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0]) {
+   case BUFFER_BIT_FRONT_LEFT:
+      return intel->front_region;
+   case BUFFER_BIT_BACK_LEFT:
+      return intel->back_region;
+   default:
+      /* Not necessary to fallback - could handle either NONE or
+       * FRONT_AND_BACK cases below.
+       */
+      return NULL;		
+   }
+}
+
+struct intel_region *intel_readbuf_region( struct intel_context *intel )	/* region backing the read buffer, or NULL */
+{
+   GLcontext *ctx = &intel->ctx;
+
+   /* This will have to change to support EXT_fbo's, but is correct
+    * for now:
+    */
+   switch (ctx->ReadBuffer->_ColorReadBufferIndex) {
+   case BUFFER_FRONT_LEFT:
+      return intel->front_region;
+   case BUFFER_BACK_LEFT:
+      return intel->back_region;
+   default:
+      return NULL;	/* any other read buffer index has no region here */
+   }
+}
+
+
+
+


 /**
@@ -431,58 +451,180 @@ intelDrawPixels( GLcontext *ctx,
 * for the color buffer.  Don't support zooming, pixel transfer, etc.
 * We do support copying from one window to another, ala glXMakeCurrentRead.
 */
+static GLboolean intelTryCopyPixels( GLcontext *ctx,
+				     GLint srcx, GLint srcy, 
+				     GLsizei width, GLsizei height,
+				     GLint dstx, GLint dsty, 
+				     GLenum type )
+{
+   struct intel_context *intel = intel_context( ctx );
+   struct intel_region *dst = intel_drawbuf_region( intel );
+   struct intel_region *src = NULL;
+
+   /* Copypixels can be more than a straight copy.  Ensure all the
+    * extra operations are disabled:
+    */
+   if (!intel_check_color_per_fragment_ops(ctx) ||
+       ctx->_ImageTransferState ||
+       ctx->Pixel.ZoomX != 1.0F || 
+       ctx->Pixel.ZoomY != 1.0F)
+      return GL_FALSE;
+
+   switch (type) {
+   case GL_COLOR:
+      src = intel_readbuf_region( intel );
+      /* No readbuffer, copypixels is a noop: 
+       */
+      if (!src)
+	 return GL_TRUE;
+      break;
+   case GL_DEPTH:
+      /* Don't think this is really possible except at 16bpp, when we have no stencil.
+       */
+      if (intel->intelScreen->cpp == 2)
+	 src = intel->depth_region;
+      break;
+   case GL_STENCIL:
+      /* Don't think this is really possible. 
+       */
+      break;
+   case GL_DEPTH_STENCIL_EXT:
+      /* Does it matter whether it is stencil/depth or depth/stencil?
+       */
+      src = intel->depth_region;
+      break;
+   default:
+      break;
+   }
+
+   if (!src || !dst) 
+      return GL_FALSE;
+
+
+
+   intelFlush( &intel->ctx );
+   LOCK_HARDWARE( intel );
+   intelInstallBatchBuffer( intel );
+   {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      GLint nbox = dPriv->numClipRects;
+      GLint delta_x = srcx - dstx;
+      GLint delta_y = srcy - dsty;
+      GLuint dst_offset = 0;
+      GLuint src_offset = 0;
+      GLuint i;
+
+#if 0
+      dsty -= height;			/* cope with pixel zoom */
+      srcy -= height;			/* cope with pixel zoom */
+#endif
+      if (!ctx->DrawBuffer)
+	 goto out;
+
+      if (!intel_clip_to_framebuffer(ctx, ctx->DrawBuffer, &dstx, &dsty, &width, &height)) 
+	 goto out;
+
+      /* Update source for clipped dest.  Need to also clip the source rect.
+       */
+      srcx = dstx + delta_x;
+      srcy = dsty + delta_y;
+
+      if (!intel_clip_to_framebuffer(ctx, ctx->DrawBuffer, &srcx, &srcy, &width, &height)) 
+	 goto out;
+
+      /* Update dest for clipped source:
+       */
+      dstx = srcx - delta_x;
+      dsty = srcy - delta_y;
+
+
+      srcy = dPriv->h - srcy - height; 	/* convert from gl to hardware coords */
+      dsty = dPriv->h - dsty - height; 	/* convert from gl to hardware coords */
+      srcx += dPriv->x;
+      dstx += dPriv->x;
+      srcy += dPriv->y;
+      dsty += dPriv->y;
+
+
+      bmAddBuffer(intel->buffer_list, dst->buffer, BM_NO_EVICT|BM_NO_UPLOAD|BM_WRITE, 
+		  NULL, &dst_offset);
+      bmAddBuffer(intel->buffer_list, src->buffer, BM_NO_EVICT|BM_NO_UPLOAD|BM_READ, 
+		  NULL, &src_offset);
+
+      if (!bmValidateBufferList(intel->bm, intel->buffer_list, BM_MEM_AGP)) 
+	 goto out;
+      
+      /* Could do slightly more clipping: Eg, take the intersection of
+       * the existing set of cliprects and those cliprects translated
+       * by delta_x, delta_y:
+       * 
+       * This code will not overwrite other windows, but will
+       * introduce garbage when copying from obscured window regions.
+       */
+      for (i = 0 ; i < nbox ; i++ )
+      {
+	 GLint bx = box[i].x1;
+	 GLint by = box[i].y1;
+	 GLint bw = box[i].x2 - bx;
+	 GLint bh = box[i].y2 - by;
+
+	 if (bx < dstx) bw -= dstx - bx, bx = dstx;
+	 if (by < dsty) bh -= dsty - by, by = dsty;
+	 if (bx + bw > dstx + width) bw = dstx + width - bx;
+	 if (by + bh > dsty + height) bh = dsty + height - by;
+	 if (bw <= 0) continue;
+	 if (bh <= 0) continue;
+
+	 assert(dst_offset == intel->drawOffset);
+
+	 intelEmitCopyBlitLocked( intel,
+				  dst->cpp,
+				  src->pitch, src_offset,
+				  dst->pitch, dst_offset,
+				  bx + delta_x, by - delta_y, /* srcx, srcy */
+				  bx, by, /* dstx, dsty */
+				  bw, bh );
+      }
+   }
+ out:
+   intelFlushBatchLocked( intel, GL_TRUE, GL_FALSE, GL_FALSE);
+   UNLOCK_HARDWARE( intel );
+   return GL_TRUE;
+}
+
+
 static void
 intelCopyPixels( GLcontext *ctx,
 		 GLint srcx, GLint srcy, GLsizei width, GLsizei height,
 		 GLint destx, GLint desty, GLenum type )
 {
-#if 0
-   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
-   const SWcontext *swrast = SWRAST_CONTEXT( ctx );
-   XMesaDisplay *dpy = xmesa->xm_visual->display;
-   const XMesaDrawable drawBuffer = xmesa->xm_draw_buffer->buffer;
-   const XMesaDrawable readBuffer = xmesa->xm_read_buffer->buffer;
-   const XMesaGC gc = xmesa->xm_draw_buffer->gc;
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);

-   ASSERT(dpy);
-   ASSERT(gc);
-
-   if (drawBuffer &&  /* buffer != 0 means it's a Window or Pixmap */
-       readBuffer &&
-       type == GL_COLOR &&
-       (swrast->_RasterMask & ~CLIP_BIT) == 0 && /* no blend, z-test, etc */
-       ctx->_ImageTransferState == 0 &&  /* no color tables, scale/bias, etc */
-       ctx->Pixel.ZoomX == 1.0 &&        /* no zooming */
-       ctx->Pixel.ZoomY == 1.0) {
-      /* Note: we don't do any special clipping work here.  We could,
-       * but X will do it for us.
-       */
-      srcy = FLIP(xmesa->xm_read_buffer, srcy) - height + 1;
-      desty = FLIP(xmesa->xm_draw_buffer, desty) - height + 1;
-      XCopyArea(dpy, readBuffer, drawBuffer, gc,
-                srcx, srcy, width, height, destx, desty);
+   if (!intelTryCopyPixels( ctx, srcx, srcy, width, height, destx, desty, type)) {
+/*       if (INTEL_DEBUG & DEBUG_FALLBACKS) */
+	 _mesa_printf("fallback to _swrast_CopyPixels\n");
+      _swrast_CopyPixels( ctx, srcx, srcy, width, height, destx, desty, type);
   }
-#else
-   _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type );
-#endif
 }



-
 void intelInitPixelFuncs( struct dd_function_table *functions )
 {
   /* Pixel path fallbacks.
    */
   functions->Accum = _swrast_Accum;
   functions->Bitmap = _swrast_Bitmap;
-   functions->CopyPixels = intelCopyPixels;

-   if (!getenv("INTEL_NO_BLITS")) {
+   if (getenv("INTEL_NO_BLITS") == 0) {
+      functions->CopyPixels = intelCopyPixels;
      functions->ReadPixels = intelReadPixels;  
      functions->DrawPixels = intelDrawPixels; 
   }
   else {
+      functions->CopyPixels = _swrast_CopyPixels;
      functions->ReadPixels = _swrast_ReadPixels;
      functions->DrawPixels = _swrast_DrawPixels;
   }
--- a/src/mesa/drivers/dri/i915/intel_regions.c
+++ b/src/mesa/drivers/dri/i915/intel_regions.c
@@ -0,0 +1,363 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/* Provide additional functionality on top of bufmgr buffers:
+ *   - 2d semantics and blit operations
+ *   - refcounting of buffers for multiple images in a buffer.
+ *   - refcounting of buffer mappings.
+ *   - some logic for moving the buffers to the best memory pools for
+ *     given operations.
+ *
+ * Most of this is to make it easier to implement the fixed-layout
+ * mipmap tree required by intel hardware in the face of GL's
+ * programming interface where each image can be specified in random
+ * order and it isn't clear what layout the tree should have until the
+ * last moment.
+ */
+
+#include "intel_context.h"
+#include "intel_regions.h"
+#include "intel_batchbuffer.h"
+#include "bufmgr.h"
+
+/* Map a region's buffer and return the pointer.  Maps are refcounted, so
+ * only the first caller calls bmMapBuffer().  XXX: Thread safety? */
+GLubyte *intel_region_map(struct intel_context *intel, struct intel_region *region)
+{
+   DBG("%s\n", __FUNCTION__);
+   if (!region->map_refcount++) {	/* count was zero: this is the first mapper */
+      region->map = bmMapBuffer(intel->bm, region->buffer, 0);
+   }
+
+   return region->map;	/* cached pointer from the first map */
+}
+
+void intel_region_unmap(struct intel_context *intel, 
+			struct intel_region *region)
+{
+   DBG("%s\n", __FUNCTION__);
+   if (!--region->map_refcount) {	/* last mapper gone: really unmap */
+      bmUnmapBuffer(intel->bm, region->buffer);	/* region->map is left as-is */
+   }
+}
+
+struct intel_region *intel_region_alloc( struct intel_context *intel, 
+					 GLuint cpp,
+					 GLuint pitch, 
+					 GLuint height )
+{
+   struct intel_region *region = calloc(sizeof(*region), 1);	/* NOTE(review): result is not checked */
+
+   DBG("%s\n", __FUNCTION__);
+
+   region->cpp = cpp;
+   region->pitch = pitch;
+   region->height = height; 	/* needed? */
+   region->refcount = 1;	/* caller holds the initial reference */
+
+   bmGenBuffers(intel->bm, 1, &region->buffer);
+   bmBufferData(intel->bm, region->buffer, pitch * cpp * height, NULL, 0);	/* sized pitch*cpp*height, NULL data */
+
+   return region;
+}
+
+struct intel_region *intel_region_reference( struct intel_region *region )
+{
+   region->refcount++;	/* take one more reference */
+   return region;	/* same pointer, returned for convenience */
+}
+
+void intel_region_release( struct intel_context *intel,
+			   struct intel_region *region )
+{
+   if (--region->refcount == 0) {	/* destroy only when the last reference is dropped */
+      assert(region->map_refcount == 0);	/* nobody may still hold a mapping */
+      bmDeleteBuffers(intel->bm, 1, &region->buffer);
+      free(region);
+   }
+}
+
+
+struct intel_region *intel_region_create_static( struct intel_context *intel, 
+						 GLuint mem_type,	/* not referenced in this function */
+						 GLuint offset,
+						 void *virtual,
+						 GLuint cpp,
+						 GLuint pitch, 
+						 GLuint height )
+{
+   struct intel_region *region = calloc(sizeof(*region), 1);	/* NOTE(review): result is not checked */
+   GLuint size = cpp * pitch * height;
+   GLint pool;
+
+   DBG("%s\n", __FUNCTION__);
+
+   region->cpp = cpp;
+   region->pitch = pitch;
+   region->height = height; 	/* needed? */
+   region->refcount = 1;	/* caller holds the initial reference */
+
+   /* Recipe for creating a static buffer - create a static pool with
+    * the right offset and size, generate a buffer and use a special
+    * call to bind it to all of the memory in that pool.
+    */
+   pool = bmInitPool(intel->bm, offset, virtual, size, 
+		     (BM_MEM_AGP |
+		      BM_NO_UPLOAD | 
+		      BM_NO_EVICT | 
+		      BM_NO_MOVE));
+   if (pool < 0) {	/* pool creation failure is treated as fatal */
+      _mesa_printf("bmInitPool failed for static region\n");
+      exit(1);
+   }
+
+   bmGenBuffers(intel->bm, 1, &region->buffer);
+   bmBufferStatic(intel->bm, region->buffer, size, pool);
+
+   return region;
+}
+
+
+
+static void _mesa_copy_rect( GLubyte *dst,
+			     GLuint cpp,
+			     GLuint dst_pitch,
+			     GLuint dst_x, 
+			     GLuint dst_y,
+			     GLuint width,
+			     GLuint height,
+			     GLubyte *src,
+			     GLuint src_pitch,
+			     GLuint src_x,
+			     GLuint src_y )
+{
+   GLuint i;
+   /* Pitches, x/y and width arrive in pixels; scale them to bytes via cpp. */
+   dst_pitch *= cpp;
+   src_pitch *= cpp;
+   dst += dst_x * cpp;
+   src += src_x * cpp;
+   dst += dst_y * dst_pitch;
+   src += src_y * src_pitch;	/* source rows are src_pitch apart, not dst_pitch */
+   width *= cpp;
+
+   if (width == dst_pitch && 
+       width == src_pitch)
+      memcpy(dst, src, height * width);	/* both sides tightly packed: one copy */
+   else {
+      for (i = 0; i < height; i++) {	/* otherwise copy row by row */
+	 memcpy(dst, src, width);
+	 dst += dst_pitch;
+	 src += src_pitch;
+      }
+   }
+}
+
+
+
+/* Software fill of a width x height rectangle; dst_pitch, x and y are in
+ * pixels.  Could make color a char * to handle deeper buffers. */
+static void _mesa_fill_rect( GLubyte *dst,
+			     GLuint cpp,
+			     GLuint dst_pitch,
+			     GLuint dst_x, 
+			     GLuint dst_y,
+			     GLuint width,
+			     GLuint height,
+			     GLuint color )
+{
+   GLuint i,j;
+
+   switch (cpp) {
+   case 1:
+      dst += dst_x;
+      dst += dst_y * dst_pitch;
+      for (i = 0; i < height; i++) { 
+	 memset(dst, color, width);
+	 dst += dst_pitch;
+      }
+      break;
+   case 2: {
+      GLushort color_short = color & 0xffff;
+      GLushort *dst_short = (GLushort *)dst;
+      dst_short += dst_x;
+      dst_short += dst_y * dst_pitch;
+      for (i = 0; i < height; i++) { 
+	 for (j = 0; j < width; j++) 
+	    dst_short[j] = color_short;
+	 dst_short += dst_pitch;	/* step to the next row, as the cpp == 1 path does */
+      }
+      break;
+   }
+   case 4: {
+      GLuint *dst_int = (GLuint *)dst;
+      dst_int += dst_x;
+      dst_int += dst_y * dst_pitch;
+      for (i = 0; i < height; i++) { 
+	 for (j = 0; j < width; j++) 
+	    dst_int[j] = color;
+	 dst_int += dst_pitch;	/* step to the next row, as the cpp == 1 path does */
+      }
+      break;
+   }
+   default:
+      assert(0);	/* only 1, 2 and 4 bytes per pixel are handled */
+      return;
+   }
+}
+
+
+
+/* Upload data to a rectangular sub-region.  Lots of choices how to do this:
+ *
+ * - memcpy by span to current destination
+ * - upload data as new buffer and blit
+ *
+ * Currently always memcpy.
+ */
+void intel_region_data(struct intel_context *intel, 
+		       struct intel_region *dst,
+		       GLuint dstx, GLuint dsty,
+		       void *src, GLuint src_pitch,
+		       GLuint srcx, GLuint srcy,
+		       GLuint width, GLuint height)
+{
+   DBG("%s\n", __FUNCTION__);
+
+   LOCK_HARDWARE(intel);	/* lock is held across the map, copy and unmap */
+   
+   _mesa_copy_rect(intel_region_map(intel, dst),
+		   dst->cpp,	/* source is copied using the destination's cpp */
+		   dst->pitch,
+		   dstx, dsty,
+		   width, height,
+		   src,
+		   src_pitch,
+		   srcx, srcy);      
+
+   intel_region_unmap(intel, dst);
+
+   UNLOCK_HARDWARE(intel);
+   
+}
+			  
+/* Copy a rectangle between two regions of equal cpp: blit when both buffers
+ * validate in AGP, otherwise map both and copy in software.  Need better logic
+ * about when to push buffers into AGP - will currently do so whenever possible. */
+void intel_region_copy( struct intel_context *intel,
+			struct intel_region *dst,
+			GLuint dstx, GLuint dsty,
+			struct intel_region *src,
+			GLuint srcx, GLuint srcy,
+			GLuint width, GLuint height )
+{
+   unsigned dst_offset;
+   unsigned src_offset;
+   struct bm_buffer_list *list = bmNewBufferList();
+
+   DBG("%s\n", __FUNCTION__);
+
+   assert(src->cpp == dst->cpp);
+
+   LOCK_HARDWARE(intel);
+   bmAddBuffer(list, dst->buffer, BM_WRITE, NULL, &dst_offset);
+   bmAddBuffer(list, src->buffer, BM_READ, NULL, &src_offset);
+
+   /* Query if both buffers are already uploaded:
+    */
+   if (bmValidateBufferList(intel->bm, list, BM_NO_EVICT|BM_NO_UPLOAD|BM_MEM_AGP)) {
+      intelEmitCopyBlitLocked(intel,
+			      dst->cpp,
+			      src->pitch, src_offset,
+			      dst->pitch, dst_offset, 
+			      srcx, srcy,
+			      dstx, dsty,
+			      width, height);
+
+      bmFenceBufferList(intel->bm, list);
+   }
+   else {
+      _mesa_copy_rect(intel_region_map(intel, dst),
+		      dst->cpp,
+		      dst->pitch,
+		      dstx, dsty,
+		      width, height,
+		      intel_region_map(intel, src),
+		      src->pitch,	/* _mesa_copy_rect takes src_pitch before src_x, src_y */
+		      srcx, srcy);      
+
+      intel_region_unmap(intel, dst);
+      intel_region_unmap(intel, src);      
+   }
+   
+   bmFreeBufferList(list);
+   UNLOCK_HARDWARE(intel);
+}
+
+/* Fill a rectangular sub-region with color: blit when the buffer validates
+ * in AGP, otherwise map it and fill in software.  Need better logic about when
+ * to push buffers into AGP - will currently do so whenever possible. */
+void intel_region_fill( struct intel_context *intel,
+			struct intel_region *dst,
+			GLuint dstx, GLuint dsty,
+			GLuint width, GLuint height,
+			GLuint color )
+{
+   unsigned dst_offset;
+   struct bm_buffer_list *list = bmNewBufferList();
+
+   DBG("%s\n", __FUNCTION__);
+
+   LOCK_HARDWARE(intel);
+   bmAddBuffer(list, dst->buffer, BM_WRITE, NULL, &dst_offset);
+
+   if (bmValidateBufferList(intel->bm, list, BM_NO_EVICT|BM_NO_UPLOAD|BM_MEM_AGP)) {
+      intelEmitFillBlitLocked(intel,
+			      dst->cpp,
+			      dst->pitch,
+			      dst_offset, 
+			      dstx, dsty,
+			      width, height,
+			      color );
+
+      bmFenceBufferList(intel->bm, list);
+   }
+   else {
+      _mesa_fill_rect(intel_region_map(intel, dst),	/* software fallback on the mapped buffer */
+		      dst->cpp,
+		      dst->pitch,
+		      dstx, dsty,
+		      width, height,
+		      color);      
+
+      intel_region_unmap(intel, dst);
+   }
+   
+   bmFreeBufferList(list);
+   UNLOCK_HARDWARE(intel);
+}
+
--- a/src/mesa/drivers/dri/i915/intel_regions.h
+++ b/src/mesa/drivers/dri/i915/intel_regions.h
@@ -0,0 +1,108 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_REGIONS_H
+#define INTEL_REGIONS_H
+
+#include "mtypes.h"
+#include "bufmgr.h"		/* for DBG! */
+struct intel_context;
+
+/* A layer on top of the bufmgr buffers that adds a few useful things:
+ *
+ * - Refcounting for local buffer references.
+ * - Refcounting for buffer maps
+ * - Buffer dimensions - pitch and height.
+ * - Blitter commands for copying 2D regions between buffers.
+ */
+struct intel_region {
+   GLuint buffer;
+   GLuint refcount;
+   GLuint cpp;
+   GLuint pitch;
+   GLuint height;
+   GLubyte *map;
+   GLuint map_refcount;
+};
+
+/* Allocate a refcounted region.  Pointers to regions should only be
+ * copied by calling intel_region_reference().
+ */
+struct intel_region *intel_region_alloc( struct intel_context *intel,
+					 GLuint cpp,
+					 GLuint pitch, 
+					 GLuint height );
+struct intel_region *intel_region_reference( struct intel_region *ib );
+void intel_region_release(struct intel_context *intel,
+			  struct intel_region *ib );
+
+
+struct intel_region *intel_region_create_static( struct intel_context *intel,
+						 GLuint mem_type,
+						 GLuint offset,
+						 void *virtual,
+						 GLuint cpp,
+						 GLuint pitch,
+						 GLuint height );
+
+/* Map/unmap regions.  This is refcounted also: 
+ */
+GLubyte *intel_region_map(struct intel_context *intel, 
+		       struct intel_region *ib);
+
+void intel_region_unmap(struct intel_context *intel,
+			struct intel_region *ib);
+
+
+/* Upload data to a rectangular sub-region
+ */
+void intel_region_data(struct intel_context *intel, 
+		       struct intel_region *dest,
+		       GLuint destx, GLuint desty,
+		       void *src, GLuint src_stride,
+		       GLuint srcx, GLuint srcy,
+		       GLuint width, GLuint height);
+			  
+/* Copy rectangular sub-regions
+ */
+void intel_region_copy( struct intel_context *intel,
+			struct intel_region *dest,
+			GLuint destx, GLuint desty,
+			struct intel_region *src,
+			GLuint srcx, GLuint srcy,
+			GLuint width, GLuint height );
+
+/* Fill a rectangular sub-region
+ */
+void intel_region_fill( struct intel_context *intel,
+			struct intel_region *dest,
+			GLuint destx, GLuint desty,
+			GLuint width, GLuint height,
+			GLuint color );
+
+
+#endif
--- a/src/mesa/drivers/dri/i915/intel_render.c
+++ b/src/mesa/drivers/dri/i915/intel_render.c
@@ -199,6 +199,10 @@ static GLboolean intel_run_render( GLcontext *ctx,
   struct vertex_buffer *VB = &tnl->vb;
   GLuint i;

+   /* disabled
+    */
+   return GL_TRUE;
+
   /* Don't handle clipping or indexed vertices.
    */
   if (intel->RenderIndex != 0 || 
--- a/src/mesa/drivers/dri/i915/intel_screen.c
+++ b/src/mesa/drivers/dri/i915/intel_screen.c
@@ -166,7 +166,7 @@ static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv)
 			 
   intelScreen->sarea_priv_offset = gDRIPriv->sarea_priv_offset;
   
-   if (0) intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv);
+   if (1) intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv);

   intelScreen->drmMinor = sPriv->drmMinor;

@@ -202,13 +202,8 @@ static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv)

   if (glx_enable_extension != NULL) {
      (*glx_enable_extension)( psc, "GLX_SGI_make_current_read" );
-      (*glx_enable_extension)( psc, "GLX_MESA_allocate_memory" );
   }
   
-   sPriv->psc->allocateMemory = (void *) intelAllocateMemoryMESA;
-   sPriv->psc->freeMemory     = (void *) intelFreeMemoryMESA;
-   sPriv->psc->memoryOffset   = (void *) intelGetMemoryOffsetMESA;
-
   return GL_TRUE;
 }
 		
@@ -338,12 +333,16 @@ static GLboolean intelCreateContext( const __GLcontextModes *mesaVis,
   intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private;

   switch (intelScreen->deviceID) {
+#if 0
+      /* Don't deal with i830 until texture work complete:
+       */
   case PCI_CHIP_845_G:
   case PCI_CHIP_I830_M:
   case PCI_CHIP_I855_GM:
   case PCI_CHIP_I865_G:
      return i830CreateContext( mesaVis, driContextPriv, 
 				sharedContextPrivate );
+#endif

   case PCI_CHIP_I915_G:
   case PCI_CHIP_I915_GM:
--- a/src/mesa/drivers/dri/i915/intel_screen.h
+++ b/src/mesa/drivers/dri/i915/intel_screen.h
@@ -32,6 +32,9 @@
 #include "dri_util.h"
 #include "xmlconfig.h"

+/* XXX: change name or eliminate to avoid conflict with "struct
+ * intel_region"!!!
+ */
 typedef struct {
   drm_handle_t handle;
   drmSize size;        /* region size in bytes */
--- a/src/mesa/drivers/dri/i915/intel_span.c
+++ b/src/mesa/drivers/dri/i915/intel_span.c
@@ -31,12 +31,14 @@
 #include "colormac.h"

 #include "intel_screen.h"
-
 #include "intel_span.h"
+#include "intel_regions.h"
 #include "intel_ioctl.h"
+#include "intel_tex.h"
+
 #include "swrast/swrast.h"

-
+#undef DBG
 #define DBG 0

 #define LOCAL_VARS						\
@@ -205,16 +207,47 @@ do {								\
 void intelSpanRenderStart( GLcontext *ctx )
 {
   intelContextPtr intel = INTEL_CONTEXT(ctx);
+   GLuint i;

   intelFlush(&intel->ctx);
   LOCK_HARDWARE(intel);
   intelWaitForIdle(intel);
+
+   /* Just map the framebuffer and all textures.  Bufmgr code will
+    * take care of waiting on the necessary fences:
+    */
+   intel_region_map(intel, intel->front_region);
+   intel_region_map(intel, intel->back_region);
+   intel_region_map(intel, intel->depth_region);
+
+   for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
+      if (ctx->Texture.Unit[i]._ReallyEnabled) {
+	 struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current;
+	 intel_tex_map_images(intel, intel_texture_object(texObj));
+      }
+   }
 }

 void intelSpanRenderFinish( GLcontext *ctx )
 {
   intelContextPtr intel = INTEL_CONTEXT( ctx );
+   GLuint i;
+
   _swrast_flush( ctx );
+
+   /* Now unmap the framebuffer:
+    */
+   intel_region_unmap(intel, intel->front_region);
+   intel_region_unmap(intel, intel->back_region);
+   intel_region_unmap(intel, intel->depth_region);
+
+   for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
+      if (ctx->Texture.Unit[i]._ReallyEnabled) {
+	 struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current;
+	 intel_tex_unmap_images(intel, intel_texture_object(texObj));
+      }
+   }
+
   UNLOCK_HARDWARE( intel );
 }

--- a/src/mesa/drivers/dri/i915/intel_state.c
+++ b/src/mesa/drivers/dri/i915/intel_state.c
@@ -192,10 +192,17 @@ static void intelDrawBuffer(GLcontext *ctx, GLenum mode )
   
   intelSetFrontClipRects( intel );

+/*    if (intel->draw_region) */
+/*       intel_region_release(intel, intel->draw_region); */
+
   if (front) {
      intel->drawOffset = screen->front.offset;
+/*       intel->draw_region = intel_region_reference(intel->front_region); */
+      intel->draw_region = intel->front_region;
   } else {
      intel->drawOffset = screen->back.offset;
+/*       intel->draw_region = intel_region_reference(intel->back_region); */
+      intel->draw_region = intel->back_region;
   }

   intel->vtbl.set_draw_offset( intel, intel->drawOffset );
--- a/src/mesa/drivers/dri/i915/intel_tex.c
+++ b/src/mesa/drivers/dri/i915/intel_tex.c
@@ -1,830 +1,101 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "glheader.h"
-#include "mtypes.h"
-#include "imports.h"
-#include "macros.h"
-#include "simple_list.h"
-#include "enums.h"
-#include "image.h"
-#include "texstore.h"
-#include "texformat.h"
-#include "teximage.h"
-#include "texmem.h"
 #include "texobj.h"
-#include "swrast/swrast.h"
-
-#include "mm.h"
-
-#include "intel_screen.h"
-#include "intel_batchbuffer.h"
 #include "intel_context.h"
+#include "intel_mipmap_tree.h"
 #include "intel_tex.h"
-#include "intel_ioctl.h"


-
-static GLboolean
-intelValidateClientStorage( intelContextPtr intel, GLenum target,
-			    GLint internalFormat,
-			    GLint srcWidth, GLint srcHeight, 
-			    GLenum format, GLenum type,  const void *pixels,
-			    const struct gl_pixelstore_attrib *packing,
-			    struct gl_texture_object *texObj,
-			    struct gl_texture_image *texImage)
-
+static GLboolean intelIsTextureResident(GLcontext *ctx,
+                                      struct gl_texture_object *texObj)
 {
-   GLcontext *ctx = &intel->ctx;
-   int texelBytes;
-
-   if (0)
-      fprintf(stderr, "intformat %s format %s type %s\n",
-	      _mesa_lookup_enum_by_nr( internalFormat ),
-	      _mesa_lookup_enum_by_nr( format ),
-	      _mesa_lookup_enum_by_nr( type ));
-
-   if (!ctx->Unpack.ClientStorage)
-      return 0;
-
-   if (ctx->_ImageTransferState ||
-       texImage->IsCompressed ||
-       texObj->GenerateMipmap)
-      return 0;
+#if 0
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_object *intelObj = intel_texture_object(texObj);
+   
+   return 
+      intelObj->mt && 
+      intelObj->mt->region && 
+      intel_is_region_resident(intel, intelObj->mt->region);
+#endif
+   return 1;
+}


-   /* This list is incomplete
+
+static struct gl_texture_image *intelNewTextureImage( GLcontext *ctx )
+{  /* Allocates an intel_texture_image and returns it as the core gl_texture_image type. */
+   (void) ctx;  /* ctx is unused here */
+   return (struct gl_texture_image *)CALLOC_STRUCT(intel_texture_image);
+}
+
+
+/* Allocate and initialize a driver texture object; NULL if allocation fails. */
+static struct gl_texture_object *intelNewTextureObject( GLcontext *ctx, 
+							GLuint name, 
+							GLenum target )
+{
+   struct intel_texture_object *obj = CALLOC_STRUCT(intel_texture_object);
+   if (!obj)   /* out of memory: never hand NULL to the initializer */
+      return NULL;
+   _mesa_initialize_texture_object(&obj->base, name, target);
+   return &obj->base;
+}
+
+static void intelFreeTextureImageData( GLcontext *ctx, 
+				     struct gl_texture_image *texImage )
+{  /* Installed as functions->FreeTexImageData in intelInitTextureFuncs. */
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_image *intelImage = intel_texture_image(texImage);
+
+   if (intelImage->mt) {   /* image still points at a miptree: release it */
+      intel_miptree_release(intel, intelImage->mt);
+      intelImage->mt = NULL;   /* do not leave a stale pointer behind */
+   }
+   
+   if (texImage->Data) {   /* separately held image data, released with free() */
+      free(texImage->Data);
+      texImage->Data = NULL;
+   }
+}
+
+/* Copy n bytes: __memcpy unless src and dest are both 64-byte aligned. */
+static void *do_memcpy( void *dest, const void *src, size_t n )
+{
+   if ( (((unsigned long)src) & 63) ||	/* pointer-wide casts; only the */
+	(((unsigned long)dest) & 63))	/* low 6 bits are examined */
+      return __memcpy(dest, src, n);
+   else
+      return memcpy(dest, src, n);
+}
+
+void intelInitTextureFuncs(struct dd_function_table * functions)
+{
+   functions->ChooseTextureFormat = intelChooseTextureFormat;
+   functions->TexImage1D = intelTexImage1D;
+   functions->TexImage2D = intelTexImage2D;
+   functions->TexSubImage1D = intelTexSubImage1D;
+   functions->TexSubImage2D = intelTexSubImage2D;
+   functions->CopyTexImage1D = intelCopyTexImage1D;
+   functions->CopyTexImage2D = intelCopyTexImage2D;
+   functions->CopyTexSubImage1D = intelCopyTexSubImage1D;
+   functions->CopyTexSubImage2D = intelCopyTexSubImage2D;
+   functions->NewTextureObject = intelNewTextureObject;
+   functions->NewTextureImage = intelNewTextureImage;
+   functions->DeleteTexture = _mesa_delete_texture_object;
+   functions->FreeTexImageData = intelFreeTextureImageData;
+   functions->UpdateTexturePalette = 0;
+   functions->IsTextureResident = intelIsTextureResident;
+
+   /* The system memcpy (at least on ubuntu 5.10) has problems copying
+    * to agp (writecombined) memory from a source which isn't 64-byte
+    * aligned - there is a 4x performance falloff.
+    *
+    * The x86 __memcpy is immune to this but is slightly slower
+    * (10%-ish) than the system memcpy.
+    *
+    * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but
+    * isn't much faster than x86_memcpy for agp copies.
+    * 
+    * do_memcpy() picks between the two per copy, by src/dest alignment.
    */
-   switch ( internalFormat ) {
-   case GL_RGBA:
-      if ( format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV ) {
-	 texImage->TexFormat = &_mesa_texformat_argb8888;
-	 texelBytes = 4;
-      }
-      else
-	 return 0;
-      break;
-
-   case GL_RGB:
-      if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) {
-	 texImage->TexFormat = &_mesa_texformat_rgb565;
-	 texelBytes = 2;
-      }
-      else
-	 return 0;
-      break;
-
-   case GL_YCBCR_MESA:
-      if ( format == GL_YCBCR_MESA && 
-	   type == GL_UNSIGNED_SHORT_8_8_REV_APPLE ) {
-	 texImage->TexFormat = &_mesa_texformat_ycbcr_rev;
-	 texelBytes = 2;
-      }
-      else if ( format == GL_YCBCR_MESA && 
-		(type == GL_UNSIGNED_SHORT_8_8_APPLE || 
-		 type == GL_UNSIGNED_BYTE)) {
-	 texImage->TexFormat = &_mesa_texformat_ycbcr;
-	 texelBytes = 2;
-      }
-      else
-	 return 0;
-      break;
-      
-	 
-   default:
-      return 0;
-   }
-
-   /* Could deal with these packing issues, but currently don't:
-    */
-   if (packing->SkipPixels || 
-       packing->SkipRows || 
-       packing->SwapBytes ||
-       packing->LsbFirst) {
-      return 0;
-   }
-
-   {      
-      GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth,
-						  format, type);
-
-      
-      if (0)
-	 fprintf(stderr, "%s: srcRowStride %d/%x\n", 
-		 __FUNCTION__, srcRowStride, srcRowStride);
-
-      /* Could check this later in upload, pitch restrictions could be
-       * relaxed, but would need to store the image pitch somewhere,
-       * as packing details might change before image is uploaded:
-       */
-      if (!intelIsAgpMemory( intel, pixels, srcHeight * srcRowStride ) ||
-	  (srcRowStride & 63))
-	 return 0;
-
-
-      /* Have validated that _mesa_transfer_teximage would be a straight
-       * memcpy at this point.  NOTE: future calls to TexSubImage will
-       * overwrite the client data.  This is explicitly mentioned in the
-       * extension spec.
-       */
-      texImage->Data = (void *)pixels;
-      texImage->IsClientData = GL_TRUE;
-      texImage->RowStride = srcRowStride / texelBytes;
-      return 1;
-   }
-}
-
- 
-
-static void intelTexImage1D( GLcontext *ctx, GLenum target, GLint level,
-			    GLint internalFormat,
-			    GLint width, GLint border,
-			    GLenum format, GLenum type, const GLvoid *pixels,
-			    const struct gl_pixelstore_attrib *packing,
-			    struct gl_texture_object *texObj,
-			    struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-
-   assert(t);
-   intelFlush( ctx );
-   driSwapOutTextureObject( t );
-
-   texImage->IsClientData = GL_FALSE;
-
-   _mesa_store_teximage1d( ctx, target, level, internalFormat,
-			   width, border, format, type,
-			   pixels, packing, texObj, texImage );
-
-   t->dirty_images[0] |= (1 << level);
-}
-
-static void intelTexSubImage1D( GLcontext *ctx, 
-			       GLenum target,
-			       GLint level,	
-			       GLint xoffset,
-				GLsizei width,
-			       GLenum format, GLenum type,
-			       const GLvoid *pixels,
-			       const struct gl_pixelstore_attrib *packing,
-			       struct gl_texture_object *texObj,
-			       struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-
-   assert(t);
-   intelFlush( ctx );
-   driSwapOutTextureObject( t );
-
-   _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, 
-			     format, type, pixels, packing, texObj,
-			     texImage);
-}
-
-
-/* Handles 2D, CUBE, RECT:
- */
-static void intelTexImage2D( GLcontext *ctx, GLenum target, GLint level,
-			    GLint internalFormat,
-			    GLint width, GLint height, GLint border,
-			    GLenum format, GLenum type, const GLvoid *pixels,
-			    const struct gl_pixelstore_attrib *packing,
-			    struct gl_texture_object *texObj,
-			    struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-   GLuint face;
-
-   /* which cube face or ordinary 2D image */
-   switch (target) {
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
-      ASSERT(face < 6);
-      break;
-   default:
-      face = 0;
-   }
-
-   assert(t);
-   intelFlush( ctx );
-   driSwapOutTextureObject( t );
-   texImage->IsClientData = GL_FALSE;
-
-   if (intelValidateClientStorage( INTEL_CONTEXT(ctx), target, 
-				   internalFormat, 
-				   width, height, 
-				   format, type, pixels, 
-				   packing, texObj, texImage)) {
-      if (INTEL_DEBUG & DEBUG_TEXTURE)
-	 fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); 
-   }
-   else {
-      _mesa_store_teximage2d( ctx, target, level, internalFormat,
-			      width, height, border, format, type,
-			      pixels, packing, texObj, texImage );
-
-      t->dirty_images[face] |= (1 << level);
-   }
-}
-
-static void intelTexSubImage2D( GLcontext *ctx, 
-			       GLenum target,
-			       GLint level,	
-			       GLint xoffset, GLint yoffset,
-			       GLsizei width, GLsizei height,
-			       GLenum format, GLenum type,
-			       const GLvoid *pixels,
-			       const struct gl_pixelstore_attrib *packing,
-			       struct gl_texture_object *texObj,
-			       struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-   GLuint face;
-
-   /* which cube face or ordinary 2D image */
-   switch (target) {
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
-      ASSERT(face < 6);
-      break;
-   default:
-      face = 0;
-   }
-
-   if (texImage->IsClientData &&
-       (char *)pixels == (char *)texImage->Data + 
-       ((xoffset + yoffset * texImage->RowStride) * 
-	texImage->TexFormat->TexelBytes)) {
-
-      /* Notification only - no upload required */
-   }
-   else {
-      assert( t ); /* this _should_ be true */
-      intelFlush( ctx );
-      driSwapOutTextureObject( t );
-
-      _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, 
-				height, format, type, pixels, packing, texObj,
-				texImage);
-
-      t->dirty_images[face] |= (1 << level);
-   }
-}
-
-static void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
-                              GLint internalFormat,
-                              GLint width, GLint height, GLint border,
-                              GLsizei imageSize, const GLvoid *data,
-                              struct gl_texture_object *texObj,
-                              struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-   GLuint face;
-
-   /* which cube face or ordinary 2D image */
-   switch (target) {
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
-      ASSERT(face < 6);
-      break;
-   default:
-      face = 0;
-   }
-
-   assert(t);
-   intelFlush( ctx );
-   
-   driSwapOutTextureObject( t );
-   texImage->IsClientData = GL_FALSE;
-
-   if (INTEL_DEBUG & DEBUG_TEXTURE)
-     fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__); 
-   
-   _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width,
-				     height, border, imageSize, data, texObj, texImage);
-   
-   t->dirty_images[face] |= (1 << level);
-}
-
-
-static void intelCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
-                                 GLint xoffset, GLint yoffset,
-                                 GLsizei width, GLsizei height,
-                                 GLenum format,
-                                 GLsizei imageSize, const GLvoid *data,
-                                 struct gl_texture_object *texObj,
-                                 struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-   GLuint face;
-
-
-   /* which cube face or ordinary 2D image */
-   switch (target) {
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
-      face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X;
-      ASSERT(face < 6);
-      break;
-   default:
-      face = 0;
-   }
-
-   assert( t ); /* this _should_ be true */
-   intelFlush( ctx );
-   driSwapOutTextureObject( t );
-   
-   _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
-					height, format, imageSize, data, texObj, texImage);
-   
-   t->dirty_images[face] |= (1 << level);
-}
-
-
-static void intelTexImage3D( GLcontext *ctx, GLenum target, GLint level,
-                            GLint internalFormat,
-                            GLint width, GLint height, GLint depth,
-                            GLint border,
-                            GLenum format, GLenum type, const GLvoid *pixels,
-                            const struct gl_pixelstore_attrib *packing,
-                            struct gl_texture_object *texObj,
-                            struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-
-   assert(t);
-   driSwapOutTextureObject( t );
-   texImage->IsClientData = GL_FALSE;
-
-   _mesa_store_teximage3d(ctx, target, level, internalFormat,
-			  width, height, depth, border,
-			  format, type, pixels,
-			  &ctx->Unpack, texObj, texImage);
-   
-   t->dirty_images[0] |= (1 << level);
-}
-
-
-static void
-intelTexSubImage3D( GLcontext *ctx, GLenum target, GLint level,
-                   GLint xoffset, GLint yoffset, GLint zoffset,
-                   GLsizei width, GLsizei height, GLsizei depth,
-                   GLenum format, GLenum type,
-                   const GLvoid *pixels,
-                   const struct gl_pixelstore_attrib *packing,
-                   struct gl_texture_object *texObj,
-                   struct gl_texture_image *texImage )
-{
-   driTextureObject * t = (driTextureObject *) texObj->DriverData;
-
-   assert( t ); /* this _should_ be true */
-   driSwapOutTextureObject( t );
-
-   _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset,
-                             width, height, depth,
-                             format, type, pixels, packing, texObj, texImage);
-
-   t->dirty_images[0] |= (1 << level);
-}
-
-
-
-
-static void intelDeleteTexture( GLcontext *ctx, struct gl_texture_object *tObj )
-{
-   driTextureObject * t = (driTextureObject *) tObj->DriverData;
-
-   if ( t != NULL ) {
-      intelFlush( ctx );
-      driDestroyTextureObject( t );
-   }
-   
-   /* Free mipmap images and the texture object itself */
-   _mesa_delete_texture_object(ctx, tObj);
-}
-
-
-static const struct gl_texture_format *
-intelChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
-			 GLenum format, GLenum type )
-{
-   intelContextPtr intel = INTEL_CONTEXT( ctx );
-   const GLboolean do32bpt = ( intel->intelScreen->cpp == 4 &&
-			       intel->intelScreen->tex.size > 4*1024*1024);
-
-   switch ( internalFormat ) {
-   case 4:
-   case GL_RGBA:
-   case GL_COMPRESSED_RGBA:
-      if ( format == GL_BGRA ) {
-	 if ( type == GL_UNSIGNED_INT_8_8_8_8_REV ) {
-	    return &_mesa_texformat_argb8888;
-	 }
-         else if ( type == GL_UNSIGNED_SHORT_4_4_4_4_REV ) {
-            return &_mesa_texformat_argb4444;
-	 }
-         else if ( type == GL_UNSIGNED_SHORT_1_5_5_5_REV ) {
-	    return &_mesa_texformat_argb1555;
-	 }
-      }
-      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
-
-   case 3:
-   case GL_RGB:
-   case GL_COMPRESSED_RGB:
-      if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) {
-	 return &_mesa_texformat_rgb565;
-      }
-      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565;
-
-   case GL_RGBA8:
-   case GL_RGB10_A2:
-   case GL_RGBA12:
-   case GL_RGBA16:
-      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
-
-   case GL_RGBA4:
-   case GL_RGBA2:
-      return &_mesa_texformat_argb4444;
-
-   case GL_RGB5_A1:
-      return &_mesa_texformat_argb1555;
-
-   case GL_RGB8:
-   case GL_RGB10:
-   case GL_RGB12:
-   case GL_RGB16:
-      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565;
-
-   case GL_RGB5:
-   case GL_RGB4:
-   case GL_R3_G3_B2:
-      return &_mesa_texformat_rgb565;
-
-   case GL_ALPHA:
-   case GL_ALPHA4:
-   case GL_ALPHA8:
-   case GL_ALPHA12:
-   case GL_ALPHA16:
-   case GL_COMPRESSED_ALPHA:
-      return &_mesa_texformat_a8;
-
-   case 1:
-   case GL_LUMINANCE:
-   case GL_LUMINANCE4:
-   case GL_LUMINANCE8:
-   case GL_LUMINANCE12:
-   case GL_LUMINANCE16:
-   case GL_COMPRESSED_LUMINANCE:
-      return &_mesa_texformat_l8;
-
-   case 2:
-   case GL_LUMINANCE_ALPHA:
-   case GL_LUMINANCE4_ALPHA4:
-   case GL_LUMINANCE6_ALPHA2:
-   case GL_LUMINANCE8_ALPHA8:
-   case GL_LUMINANCE12_ALPHA4:
-   case GL_LUMINANCE12_ALPHA12:
-   case GL_LUMINANCE16_ALPHA16:
-   case GL_COMPRESSED_LUMINANCE_ALPHA:
-      return &_mesa_texformat_al88;
-
-   case GL_INTENSITY:
-   case GL_INTENSITY4:
-   case GL_INTENSITY8:
-   case GL_INTENSITY12:
-   case GL_INTENSITY16:
-   case GL_COMPRESSED_INTENSITY:
-      return &_mesa_texformat_i8;
-
-   case GL_YCBCR_MESA:
-      if (type == GL_UNSIGNED_SHORT_8_8_MESA ||
-	  type == GL_UNSIGNED_BYTE)
-         return &_mesa_texformat_ycbcr;
-      else
-         return &_mesa_texformat_ycbcr_rev;
-
-   case GL_COMPRESSED_RGB_FXT1_3DFX:
-     return &_mesa_texformat_rgb_fxt1;
-   case GL_COMPRESSED_RGBA_FXT1_3DFX:
-     return &_mesa_texformat_rgba_fxt1;
-
-   case GL_RGB_S3TC:
-   case GL_RGB4_S3TC:
-   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
-     return &_mesa_texformat_rgb_dxt1;
-
-   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
-     return &_mesa_texformat_rgba_dxt1;
-
-   case GL_RGBA_S3TC:
-   case GL_RGBA4_S3TC:
-   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
-     return &_mesa_texformat_rgba_dxt3;
-
-   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
-      return &_mesa_texformat_rgba_dxt5;
-
-   case GL_DEPTH_COMPONENT:
-   case GL_DEPTH_COMPONENT16:
-   case GL_DEPTH_COMPONENT24:
-   case GL_DEPTH_COMPONENT32:
-      return &_mesa_texformat_depth_component16;
-
-   default:
-      fprintf(stderr, "unexpected texture format %s in %s\n", 
-	      _mesa_lookup_enum_by_nr(internalFormat),
-	      __FUNCTION__);
-      return NULL;
-   }
-
-   return NULL; /* never get here */
-}
-
-
-
-void intelDestroyTexObj(intelContextPtr intel, intelTextureObjectPtr t)
-{
-   unsigned   i;
-
-   if ( intel == NULL ) 
-      return;
-
-   if ( t->age > intel->dirtyAge )
-      intel->dirtyAge = t->age;
-
-   for ( i = 0 ; i < MAX_TEXTURE_UNITS ; i++ ) {
-      if ( t == intel->CurrentTexObj[ i ] ) 
-	 intel->CurrentTexObj[ i ] = NULL;
-   }
-}
-
-
-
-/* Upload an image from mesa's internal copy.  Image may be 1D, 2D or
- * 3D.  Cubemaps are expanded elsewhere.
- */
-static void intelUploadTexImage( intelContextPtr intel,
-				 intelTextureObjectPtr t,
-				 const struct gl_texture_image *image,
-				 const GLuint offset )
-{
-
-   if (!image || !image->Data) 
-      return;
-
-   if (image->Depth == 1 && image->IsClientData) {
-      if (INTEL_DEBUG & DEBUG_TEXTURE)
-	 fprintf(stderr, "Blit uploading\n");
-
-      /* Do it with a blit.
-       */
-      intelEmitCopyBlitLocked( intel,
-			       image->TexFormat->TexelBytes,
-			       image->RowStride, /* ? */
-			       intelGetMemoryOffsetMESA( NULL, 0, image->Data ),
-			       t->Pitch / image->TexFormat->TexelBytes,
-			       intelGetMemoryOffsetMESA( NULL, 0, t->BufAddr + offset ),
-			       0, 0,
-			       0, 0,
-			       image->Width,
-			       image->Height);
-   }
-   else if (image->IsCompressed) {
-      GLuint row_len = image->Width * 2;
-      GLubyte *dst = (GLubyte *)(t->BufAddr + offset);
-      GLubyte *src = (GLubyte *)image->Data;
-      GLuint j;
-
-      if (INTEL_DEBUG & DEBUG_TEXTURE)
-	 fprintf(stderr, 
-		 "Upload image %dx%dx%d offset %xm row_len %x "
-		 "pitch %x depth_pitch %x\n",
-		 image->Width, image->Height, image->Depth, offset,
-		 row_len, t->Pitch, t->depth_pitch);
-
-      switch (image->InternalFormat) {
-	case GL_COMPRESSED_RGB_FXT1_3DFX:
-	case GL_COMPRESSED_RGBA_FXT1_3DFX:
-	case GL_RGB_S3TC:
-	case GL_RGB4_S3TC:
-	case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
-	case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
-	  for (j = 0 ; j < image->Height/4 ; j++, dst += (t->Pitch)) {
-	    __memcpy(dst, src, row_len );
-	    src += row_len;
-	  }
-	  break;
-	case GL_RGBA_S3TC:
-	case GL_RGBA4_S3TC:
-	case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
-	case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
-	  for (j = 0 ; j < image->Height/4 ; j++, dst += (t->Pitch)) {
-	    __memcpy(dst, src, (image->Width*4) );
-	    src += image->Width*4;
-	  }
-	  break;
-	default:
-	  fprintf(stderr,"Internal Compressed format not supported %d\n", image->InternalFormat);
-	  break;
-      }
-   }
-   else {
-      GLuint row_len = image->Width * image->TexFormat->TexelBytes;
-      GLubyte *dst = (GLubyte *)(t->BufAddr + offset);
-      GLubyte *src = (GLubyte *)image->Data;
-      GLuint d, j;
-
-      if (INTEL_DEBUG & DEBUG_TEXTURE)
-	 fprintf(stderr, 
-		 "Upload image %dx%dx%d offset %xm row_len %x "
-		 "pitch %x depth_pitch %x\n",
-		 image->Width, image->Height, image->Depth, offset,
-		 row_len, t->Pitch, t->depth_pitch);
-
-      if (row_len == t->Pitch) {
-	 for (d = 0; d < image->Depth; d++) {
-	    memcpy( dst, src, t->Pitch * image->Height );
-	    dst += t->depth_pitch;
-	    src += row_len * image->Height;
-	 }
-      }
-      else { 
-	 for (d = 0 ; d < image->Depth ; d++) {
-	    for (j = 0 ; j < image->Height ; j++) {
-	       __memcpy(dst, src, row_len );
-	       src += row_len;
-	       dst += t->Pitch;
-	    }
-
-	    dst += t->depth_pitch - (t->Pitch * image->Height);
-	 }
-      }
-   }
-}
-
-
-
-int intelUploadTexImages( intelContextPtr intel, 
-			  intelTextureObjectPtr t,
-			  GLuint face)
-{
-   const int numLevels = t->base.lastLevel - t->base.firstLevel + 1;
-   const struct gl_texture_image *firstImage = t->image[face][t->base.firstLevel].image;
-   int pitch = firstImage->RowStride * firstImage->TexFormat->TexelBytes;
-
-   /* Can we texture out of the existing client data? */
-   if ( numLevels == 1 &&
-	firstImage->IsClientData &&
-	(pitch & 3) == 0) {
-
-      if (INTEL_DEBUG & DEBUG_TEXTURE)
-	 fprintf(stderr, "AGP texturing from client memory\n");
-
-      t->TextureOffset = intelAgpOffsetFromVirtual( intel, firstImage->Data );
-      t->BufAddr = 0;
-      t->dirty = ~0;
-      return GL_TRUE;
-   }
-   else {
-      if (INTEL_DEBUG & DEBUG_TEXTURE) 
-	 fprintf(stderr, "Uploading client data to agp\n");
-
-      INTEL_FIREVERTICES( intel );
-      LOCK_HARDWARE( intel );
-
-      if ( t->base.memBlock == NULL ) {
-	 int heap;
-
-	 heap = driAllocateTexture( intel->texture_heaps, intel->nr_heaps,
-				    (driTextureObject *) t );
-	 if ( heap == -1 ) {
-	    UNLOCK_HARDWARE( intel );
-	    return GL_FALSE;
-	 }
-
-	 /* Set the base offset of the texture image */
-	 t->BufAddr = intel->intelScreen->tex.map + t->base.memBlock->ofs;
-	 t->TextureOffset = intel->intelScreen->tex.offset + t->base.memBlock->ofs;
-	 t->dirty = ~0;
-      }
-
-
-      /* Let the world know we've used this memory recently.
-       */
-      driUpdateTextureLRU( (driTextureObject *) t );
-
-
-      /* Upload any images that are new */
-      if (t->base.dirty_images[face]) {
-	 int i;
-
- 	 intelWaitForIdle( intel );
-	    
-	 for (i = 0 ; i < numLevels ; i++) { 
-	    int level = i + t->base.firstLevel;
-
-	    if (t->base.dirty_images[face] & (1<<level)) {
-
-	       const struct gl_texture_image *image = t->image[face][i].image;
-	       GLuint offset = t->image[face][i].offset;
-
-     	       if (INTEL_DEBUG & DEBUG_TEXTURE)
-	          fprintf(stderr, "upload level %d, offset %x\n", 
-			  level, offset);
-
-	       intelUploadTexImage( intel, t, image, offset );
-	    }
-	 }
-	 t->base.dirty_images[face] = 0;
-	 intel->perf_boxes |= I830_BOX_TEXTURE_LOAD;
-      }
-      
-      UNLOCK_HARDWARE( intel );
-      return GL_TRUE;
-   }
-}
-
-/**
- * Allocate a new texture object.
- * Called via ctx->Driver.NewTextureObject.
- * Note: this function will be called during context creation to
- * allocate the default texture objects.
- * Note: we could use containment here to 'derive' the driver-specific
- * texture object from the core mesa gl_texture_object.  Not done at this time.
- */
-static struct gl_texture_object *
-intelNewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
-{
-   struct gl_texture_object *obj = _mesa_new_texture_object(ctx, name, target);
-   INTEL_CONTEXT(ctx)->vtbl.alloc_tex_obj( obj );
-   return obj;
-}
-
-
-void intelInitTextureFuncs( struct dd_function_table *functions )
-{
-   functions->NewTextureObject          = intelNewTextureObject;
-   functions->ChooseTextureFormat       = intelChooseTextureFormat;
-   functions->TexImage1D                = intelTexImage1D;
-   functions->TexImage2D                = intelTexImage2D;
-   functions->TexImage3D                = intelTexImage3D;
-   functions->TexSubImage1D             = intelTexSubImage1D;
-   functions->TexSubImage2D             = intelTexSubImage2D;
-   functions->TexSubImage3D             = intelTexSubImage3D;
-   functions->CopyTexImage1D            = _swrast_copy_teximage1d;
-   functions->CopyTexImage2D            = _swrast_copy_teximage2d;
-   functions->CopyTexSubImage1D         = _swrast_copy_texsubimage1d;
-   functions->CopyTexSubImage2D         = _swrast_copy_texsubimage2d;
-   functions->CopyTexSubImage3D         = _swrast_copy_texsubimage3d;
-   functions->DeleteTexture             = intelDeleteTexture;
-   functions->UpdateTexturePalette      = NULL;
-   functions->IsTextureResident         = driIsTextureResident;
-   functions->TestProxyTexImage         = _mesa_test_proxy_teximage;
-   functions->DeleteTexture             = intelDeleteTexture;
-   functions->CompressedTexImage2D      = intelCompressedTexImage2D;
-   functions->CompressedTexSubImage2D   = intelCompressedTexSubImage2D;
+   functions->TextureMemCpy = do_memcpy;
 }
--- a/src/mesa/drivers/dri/i915/intel_tex.h
+++ b/src/mesa/drivers/dri/i915/intel_tex.h
@@ -35,8 +35,88 @@

 void intelInitTextureFuncs( struct dd_function_table *functions );

-void intelDestroyTexObj( intelContextPtr intel, intelTextureObjectPtr t );
-int intelUploadTexImages( intelContextPtr intel, intelTextureObjectPtr t,
-			  GLuint face );
+const struct gl_texture_format *
+intelChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+                          GLenum format, GLenum type );
+
+void intelTexImage2D(GLcontext *ctx, 
+		     GLenum target, GLint level,
+		     GLint internalFormat,
+		     GLint width, GLint height, GLint border,
+		     GLenum format, GLenum type, const void *pixels,
+		     const struct gl_pixelstore_attrib *packing,
+		     struct gl_texture_object *texObj,
+		     struct gl_texture_image *texImage);
+
+void intelTexSubImage2D(GLcontext *ctx,
+			GLenum target,
+			GLint level,
+			GLint xoffset, GLint yoffset,
+			GLsizei width, GLsizei height,
+			GLenum format, GLenum type,
+			const GLvoid *pixels,
+			const struct gl_pixelstore_attrib *packing,
+			struct gl_texture_object *texObj,
+			struct gl_texture_image *texImage);
+
+void intelTexImage1D(GLcontext *ctx, 
+		     GLenum target, GLint level,
+		     GLint internalFormat,
+		     GLint width, GLint border,
+		     GLenum format, GLenum type, const void *pixels,
+		     const struct gl_pixelstore_attrib *packing,
+		     struct gl_texture_object *texObj,
+		     struct gl_texture_image *texImage);
+
+void intelTexSubImage1D(GLcontext *ctx,
+			GLenum target,
+			GLint level,
+			GLint xoffset,
+			GLsizei width,
+			GLenum format, GLenum type,
+			const GLvoid *pixels,
+			const struct gl_pixelstore_attrib *packing,
+			struct gl_texture_object *texObj,
+			struct gl_texture_image *texImage);
+
+void intelCopyTexImage1D( GLcontext *ctx, GLenum target, GLint level,
+			  GLenum internalFormat,
+			  GLint x, GLint y, GLsizei width,
+			  GLint border );
+
+void intelCopyTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+			  GLenum internalFormat,
+			  GLint x, GLint y, GLsizei width, GLsizei height,
+			  GLint border );
+
+void intelCopyTexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
+			     GLint xoffset,
+			     GLint x, GLint y, GLsizei width );
+
+void intelCopyTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+			     GLint xoffset, GLint yoffset,
+			     GLint x, GLint y, GLsizei width, GLsizei height );
+
+
+GLuint intel_validate_mipmap_tree( struct intel_context *intel,
+				   struct intel_texture_object *intelObj );
+
+void intel_add_texoffset_fixup( struct intel_context *intel,
+				GLuint unit,
+				GLuint *ptr );
+
+void intel_apply_fixups( struct intel_context *intel );
+
+GLboolean intel_prevalidate_buffers( struct intel_context *intel );
+GLboolean intel_validate_buffers( struct intel_context *intel );
+void intel_fence_buffers( struct intel_context *intel );
+
+
+void intel_tex_map_images( struct intel_context *intel,
+			   struct intel_texture_object *intelObj );
+
+void intel_tex_unmap_images( struct intel_context *intel,
+			     struct intel_texture_object *intelObj );
+

 #endif
--- a/src/mesa/drivers/dri/i915/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/i915/intel_tex_copy.c
@@ -0,0 +1,301 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "mtypes.h"
+#include "enums.h"
+#include "teximage.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_batchbuffer.h"
+#include "intel_mipmap_tree.h"
+#include "intel_regions.h"
+#include "intel_tex.h"
+#include "bufmgr.h"
+
+/* Do the best we can using the blitter.  A future project is to use
+ * the texture engine and fragment programs for these copies.
+ */
+
+/* Choose the region a CopyTex* blit reads from for internalFormat.
+ * Returns NULL when no region suits the format at the current screen cpp.
+ */
+static struct intel_region *get_teximage_source( struct intel_context *intel,
+						 GLenum internalFormat )
+{
+   if (0)
+      _mesa_printf("%s %s\n", __FUNCTION__, 
+		   _mesa_lookup_enum_by_nr(internalFormat));
+
+   switch (internalFormat) {
+   case GL_DEPTH_COMPONENT:
+   case GL_DEPTH_COMPONENT16_ARB:
+      /* Depth-only formats are served only with a screen cpp of 2. */
+      return (intel->intelScreen->cpp == 2) ? intel->depth_region : NULL;
+   case GL_DEPTH24_STENCIL8_EXT:
+   case GL_DEPTH_STENCIL_EXT:
+      /* Packed depth/stencil formats are served only with a screen cpp of 4. */
+      return (intel->intelScreen->cpp == 4) ? intel->depth_region : NULL;
+   case GL_RGBA:
+      return intel_readbuf_region( intel );
+   case GL_RGB:
+      /* RGB is taken from the read buffer only with a screen cpp of 2. */
+      return (intel->intelScreen->cpp == 2) ? intel_readbuf_region( intel ) : NULL;
+   default:
+      return NULL;
+   }
+}
+/* Report whether a CopyTex* blit may be used; at present always GL_TRUE. */
+static GLboolean check_copytex_fragment_ops( const GLcontext *ctx )
+{
+   return GL_TRUE;
+   /* NOTE(review): unreachable below - the state test is disabled by the return above. */
+   return !(ctx->Color.AlphaEnabled || 
+/* 	    ctx->Depth.Test || */
+	    ctx->Fog.Enabled ||
+/* 	    ctx->Scissor.Enabled || */
+	    ctx->Stencil.Enabled ||
+	    !ctx->Color.ColorMask[0] ||
+	    !ctx->Color.ColorMask[1] ||
+	    !ctx->Color.ColorMask[2] ||
+	    !ctx->Color.ColorMask[3] ||
+	    ctx->Color.ColorLogicOpEnabled ||
+	    ctx->Texture._EnabledUnits);
+}
+/* Blit the source-region rectangle (x, y, width, height) into intelImage's
+ * miptree at texel offset (dstx, dsty).  Returns GL_FALSE when the blit
+ * cannot be used (no miptree, no source region for internalFormat, or
+ * buffer validation fails) so that callers can fall back to swrast.
+ */
+static GLboolean do_copy_texsubimage( struct intel_context *intel,
+				      struct intel_texture_image *intelImage,
+				      GLenum internalFormat,
+				      GLint dstx, GLint dsty,
+				      GLint x, GLint y,
+				      GLsizei width, GLsizei height )
+{
+   GLcontext *ctx = &intel->ctx;
+   struct intel_region *src = get_teximage_source(intel, internalFormat);
+   GLboolean ret = GL_TRUE;
+
+   if (!intelImage->mt || !src)
+      return GL_FALSE;
+ 
+   if (!check_copytex_fragment_ops( ctx ))
+      return GL_FALSE;
+ 
+   LOCK_HARDWARE(intel);
+   intelInstallBatchBuffer(intel);
+   {
+      __DRIdrawablePrivate *dPriv = intel->driDrawable;
+      GLuint image_offset = intel_miptree_image_offset(intelImage->mt, 
+						       intelImage->face,
+						       intelImage->level);
+      GLuint dst_offset = 0;
+      GLuint src_offset = 0;
+      GLint orig_x = x;
+      GLint orig_y = y;
+
+      if (!intel_clip_to_framebuffer(ctx, ctx->DrawBuffer, &x, &y, &width, &height)) {
+	 ret = GL_TRUE;		/* presumably nothing left to copy; not a failure */
+	 goto out;
+      }
+
+      /* Clipping may have moved the source origin: shift the caller's
+       * destination offset by the same amount instead of replacing it. */
+      dstx += x - orig_x;
+      dsty += y - orig_y;
+
+      y = dPriv->h - y - height; 	/* convert from gl to hardware coords */
+      x += dPriv->x;
+      y += dPriv->y;
+
+      bmAddBuffer(intel->buffer_list, 
+		  intelImage->mt->region->buffer, 
+		  BM_WRITE, NULL, &dst_offset);
+
+      bmAddBuffer(intel->buffer_list, 
+		  src->buffer, 
+		  BM_READ, NULL, &src_offset);
+
+      if (!bmValidateBufferList(intel->bm, intel->buffer_list, BM_MEM_AGP)) {
+	 ret = GL_FALSE;
+	 goto out;
+      }
+      
+      intelEmitCopyBlitLocked( intel,
+			       intelImage->mt->cpp,
+			       src->pitch, src_offset,
+			       intelImage->mt->pitch, 
+			       dst_offset + image_offset,
+			       x, y, 
+			       dstx, dsty,
+			       width, height );
+   out:
+      intelFlushBatchLocked( intel, GL_TRUE, GL_FALSE, GL_FALSE);
+   }
+   
+   UNLOCK_HARDWARE(intel);
+   if (!ret)
+      return GL_FALSE;
+
+#if 0
+   /* GL_SGIS_generate_mipmap -- this can be accelerated now.
+    */
+   if (level == texObj->BaseLevel && 
+       texObj->GenerateMipmap) {
+      intel_generate_mipmap(ctx, target,
+                            &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
+                            texObj);
+   }
+#endif
+
+   return GL_TRUE;
+}
+
+
+
+
+
+/* CopyTexImage1D: use the blitter when possible, otherwise swrast. */
+void intelCopyTexImage1D( GLcontext *ctx, GLenum target, GLint level,
+			  GLenum internalFormat,
+			  GLint x, GLint y, GLsizei width,
+			  GLint border )
+{
+   struct gl_texture_unit *unit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+   struct gl_texture_object *obj = _mesa_select_tex_object(ctx, unit, target);
+   struct gl_texture_image *image = _mesa_select_tex_image(ctx, unit, target, level);
+
+   /* Bordered images never take the blit path. */
+   if (!border) {
+      /* Set up or redefine the texture object, mipmap tree and texture
+       * image without pixel data; the blit below populates it.
+       */
+      ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
+			     width, border,
+			     GL_RGBA, CHAN_TYPE, NULL,
+			     &ctx->DefaultPacking, obj, image);
+
+      /* A 1D copy is a one-row blit landing at texel (0, 0). */
+      if (do_copy_texsubimage(intel_context(ctx), 
+			      intel_texture_image(image),
+			      internalFormat,
+			      0, 0,
+			      x, y, 
+			      width, 1))
+	 return;
+   }
+
+   /* Border requested or blit declined: let swrast do the copy. */
+   _swrast_copy_teximage1d( ctx, target, level, internalFormat, x, y,
+			    width, border );
+}
+
+/* CopyTexImage2D: (re)define the image and fill it with a blit when
+ * possible; otherwise hand the whole request to swrast. */
+void intelCopyTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+			  GLenum internalFormat,
+			  GLint x, GLint y, GLsizei width, GLsizei height,
+			  GLint border )
+{
+   struct gl_texture_unit *unit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+   struct gl_texture_object *obj = _mesa_select_tex_object(ctx, unit, target);
+   struct gl_texture_image *image = _mesa_select_tex_image(ctx, unit, target, level);
+
+   /* Bordered images never take the blit path. */
+   if (!border) {
+      /* Set up or redefine the texture object, mipmap tree and texture
+       * image without pixel data; the blit below populates it.
+       */
+      ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
+			     width, height, border,
+			     GL_RGBA, CHAN_TYPE, NULL,
+			     &ctx->DefaultPacking, obj, image);
+
+      /* The whole rectangle lands at texel (0, 0) of the new image. */
+      if (do_copy_texsubimage(intel_context(ctx), 
+			      intel_texture_image(image),
+			      internalFormat,
+			      0, 0,
+			      x, y, 
+			      width, height))
+	 return;
+   }
+
+   /* Border requested or blit declined: let swrast do the copy. */
+   _swrast_copy_teximage2d( ctx, target, level, internalFormat, x, y,
+			    width, height, border );
+}
+
+
+void intelCopyTexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
+			     GLint xoffset,
+			     GLint x, GLint y, GLsizei width )
+{
+   struct gl_texture_unit *unit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+   struct gl_texture_image *image = _mesa_select_tex_image(ctx, unit, target, level);
+
+   /* Still to do: check that the texture is compatible with the source
+    * format.  For now the image's own internal format picks the source.
+    */
+   if (do_copy_texsubimage(intel_context(ctx), 
+			   intel_texture_image(image),
+			   image->InternalFormat,
+			   xoffset, 0,
+			   x, y, width, 1))
+      return;
+
+   /* Blit path declined: copy the single row with swrast. */
+   _swrast_copy_texsubimage1d( ctx, target, level, xoffset, x, y, width );
+}
+
+
+
+void intelCopyTexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+			     GLint xoffset, GLint yoffset,
+			     GLint x, GLint y, GLsizei width, GLsizei height )
+{
+   struct gl_texture_unit *unit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+   struct gl_texture_image *image = _mesa_select_tex_image(ctx, unit, target, level);
+
+   /* Still to do: check that the texture is compatible with the source
+    * format.  For now the image's own internal format picks the source.
+    */
+   if (do_copy_texsubimage(intel_context(ctx), 
+			   intel_texture_image(image),
+			   image->InternalFormat,
+			   xoffset, yoffset,
+			   x, y, width, height))
+      return;
+
+   /* Blit path declined: copy the rectangle with swrast. */
+   _swrast_copy_texsubimage2d( ctx, target, level,
+			       xoffset, yoffset,
+			       x, y, width, height );
+}
--- a/src/mesa/drivers/dri/i915/intel_tex_format.c
+++ b/src/mesa/drivers/dri/i915/intel_tex_format.c
@@ -0,0 +1,148 @@
+#include "intel_context.h"
+#include "intel_tex.h"
+#include "texformat.h"
+#include "enums.h"
+
+/* Pick the Mesa texture format used to store an image with the given GL
+ * internalFormat, using the client format/type as a hint.  Returns NULL
+ * (after a message on stderr) for an internalFormat it does not handle.
+ *
+ * This choice suits all the supported hardware, though the formats must
+ * still be mapped onto hardware descriptors elsewhere.  The i915 could
+ * support more formats by swizzling colors right after sampling.
+ */
+const struct gl_texture_format *
+intelChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+                          GLenum format, GLenum type )
+{
+   intelContextPtr intel = INTEL_CONTEXT( ctx );
+   const GLboolean do32bpt = (intel->intelScreen->cpp == 4);
+
+   switch ( internalFormat ) {
+   case 4:
+   case GL_RGBA:
+   case GL_COMPRESSED_RGBA:
+      if ( format == GL_BGRA ) {
+	 if ( type == GL_UNSIGNED_INT_8_8_8_8_REV ) {
+	    return &_mesa_texformat_argb8888;
+	 }
+         else if ( type == GL_UNSIGNED_SHORT_4_4_4_4_REV ) {
+            return &_mesa_texformat_argb4444;
+	 }
+         else if ( type == GL_UNSIGNED_SHORT_1_5_5_5_REV ) {
+	    return &_mesa_texformat_argb1555;
+	 }
+      }
+      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
+
+   case 3:
+   case GL_RGB:
+   case GL_COMPRESSED_RGB:
+      if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) {
+	 return &_mesa_texformat_rgb565;
+      }
+      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565;
+
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
+
+   case GL_RGBA4:
+   case GL_RGBA2:
+      return &_mesa_texformat_argb4444;
+
+   case GL_RGB5_A1:
+      return &_mesa_texformat_argb1555;
+
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      return &_mesa_texformat_argb8888;
+
+   case GL_RGB5:
+   case GL_RGB4:
+   case GL_R3_G3_B2:
+      return &_mesa_texformat_rgb565;
+
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+   case GL_COMPRESSED_ALPHA:
+      return &_mesa_texformat_a8;
+
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+   case GL_COMPRESSED_LUMINANCE:
+      return &_mesa_texformat_l8;
+
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+      return &_mesa_texformat_al88;
+
+   case GL_INTENSITY:
+   case GL_INTENSITY4:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+   case GL_COMPRESSED_INTENSITY:
+      return &_mesa_texformat_i8;
+
+   case GL_YCBCR_MESA:
+      if (type == GL_UNSIGNED_SHORT_8_8_MESA ||
+	  type == GL_UNSIGNED_BYTE)
+         return &_mesa_texformat_ycbcr;
+      else
+         return &_mesa_texformat_ycbcr_rev;
+
+   case GL_COMPRESSED_RGB_FXT1_3DFX:
+     return &_mesa_texformat_rgb_fxt1;
+   case GL_COMPRESSED_RGBA_FXT1_3DFX:
+     return &_mesa_texformat_rgba_fxt1;
+
+   case GL_RGB_S3TC:
+   case GL_RGB4_S3TC:
+   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+     return &_mesa_texformat_rgb_dxt1;
+
+   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+     return &_mesa_texformat_rgba_dxt1;
+
+   case GL_RGBA_S3TC:
+   case GL_RGBA4_S3TC:
+   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+     return &_mesa_texformat_rgba_dxt3;
+
+   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+      return &_mesa_texformat_rgba_dxt5;
+
+   case GL_DEPTH_COMPONENT:
+   case GL_DEPTH_COMPONENT16:
+   case GL_DEPTH_COMPONENT24:
+   case GL_DEPTH_COMPONENT32:
+      return &_mesa_texformat_depth_component16;
+
+   default:
+      fprintf(stderr, "unexpected texture format %s in %s\n", 
+	      _mesa_lookup_enum_by_nr(internalFormat),
+	      __FUNCTION__);
+      return NULL;
+   }
+
+   return NULL; /* never get here */
+}
--- a/src/mesa/drivers/dri/i915/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i915/intel_tex_image.c
@@ -0,0 +1,355 @@
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "glheader.h"
+#include "macros.h"
+#include "mtypes.h"
+#include "enums.h"
+#include "colortab.h"
+#include "convolve.h"
+#include "context.h"
+#include "simple_list.h"
+#include "texcompress.h"
+#include "texformat.h"
+#include "texobj.h"
+#include "texstore.h"
+
+#include "intel_context.h"
+#include "intel_mipmap_tree.h"
+#include "intel_tex.h"
+#include "intel_ioctl.h"
+
+
+/* Functions to store texture images.  Where possible, mipmap_tree's
+ * will be created or further instantiated with image data, otherwise
+ * images will be stored in malloc'd memory.  A validation step is
+ * required to pull those images into a mipmap tree, or otherwise
+ * decide a fallback is required.
+ */
+
+
+/* Return ceil(log2(n)), or 0 when n <= 1.  The running power of two is
+ * unsigned so that n > 2^30 cannot overflow a signed multiply. */
+static int logbase2(int n)
+{
+   GLuint pow2 = 1;
+   GLint log2 = 0;
+   while (n > 0 && pow2 < (GLuint) n) {
+      pow2 <<= 1;
+      log2++;
+   }
+   return log2;
+}
+
+
+/* Try to create intelObj->mt, guessing the level range from one image.
+ *
+ * Nothing is allocated when the image has a border, or when it sits above
+ * BaseLevel and one of its relevant dimensions is already 1.
+ *
+ * Otherwise the image size is scaled back up to the first level (BaseLevel,
+ * or 0 if the image lies below BaseLevel).  The tree covers just that level
+ * for a non-mipmapped min filter at that level, else down to 1x1.
+ * Consider pruning this tree at validation if the saving is worth it.
+ */
+static void guess_and_alloc_mipmap_tree( struct intel_context *intel,
+					 struct intel_texture_object *intelObj,
+					 struct intel_texture_image *intelImage )
+{
+   GLuint firstLevel;
+   GLuint lastLevel;
+   GLuint width = intelImage->base.Width;
+   GLuint height = intelImage->base.Height;
+   GLuint depth = intelImage->base.Depth;
+   GLuint l2width, l2height, l2depth;
+   GLuint i;
+
+   DBG("%s\n", __FUNCTION__);
+
+   if (intelImage->base.Border)
+      return;
+
+   if (intelImage->level > intelObj->base.BaseLevel &&
+       (intelImage->base.Width == 1 ||
+	(intelObj->base.Target != GL_TEXTURE_1D && 
+	 intelImage->base.Height == 1) ||
+	(intelObj->base.Target == GL_TEXTURE_3D &&
+	 intelImage->base.Depth == 1)))
+      return;
+
+   /* If this image disrespects BaseLevel, allocate from level zero.
+    * Usually BaseLevel == 0, so it's unlikely to happen.
+    */
+   if (intelImage->level < intelObj->base.BaseLevel)
+      firstLevel = 0;
+   else
+      firstLevel = intelObj->base.BaseLevel;
+
+
+   /* Figure out image dimensions at start level. 
+    */
+   for (i = intelImage->level; i > firstLevel; i--) {
+      width <<= 1;
+      if (height != 1) height <<= 1;
+      if (depth != 1) depth <<= 1;
+   }
+
+   /* Guess a reasonable value for lastLevel.  This is probably going
+    * to be wrong fairly often and might mean that we have to look at
+    * resizable buffers, or require that buffers implement lazy
+    * pagetable arrangements.
+    */
+   if ((intelObj->base.MinFilter == GL_NEAREST || 
+	intelObj->base.MinFilter == GL_LINEAR) &&
+       intelImage->level == firstLevel) {
+      lastLevel = firstLevel;
+   }
+   else {
+      l2width = logbase2(width);
+      l2height = logbase2(height);
+      l2depth = logbase2(depth);
+      lastLevel = firstLevel + MAX2(MAX2(l2width,l2height),l2depth);
+   }
+	 
+
+   intelObj->mt = intel_miptree_create( intel,
+					intelObj->base.Target,
+					intelImage->base.InternalFormat,
+					firstLevel,
+					lastLevel,
+					width,
+					height,
+					depth,
+					intelImage->base.TexFormat->TexelBytes,
+					intelImage->base.IsCompressed );
+
+   DBG("%s - success\n", __FUNCTION__);	/* printed without checking the create result */
+}
+   
+
+
+
+/* Map a cube map face target to its face index (0..5) relative to
+ * POSITIVE_X; every other target maps to face 0.
+ */
+static GLuint target_to_face( GLenum target )
+{
+   /* The six face enumerants are consecutive, so a range test selects
+    * exactly the targets the face index is defined for.
+    */
+   if (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB &&
+       target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB)
+      return ((GLuint) target - 
+	      (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X);
+
+   return 0;
+}
+/* Shared TexImage1D/2D path: choose the texture format, attach the image
+ * to the object's mipmap tree (creating one if there is none), then store
+ * the pixels in the tree or, failing that, in malloc'd memory.  A NULL
+ * pixels pointer stops after the tree setup.
+ */
+static void intelTexImage(GLcontext *ctx, 
+			  GLint dims,
+			  GLenum target, GLint level,
+			  GLint internalFormat,
+			  GLint width, GLint height, GLint border,
+			  GLenum format, GLenum type, const void *pixels,
+			  const struct gl_pixelstore_attrib *packing,
+			  struct gl_texture_object *texObj,
+			  struct gl_texture_image *texImage)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_object *intelObj = intel_texture_object(texObj);
+   struct intel_texture_image *intelImage = intel_texture_image(texImage);
+   GLint postConvWidth = width;
+   GLint postConvHeight = height;
+   GLint texelBytes, sizeInBytes;
+   GLuint dstRowStride;
+
+   DBG("%s target %s level %d %dx%d border %d\n", __FUNCTION__,
+		_mesa_lookup_enum_by_nr(target),
+		level,
+		width, height, border);
+
+   intelImage->face = target_to_face( target );
+   intelImage->level = level;
+
+   if (ctx->_ImageTransferState & IMAGE_CONVOLUTION_BIT) {
+      _mesa_adjust_image_for_convolution(ctx, dims, &postConvWidth,
+                                         &postConvHeight);
+   }
+
+   /* choose the texture format */
+   texImage->TexFormat = intelChooseTextureFormat(ctx, internalFormat, 
+						  format, type);
+
+   assert(texImage->TexFormat);
+
+   if (dims == 1) {
+      texImage->FetchTexelc = texImage->TexFormat->FetchTexel1D;
+      texImage->FetchTexelf = texImage->TexFormat->FetchTexel1Df;
+   }
+   else {
+      texImage->FetchTexelc = texImage->TexFormat->FetchTexel2D;
+      texImage->FetchTexelf = texImage->TexFormat->FetchTexel2Df;
+   }
+   texelBytes = texImage->TexFormat->TexelBytes;
+
+   /* Minimum pitch of 32 bytes.  Formats with TexelBytes == 0 are
+    * skipped, since the adjustment would divide by zero. */
+   if (texelBytes > 0 && postConvWidth * texelBytes < 32) {
+      postConvWidth = 32 / texelBytes;
+      texImage->RowStride = postConvWidth;
+   }
+
+   assert(texImage->RowStride == postConvWidth);
+
+   /* Release the reference to a potentially orphaned buffer.   
+    * Release any old malloced memory.
+    */
+   if (intelImage->mt) {
+      intel_miptree_release(intel, intelImage->mt);
+      intelImage->mt = NULL;
+      assert(!texImage->Data);
+   }
+   else if (texImage->Data) {
+      free(texImage->Data);
+      texImage->Data = NULL;	/* no dangling pointer if we return early */
+   }
+
+   /* XXX: If this is the only texture image in the tree, could call
+    * bmBufferData with NULL data to free the old block and avoid
+    * waiting on any outstanding fences.
+    *
+    * XXX: Better to do this internally to intel_mipmap_tree.c,
+    * somehow?
+    */
+
+   if (!intelObj->mt) {
+      guess_and_alloc_mipmap_tree(intel, intelObj, intelImage);
+   }
+   
+   if (intelObj->mt && 
+       intelObj->mt != intelImage->mt &&
+       intel_miptree_match_image(intelObj->mt, &intelImage->base,
+				 intelImage->face, intelImage->level)) {
+      
+      if (intelImage->mt)
+	 intel_miptree_release(intel, intelImage->mt);
+
+      intelImage->mt = intel_miptree_reference(intelObj->mt);
+   }
+
+   /* intelCopyTexImage calls this function with pixels == NULL, with
+    * the expectation that the mipmap tree will be set up but nothing
+    * more will be done.  This is where those calls return:
+    */
+   pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, 1, 
+					format, type,
+					pixels, packing, "glTexImage");
+   if (!pixels) 
+      return;
+   
+   LOCK_HARDWARE(intel);
+   
+   if (intelImage->mt) {
+      texImage->Data = intel_miptree_image_map(intel, 
+					       intelImage->mt, 
+					       intelImage->face, 
+					       intelImage->level, 
+					       &dstRowStride);	 
+   }
+   else {
+      /* Allocate regular memory and store the image there temporarily.   */
+      if (texImage->IsCompressed) {
+	 sizeInBytes = texImage->CompressedSize;
+         dstRowStride = _mesa_compressed_row_stride(texImage->InternalFormat,width);
+      }
+      else {
+	 sizeInBytes = postConvWidth * postConvHeight * texelBytes;
+         dstRowStride = postConvWidth * texImage->TexFormat->TexelBytes;
+      }
+
+      texImage->Data = malloc(sizeInBytes);
+      if (!texImage->Data && sizeInBytes > 0) {
+	 /* Never hand StoreImage a NULL destination. */
+	 _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
+	 _mesa_unmap_teximage_pbo(ctx, packing);
+	 UNLOCK_HARDWARE(intel);
+	 return;
+      }
+   }
+     
+   /* Copy data.  Would like to know when it's ok for us to eg. use
+    * the blitter to copy.  Or, use the hardware to do the format
+    * conversion and copy:
+    */
+   if (!texImage->TexFormat->StoreImage(ctx, dims,
+					texImage->_BaseFormat,
+					texImage->TexFormat,
+					texImage->Data,
+					0, 0, 0,  /* dstX/Y/Zoffset */
+					dstRowStride, 0 /* dstImageStride */,
+					width, height, 1,
+					format, type, pixels, packing)) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
+   }
+
+   _mesa_unmap_teximage_pbo(ctx, packing);
+
+   if (intelImage->mt) {
+      intel_miptree_image_unmap(intel, intelImage->mt);
+      texImage->Data = NULL;
+   }
+
+   UNLOCK_HARDWARE(intel);
+
+#if 0
+   /* GL_SGIS_generate_mipmap -- this can be accelerated now.
+    */
+   if (level == texObj->BaseLevel && 
+       texObj->GenerateMipmap) {
+      intel_generate_mipmap(ctx, target,
+                            &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
+                            texObj);
+   }
+#endif
+}
+
+/* 2D variant: forwards every argument to intelTexImage() with dims = 2. */
+void intelTexImage2D(GLcontext *ctx, 
+		     GLenum target, GLint level,
+		     GLint internalFormat,
+		     GLint width, GLint height, GLint border,
+		     GLenum format, GLenum type, const void *pixels,
+		     const struct gl_pixelstore_attrib *packing,
+		     struct gl_texture_object *texObj,
+		     struct gl_texture_image *texImage)
+{
+   intelTexImage( ctx, 2, target, level, 
+		internalFormat, width, height, border,
+		format, type, pixels,
+		packing, texObj, texImage );
+}
+/* 1D variant: forwards to intelTexImage() with dims = 1 and a height of 1. */
+void intelTexImage1D(GLcontext *ctx, 
+		     GLenum target, GLint level,
+		     GLint internalFormat,
+		     GLint width, GLint border,
+		     GLenum format, GLenum type, const void *pixels,
+		     const struct gl_pixelstore_attrib *packing,
+		     struct gl_texture_object *texObj,
+		     struct gl_texture_image *texImage)
+{
+   intelTexImage( ctx, 1, target, level, 
+		  internalFormat, width, 1, border,
+		  format, type, pixels,
+		  packing, texObj, texImage );
+}
+
+
+
+
+
--- a/src/mesa/drivers/dri/i915/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i915/intel_tex_subimage.c
@@ -0,0 +1,148 @@
+
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "mtypes.h"
+#include "texobj.h"
+#include "texstore.h"
+#include "enums.h"
+
+#include "intel_context.h"
+#include "intel_tex.h"
+#include "intel_mipmap_tree.h"
+/* Shared TexSubImage1D/2D path: store client pixels into an existing image. */
+static void intelTexSubimage (GLcontext *ctx,
+			      GLint dims,
+			      GLenum target, GLint level,
+			      GLint xoffset, GLint yoffset,
+			      GLint width, GLint height,
+			      GLenum format, GLenum type, const void *pixels,
+			      const struct gl_pixelstore_attrib *packing,
+			      struct gl_texture_object *texObj,
+			      struct gl_texture_image *texImage)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_image *intelImage = intel_texture_image(texImage);
+   GLuint dstImageStride = 0;
+   /* Stride of a malloc'd uncompressed image; replaced when a miptree is
+    * mapped.  NOTE(review): malloc'd compressed images need a different one. */
+   GLuint dstRowStride = texImage->RowStride * texImage->TexFormat->TexelBytes;
+
+   DBG("%s target %s level %d offset %d,%d %dx%d\n", __FUNCTION__,
+		_mesa_lookup_enum_by_nr(target),
+		level,
+		xoffset, yoffset,
+		width, height);
+
+   pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, 1, format, type,
+                                        pixels, packing, "glTexSubImage2D");
+   if (!pixels)
+      return;
+
+   LOCK_HARDWARE(intel);
+
+   /* Map buffer if necessary.  Need to lock to prevent other contexts
+    * from uploading the buffer under us.
+    */
+   if (intelImage->mt) 
+      texImage->Data = intel_miptree_image_map(intel, 
+					       intelImage->mt, 
+					       intelImage->face, 
+					       intelImage->level, 
+					       &dstRowStride );
+
+   if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat,
+					texImage->TexFormat,
+					texImage->Data,
+					xoffset, yoffset, 0,
+					dstRowStride, dstImageStride,
+					width, height, 1,
+					format, type, pixels, packing)) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
+   }
+
+#if 0
+   /* GL_SGIS_generate_mipmap */
+   if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
+      _mesa_generate_mipmap(ctx, target,
+                            &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
+                            texObj);
+   }
+#endif
+
+   _mesa_unmap_teximage_pbo(ctx, packing);
+
+   if (intelImage->mt) {
+      intel_miptree_image_unmap(intel, intelImage->mt);
+      texImage->Data = NULL;
+   }
+
+   UNLOCK_HARDWARE(intel);
+}
+
+
+
+
+
+/* 2D variant: forwards every argument to intelTexSubimage() with dims = 2. */
+void intelTexSubImage2D(GLcontext *ctx,
+			GLenum target,
+			GLint level,
+			GLint xoffset, GLint yoffset,
+			GLsizei width, GLsizei height,
+			GLenum format, GLenum type,
+			const GLvoid *pixels,
+			const struct gl_pixelstore_attrib *packing,
+			struct gl_texture_object *texObj,
+			struct gl_texture_image *texImage)
+{
+
+   intelTexSubimage(ctx, 2,
+		    target, level, xoffset, yoffset, width,
+		    height, format, type, pixels, packing, texObj,
+		    texImage);
+
+}
+
+/* 1D variant: forwards to intelTexSubimage() with dims = 1, yoffset 0, height 1. */
+void intelTexSubImage1D(GLcontext *ctx,
+			GLenum target,
+			GLint level,
+			GLint xoffset,
+			GLsizei width,
+			GLenum format, GLenum type,
+			const GLvoid *pixels,
+			const struct gl_pixelstore_attrib *packing,
+			struct gl_texture_object *texObj,
+			struct gl_texture_image *texImage)
+{
+   intelTexSubimage(ctx, 1,
+		    target, level, xoffset, 0, width,
+		    1, format, type, pixels, packing, texObj,
+		    texImage);
+
+}
--- a/src/mesa/drivers/dri/i915/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/i915/intel_tex_validate.c
@@ -0,0 +1,349 @@
+#include "mtypes.h"
+#include "macros.h"
+
+#include "intel_context.h"
+#include "intel_mipmap_tree.h"
+#include "intel_tex.h"
+#include "bufmgr.h"
+
+/**
+ * Work out the range of mipmap levels that actually has to be sent to the
+ * hardware.  The result depends on the base image size, GL_TEXTURE_MIN_LOD,
+ * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL and GL_TEXTURE_MAX_LEVEL.
+ */
+static void intel_calculate_first_last_level( struct intel_texture_object *intelObj )
+{
+   struct gl_texture_object *tObj = &intelObj->base;
+   const struct gl_texture_image * const baseImage =
+       tObj->Image[0][tObj->BaseLevel];
+
+   /* Both bounds are deliberately signed: MinLod and MaxLod can be
+    * negative, and signed bounds avoid extra sign checks below.
+    */
+   int first, last;
+
+   switch (tObj->Target) {
+   case GL_TEXTURE_RECTANGLE_NV:
+   case GL_TEXTURE_4D_SGIS:
+      /* Only level zero is used for these targets. */
+      first = last = 0;
+      break;
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_3D:
+   case GL_TEXTURE_CUBE_MAP:
+      if (tObj->MinFilter != GL_NEAREST && tObj->MinFilter != GL_LINEAR) {
+	 /* Mipmapping filter: offset the base level by MinLod/MaxLod
+	  * (0.5 is added before truncation) and clamp into a valid range.
+	  */
+	 first = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5);
+	 first = MAX2(first, tObj->BaseLevel);
+	 last = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5);
+	 last = MAX2(last, tObj->BaseLevel);
+	 last = MIN2(last, tObj->BaseLevel + baseImage->MaxLog2);
+	 last = MIN2(last, tObj->MaxLevel);
+	 last = MAX2(first, last); /* need at least one level */
+      }
+      else {
+	 /* GL_NEAREST and GL_LINEAR only look at GL_TEXTURE_BASE_LEVEL. */
+	 first = tObj->BaseLevel;
+	 last = first;
+      }
+      break;
+   default:
+      /* Any other target: leave the stored range untouched. */
+      return;
+   }
+
+   intelObj->firstLevel = first;
+   intelObj->lastLevel = last;
+}
+
+static void copy_image_data_to_tree( struct intel_context *intel,
+				     struct intel_mipmap_tree *mt,
+				     struct intel_texture_image *intelImage )
+{
+   /* Move one image into the tree 'mt' and make the image reference it. */
+   if (!intelImage->mt) {
+      /* The image has no tree of its own, so its texels are expected
+       * in base.Data: upload them and free the system-memory copy.
+       */
+      assert(intelImage->base.Data != NULL);
+
+      intel_miptree_image_data(intel, mt,
+			       intelImage->face, intelImage->level,
+			       intelImage->base.Data,
+			       intelImage->base.RowStride);
+
+      free(intelImage->base.Data);
+      intelImage->base.Data = NULL;
+   }
+   else {
+      /* The image is held in another tree: copy from it (potentially
+       * with the blitter) and then release that old tree.
+       */
+      intel_miptree_image_copy(intel, mt,
+			       intelImage->face, intelImage->level,
+			       intelImage->mt);
+
+      intel_miptree_release(intel, intelImage->mt);
+   }
+
+   /* Either way the image now points at the destination tree. */
+   intelImage->mt = intel_miptree_reference(mt);
+}
+
+
+/* Bring the miptree of the texture bound to 'unit' up to date so it holds
+ * every active image.  Returns GL_FALSE if a fallback is needed. */
+static GLuint intel_finalize_mipmap_tree( struct intel_context *intel, GLuint unit )
+{
+   struct gl_texture_object *tObj = intel->ctx.Texture.Unit[unit]._Current;
+   struct intel_texture_object *intelObj = intel_texture_object(tObj);
+
+   GLuint face, i;
+   GLuint nr_faces = 0;
+   struct intel_texture_image *firstImage;
+
+   /* We know/require this is true by now:
+    */
+   assert(intelObj->base.Complete);
+
+   /* What levels must the tree include at a minimum?
+    */
+   intel_calculate_first_last_level( intelObj );
+   firstImage = intel_texture_image(intelObj->base.Image[0][intelObj->firstLevel]);
+
+   /* Fallback case: a texture with a border is not handled here, so
+    * drop any tree the object holds and report failure.
+    */
+   if (firstImage->base.Border) {
+      if (intelObj->mt) {
+	 intel_miptree_release(intel, intelObj->mt);
+	 intelObj->mt = NULL;
+      }
+      return GL_FALSE;
+   }
+
+   /* If both firstImage and intelObj have a tree which can contain
+    * all active images, favour firstImage.
+    */
+   if (firstImage->mt &&
+       firstImage->mt != intelObj->mt &&
+       firstImage->mt->first_level <= intelObj->firstLevel &&
+       firstImage->mt->last_level >= intelObj->lastLevel) {
+
+      if (intelObj->mt)
+	 intel_miptree_release(intel, intelObj->mt);
+
+      intelObj->mt = intel_miptree_reference(firstImage->mt);
+   }
+
+   /* Check tree can hold all active levels.  Check tree matches
+    * target, imageFormat, etc.
+    *
+    * XXX: For some layouts (eg i945?), the test might have to be
+    * first_level == firstLevel, as the tree isn't valid except at the
+    * original start level.  Hope to get around this by
+    * programming minLod, maxLod, baseLevel into the hardware and
+    * leaving the tree alone.
+    */
+   if (intelObj->mt &&
+       ((intelObj->mt->first_level > intelObj->firstLevel) ||
+	(intelObj->mt->last_level < intelObj->lastLevel) ||
+	(intelObj->mt->internal_format != firstImage->base.InternalFormat))) {
+      intel_miptree_release(intel, intelObj->mt);
+      intelObj->mt = NULL;
+   }
+
+   /* May need to create a new tree:
+    */
+   if (!intelObj->mt) {
+      intelObj->mt = intel_miptree_create(intel,
+					  intelObj->base.Target,
+					  firstImage->base.InternalFormat,
+					  intelObj->firstLevel,
+					  intelObj->lastLevel,
+					  firstImage->base.Width,
+					  firstImage->base.Height,
+					  firstImage->base.Depth,
+					  firstImage->base.TexFormat->TexelBytes,
+					  firstImage->base.IsCompressed);
+   }
+
+   /* Pull in any images not in the object's tree.  lastLevel is an
+    * inclusive bound, so the loop has to visit it as well.
+    */
+   nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+   for (face = 0; face < nr_faces; face++) {
+      for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) {
+	 struct intel_texture_image *intelImage =
+	    intel_texture_image(intelObj->base.Image[face][i]);
+
+	 /* Need to import images in main memory or held in other trees.
+	  */
+	 if (intelObj->mt != intelImage->mt) {
+	    copy_image_data_to_tree(intel,
+				    intelObj->mt,
+				    intelImage);
+	 }
+      }
+   }
+
+   return GL_TRUE;
+}
+
+void intel_add_texoffset_fixup( struct intel_context *intel,
+				GLuint unit,
+				GLuint *ptr )
+{
+   struct intel_texture_object *intelObj =
+      intel_texture_object(intel->ctx.Texture.Unit[unit]._Current);
+   /* Store textureOffset plus the offset of face 0 at firstLevel in *ptr. */
+#if 0
+   struct intel_reloc *f = &intel->fixup[intel->nr_fixups++];
+   assert(intel->nr_fixups <= INTEL_MAX_FIXUP);
+   f->dest = ptr;
+   f->value = &intelObj->textureOffset;
+   f->delta = (intel->intelScreen->tex.offset + 
+	       intel_miptree_image_offset(intelObj->mt, 0, intelObj->firstLevel));
+#else  /* relocation list disabled: write the value immediately */
+   *ptr = intelObj->textureOffset +
+      intel_miptree_image_offset(intelObj->mt, 0, intelObj->firstLevel);
+#endif
+}
+
+/* Patch every recorded relocation into the command buffer, then empty
+ * the relocation list. */
+void intel_apply_fixups( struct intel_context *intel )
+{
+   GLuint n = 0;
+
+   while (n < intel->nr_fixups) {
+      struct intel_reloc *reloc = &intel->fixup[n++];
+      *reloc->dest = *reloc->value + reloc->delta;
+   }
+
+   intel->nr_fixups = 0;
+}
+
+
+
+/* One upshot of the new manager is that it should be possible to tell
+ * ahead of time whether a certain set of buffers will cause a
+ * fallback.  This function is the place for that check; at present it
+ * is a stub that always returns GL_TRUE.
+ * Unless we do this we either have to a) hold the DRI lock
+ * while emitting all vertices and fire after each vertex buffer, or
+ * b) build a fallback path that operates on i915 command streams
+ * rather than the state in the GLcontext.
+ */
+GLboolean intel_prevalidate_buffers( struct intel_context *intel )
+{
+   return GL_TRUE;		/* never fallback */
+}
+
+/* Build and validate the buffer list for rendering; GL_FALSE on any failure. */
+GLboolean intel_validate_buffers( struct intel_context *intel )
+{
+   GLcontext *ctx = &intel->ctx;
+   GLboolean ok = GL_TRUE, list_ok;
+   GLuint i;
+
+   DBG("%s\n", __FUNCTION__);
+
+   assert(intel->locked);
+
+   /* Add the color and depth buffers to the list so that they are
+    * validated (and fenced) along with the textures:
+    */
+   bmAddBuffer(intel->buffer_list,
+	       intel->draw_region->buffer,
+	       BM_WRITE,
+	       NULL,
+	       NULL);
+
+   bmAddBuffer(intel->buffer_list,
+	       intel->depth_region->buffer,
+	       BM_WRITE,
+	       NULL,
+	       NULL);
+
+   /* Add each enabled texture, stopping at the first one whose tree
+    * cannot be finalized ('ok' is left GL_FALSE in that case): */
+   for (i = 0 ; i < ctx->Const.MaxTextureUnits && ok ; i++) {
+      if (ctx->Texture.Unit[i]._ReallyEnabled) {
+	 struct gl_texture_object *tObj = intel->ctx.Texture.Unit[i]._Current;
+	 struct intel_texture_object *intelObj = intel_texture_object(tObj);
+
+	 ok = intel_finalize_mipmap_tree( intel, i );
+	 if (ok) {
+	    bmAddBuffer(intel->buffer_list,
+			intelObj->mt->region->buffer,
+			BM_READ,
+			NULL,
+			&intelObj->textureOffset);
+	 }
+      }
+   }
+
+   list_ok = bmValidateBufferList(intel->bm, intel->buffer_list, BM_MEM_AGP);
+   assert(list_ok);
+   return ok && list_ok;   /* keep a texture failure visible to the caller */
+}
+/* Only sanity checks so far: asserts intel->locked and intel->buffer_list. */
+void intel_fence_buffers( struct intel_context *intel )
+{
+   assert(intel->locked);
+   assert(intel->buffer_list);
+}
+
+
+/* Map every image of the object that is held in a miptree, storing the
+ * mapped pointer and row stride in the image.  Covers firstLevel through
+ * lastLevel inclusive; undo with intel_tex_unmap_images(). */
+void intel_tex_map_images( struct intel_context *intel,
+			   struct intel_texture_object *intelObj )
+{
+   GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+   GLuint face, i;
+
+   DBG("%s\n", __FUNCTION__);
+
+   for (face = 0; face < nr_faces; face++) {
+      for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) {
+	 struct intel_texture_image *intelImage =
+	    intel_texture_image(intelObj->base.Image[face][i]);
+
+	 if (intelImage->mt) {
+	    intelImage->base.Data =
+	       intel_miptree_image_map(intel,
+				       intelImage->mt,
+				       intelImage->face,
+				       intelImage->level,
+				       &intelImage->base.RowStride);
+	 }
+      }
+   }
+}
+
+/* Undo intel_tex_map_images(): unmap the same inclusive level range and
+ * clear the Data pointers that the mapping filled in. */
+void intel_tex_unmap_images( struct intel_context *intel,
+			     struct intel_texture_object *intelObj )
+{
+   GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+   GLuint face, i;
+
+   for (face = 0; face < nr_faces; face++) {
+      for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) {
+	 struct intel_texture_image *intelImage =
+	    intel_texture_image(intelObj->base.Image[face][i]);
+
+	 if (intelImage->mt) {
+	    intel_miptree_image_unmap(intel, intelImage->mt);
+	    intelImage->base.Data = NULL;
+	 }
+      }
+   }
+}
--- a/src/mesa/drivers/dri/i915/intel_tris.c
+++ b/src/mesa/drivers/dri/i915/intel_tris.c
@@ -42,10 +42,106 @@
 #include "intel_batchbuffer.h"
 #include "intel_reg.h"
 #include "intel_span.h"
+#include "intel_tex.h"

 static void intelRenderPrimitive( GLcontext *ctx, GLenum prim );
 static void intelRasterPrimitive( GLcontext *ctx, GLenum rprim, GLuint hwprim );

+/* The simplest but least-good technique for integrating new buffer
+ * management:
+ *
+ * LOCK_HARDWARE
+ *   validate_buffers
+ *   emit_state to batch
+ *   emit_vertices to batch
+ *   flush batch
+ *   fence_buffers
+ * UNLOCK_HARDWARE
+ *
+ * Will look later at ways to get the emit_state and emit_vertices out
+ * of the locked region - vertex buffers, second batch buffer for
+ * primitives, relocation fixups for texture addresses.
+ */
+static void intel_flush_inline_primitive( GLcontext *ctx )
+{
+   intelContextPtr intel = INTEL_CONTEXT( ctx );
+   GLuint used = intel->batch.ptr - intel->prim.start_ptr;
+   /* Close the open inline primitive; 'used' = bytes emitted since start_ptr. */
+   assert(intel->prim.primitive != ~0);
+   /* Fewer than 8 bytes emitted: take the bytes back instead of patching. */
+   if (used < 8)
+      goto do_discard;
+   /* Fill in the slot at start_ptr now that the final size is known. */
+   *(int *)intel->prim.start_ptr = (_3DPRIMITIVE | 
+				    intel->prim.primitive |
+				    (used/4-2)); /* dwords minus 2; presumably the hw length encoding - TODO confirm */
+
+   goto finished;
+   
+ do_discard:
+   intel->batch.ptr -= used;
+   intel->batch.space += used;
+   assert(intel->batch.space >= 0); /* NOTE(review): only meaningful if batch.space is signed - confirm */
+
+ finished:
+   intel->prim.primitive = ~0; /* ~0 marks "no primitive open" (see the assert above) */
+   intel->prim.start_ptr = 0;
+   intel->prim.flush = 0;
+}
+
+
+/* Begin an inline primitive of type 'prim': reserve batch space for its
+ * command and register intel_flush_inline_primitive() to fill it in. */
+void intelStartInlinePrimitive( intelContextPtr intel, GLuint prim )
+{
+   BATCH_LOCALS;
+   
+   /* Emit a slot which will be filled with the inline primitive
+    * command later.
+    */
+   BEGIN_BATCH(2);
+   OUT_BATCH( 0 );
+   /* batch_ptr presumably comes from the BATCH macros - TODO confirm. */
+   intel->prim.start_ptr = batch_ptr;
+   intel->prim.primitive = prim;
+   intel->prim.flush = intel_flush_inline_primitive;
+
+   OUT_BATCH( 0 );
+   ADVANCE_BATCH();
+}
+
+/* Finish the open primitive, flush the batch, and restart it in a new batch. */
+void intelWrapInlinePrimitive( intelContextPtr intel )
+{
+   GLuint prim = intel->prim.primitive; /* saved: the flush below resets it to ~0 */
+
+   intel_flush_inline_primitive( &intel->ctx );
+   intelFlushBatch(intel, GL_TRUE);
+   intelInstallBatchBuffer( intel );    
+   intel_validate_buffers( intel ); /* return value is not checked here */
+   intel->vtbl.emit_state( intel );
+   intelStartInlinePrimitive( intel, prim );
+}
+/* Reserve 'dwords' GLuints in the batch and return a pointer to them. */
+GLuint *intelExtendInlinePrimitive( intelContextPtr intel, 
+				    GLuint dwords )
+{
+   GLuint sz = dwords * sizeof(GLuint); /* request size in bytes */
+   GLuint *ptr;
+
+   if (intel->batch.space < sz) { /* not enough room: wrap into a new batch */
+      intelWrapInlinePrimitive( intel );
+   }
+
+   ptr = (GLuint *)intel->batch.ptr;
+   intel->batch.ptr += sz;
+   intel->batch.space -= sz; /* NOTE(review): space is not re-checked after the wrap */
+
+   return ptr;
+}
+
+
+
 /***********************************************************************
 *                    Emit primitives as inline vertices               *
 ***********************************************************************/
@@ -520,22 +616,6 @@ intel_fallback_line( intelContextPtr intel,
 }


-static void
-intel_fallback_point( intelContextPtr intel,
-		     intelVertex *v0 )
-{
-   GLcontext *ctx = &intel->ctx;
-   SWvertex v[1];
-
-   if (0)
-      fprintf(stderr, "\n%s\n", __FUNCTION__);
-
-   _swsetup_Translate( ctx, v0, &v[0] );
-   intelSpanRenderStart( ctx );
-   _swrast_Point( ctx, &v[0] );
-   intelSpanRenderFinish( ctx );
-}
-


 /**********************************************************************/
@@ -630,12 +710,8 @@ static void intelFastRenderClippedPoly( GLcontext *ctx, const GLuint *elts,



-#define POINT_FALLBACK (0)
-#define LINE_FALLBACK (DD_LINE_STIPPLE)
-#define TRI_FALLBACK (0)
-#define ANY_FALLBACK_FLAGS (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK|\
-                            DD_TRI_STIPPLE|DD_POINT_ATTEN)
-#define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE|DD_TRI_OFFSET|DD_TRI_UNFILLED)
+#define ANY_FALLBACK_FLAGS (DD_LINE_STIPPLE | DD_TRI_STIPPLE | DD_POINT_ATTEN)
+#define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE | DD_TRI_OFFSET | DD_TRI_UNFILLED)

 void intelChooseRenderState(GLcontext *ctx)
 {
@@ -672,19 +748,14 @@ void intelChooseRenderState(GLcontext *ctx)
 	 intel->draw_tri = intel_draw_triangle;
      }

+#if 0
      /* Hook in fallbacks for specific primitives.
       */
      if (flags & ANY_FALLBACK_FLAGS)
      {
-	 if (flags & POINT_FALLBACK)
-	    intel->draw_point = intel_fallback_point;
-
-	 if (flags & LINE_FALLBACK)
+	 if (flags & DD_LINE_STIPPLE)
 	    intel->draw_line = intel_fallback_line;

-	 if (flags & TRI_FALLBACK)
-	    intel->draw_tri = intel_fallback_tri;
-
 	 if ((flags & DD_TRI_STIPPLE) && !intel->hw_stipple) 
 	    intel->draw_tri = intel_fallback_tri;

@@ -693,6 +764,8 @@ void intelChooseRenderState(GLcontext *ctx)

 	 index |= INTEL_FALLBACK_BIT;
      }
+#endif
+
   }

   if (intel->RenderIndex != index) {
@@ -760,13 +833,28 @@ static void intelRunPipeline( GLcontext *ctx )

 static void intelRenderStart( GLcontext *ctx )
 {
-   INTEL_CONTEXT(ctx)->vtbl.render_start( INTEL_CONTEXT(ctx) );
+   struct intel_context *intel = intel_context(ctx);
+
+   intel->vtbl.render_start( INTEL_CONTEXT(ctx) );
+
+   LOCK_HARDWARE(intel);
+   intelInstallBatchBuffer( intel );    
+   intel_validate_buffers( intel );
+   intel->vtbl.emit_state( intel );
 }

 static void intelRenderFinish( GLcontext *ctx )
 {
-   if (INTEL_CONTEXT(ctx)->RenderIndex & INTEL_FALLBACK_BIT)
+   struct intel_context *intel = intel_context(ctx);
+
+   if (intel->RenderIndex & INTEL_FALLBACK_BIT)
      _swrast_flush( ctx );
+
+   if (intel->prim.flush)
+      intel->prim.flush(ctx);
+
+   intelFlushBatch(intel, GL_TRUE); 
+   UNLOCK_HARDWARE(intel);
 }


--- a/src/mesa/swrast/s_context.c
+++ b/src/mesa/swrast/s_context.c
@@ -32,6 +32,7 @@
 #include "colormac.h"
 #include "mtypes.h"
 #include "program.h"
+#include "teximage.h"
 #include "swrast.h"
 #include "s_blend.h"
 #include "s_context.h"
@@ -368,6 +369,83 @@ _swrast_validate_blend_func( GLcontext *ctx, GLuint n,
 }


+/**
+ * Ensure texture image data is present for every texture that subsequent
+ * rendering may need, using the ValidateTextureImage hook to fetch it.
+ */
+static void
+_swrast_validate_texture_images(GLcontext *ctx)
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLuint unit, face, level;
+
+   /* Bail out if no textures are enabled or there is no validation hook. */
+   if (!swrast->ValidateTextureImage || !ctx->Texture._EnabledUnits)
+      return;
+
+   for (unit = 0; unit < ctx->Const.MaxTextureImageUnits; unit++) {
+      struct gl_texture_object *texObj;
+      GLuint numFaces;
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled)
+         continue;
+      texObj = ctx->Texture.Unit[unit]._Current;
+      ASSERT(texObj);
+      if (!texObj)
+         continue;
+
+      numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+      for (face = 0; face < numFaces; face++) {
+         for (level = texObj->BaseLevel; level <= texObj->_MaxLevel; level++) {
+            const struct gl_texture_image *img = texObj->Image[face][level];
+            if (img && !img->Data) {
+               swrast->ValidateTextureImage(ctx, texObj, face, level);
+               ASSERT(texObj->Image[face][level]->Data);
+            }
+         }
+      }
+   }
+}
+
+
+/**
+ * Release the image data held by every currently enabled texture.
+ * Intended for device drivers to call when they switch from software
+ * to hardware rendering.
+ */
+void
+_swrast_eject_texture_images(GLcontext *ctx)
+{
+   GLuint unit, face, level;
+
+   /* Nothing to eject when no texture unit is enabled. */
+   if (!ctx->Texture._EnabledUnits)
+      return;
+
+   for (unit = 0; unit < ctx->Const.MaxTextureImageUnits; unit++) {
+      struct gl_texture_object *texObj;
+      GLuint numFaces;
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled)
+         continue;
+      texObj = ctx->Texture.Unit[unit]._Current;
+      ASSERT(texObj);
+      if (!texObj)
+         continue;
+
+      numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+      for (face = 0; face < numFaces; face++) {
+         for (level = texObj->BaseLevel; level <= texObj->_MaxLevel; level++) {
+            struct gl_texture_image *img = texObj->Image[face][level];
+            if (img && img->Data) {
+               _mesa_free_texmemory(img->Data);
+               img->Data = NULL;
+            }
+         }
+      }
+   }
+}
+
+
+
 static void
 _swrast_sleep( GLcontext *ctx, GLbitfield new_state )
 {
@@ -452,6 +530,9 @@ _swrast_validate_derived( GLcontext *ctx )
      if (swrast->NewState & _NEW_TEXTURE)
         _swrast_update_texture_samplers( ctx );

+      if (swrast->NewState & (_NEW_TEXTURE | _NEW_PROGRAM))
+         _swrast_validate_texture_images( ctx );
+
      swrast->NewState = 0;
      swrast->StateChanges = 0;
      swrast->InvalidateState = _swrast_invalidate_state;
--- a/src/mesa/swrast/s_context.h
+++ b/src/mesa/swrast/s_context.h
@@ -226,6 +226,11 @@ typedef void (*swrast_tri_func)( GLcontext *ctx, const SWvertex *,
                                 const SWvertex *, const SWvertex *);


+typedef void (*validate_texture_image_func)(GLcontext *ctx,
+                                            struct gl_texture_object *texObj,
+                                            GLuint face, GLuint level);
+/* ^ Hook type: called for one (face, level) image of texObj that has no Data. */
+
 /** \defgroup Bitmasks
 * Bitmasks to indicate which rasterization options are enabled
 * (RasterMask)
@@ -354,6 +359,8 @@ typedef struct
    */
   GLchan *TexelBuffer;

+   validate_texture_image_func ValidateTextureImage;
+
 } SWcontext;


--- a/src/mesa/swrast/swrast.h
+++ b/src/mesa/swrast/swrast.h
@@ -244,6 +244,10 @@ _swrast_copy_texsubimage3d(GLcontext *ctx,
                           GLint x, GLint y, GLsizei width, GLsizei height);


+extern void
+_swrast_eject_texture_images(GLcontext *ctx);
+/* ^ Frees the image Data of every enabled texture (defined in s_context.c). */
+
 /**
 * The driver interface for the software rasterizer.
 * XXX this may go away.
Author	SHA1	Message	Date
Keith Whitwell	11c0215bf8	Switch between memcpy implementations according to src/dest alignment.	2006-02-01 18:42:16 +00:00
Keith Whitwell	6a13b6c346	Debug off	2006-02-01 18:30:16 +00:00
Keith Whitwell	a103097ee5	Quieten depend	2006-02-01 18:29:56 +00:00
Keith Whitwell	7efad0d84c	Just build the i915 on this branch	2006-02-01 18:29:25 +00:00
Keith Whitwell	42c88cd072	subtexrate test on this branch too	2006-02-01 18:14:09 +00:00
Keith Whitwell	2541c54e79	gearbox demo on this branch too	2006-02-01 18:13:23 +00:00
Keith Whitwell	33529e3d8e	Bump driver date	2006-02-01 18:01:04 +00:00
Keith Whitwell	84c1b82081	Disable check_copytex_fragment_ops - it doesn't really apply.	2006-02-01 16:40:23 +00:00
Keith Whitwell	a97a1439ae	Get batchbuffers working natively again. This code still relies over-heavily on the DRI lock to protect offsets in the command stream from changing before being queued on the ring, and should be viewed as being a pretty temporary mechanism before a more robust alternative is implemented, most likely based on a batchbuffer relocation list that will be used to patch prebuilt batchbuffers after validation takes place.	2006-02-01 15:54:42 +00:00
Keith Whitwell	2fd2910010	Remove the last of the old AllocateAgp mechanism. Move the batchbuffer code to being effectively a ring of bufmgr buffers.	2006-02-01 15:51:31 +00:00
Keith Whitwell	fbbda155e0	Add code to map/unmap all texture images for the software rasterizer. Add the color and depth buffers to the validation list to ensure they are fenced correctly by hardware rasterization.	2006-02-01 15:50:14 +00:00
Keith Whitwell	a49c3c0fae	Ensure that color buffers and textures are mapped (bmBufferMap) before software rasterizer fallbacks. This has two functions, firstly to ensure that the Data pointers point to something and secondly to ensure that any pending fences on those buffers are discharged before allowing the software rasterizer to read/write the data. This needs to be integrated with Brian's validate code.	2006-02-01 15:48:52 +00:00
Keith Whitwell	baf5998d59	file gearbox.c was added on branch texman_0_1_branch on 2006-02-01 18:13:23 +0000	2006-01-30 15:35:35 +00:00
Keith Whitwell	4c5acef241	Get hardware-accelerated CopyTexSubImage working well enough to run Brian's gearbox demo.	2006-01-28 23:28:14 +00:00
Keith Whitwell	9839e272cf	Allow ValidateBuffers to allocate memory for buffers which haven't yet got it by other methods. Typically this is buffers being written to by hardware excluding the fixed front/back/depth buffers which have pre-allocated memory. At some point will want to pass BM_READ/BM_WRITE flags to catch the couple of cases where buffers are treated differently in each case.	2006-01-28 23:27:39 +00:00
Keith Whitwell	b57e79ff14	remove debug	2006-01-28 17:52:50 +00:00
Keith Whitwell	43824acb4e	Build fixes	2006-01-28 17:52:05 +00:00
Keith Whitwell	d8f509e749	Use the x86 __memcpy to avoid performance cliff for uploads where the source data is worse than 64-byte aligned.	2006-01-28 17:50:59 +00:00
Keith Whitwell	eb91c93c2e	file subtexrate.c was added on branch texman_0_1_branch on 2006-02-01 18:14:09 +0000	2006-01-27 15:44:40 +00:00
Keith Whitwell	79de983b6f	initial copytexsubimage code, untested	2006-01-27 11:58:00 +00:00
Brian Paul	fdb3acf016	added _swrast_eject_texture_images()	2006-01-27 03:42:56 +00:00
Keith Whitwell	2807d1f58a	Get readbuffer correctly.	2006-01-26 18:30:34 +00:00
Keith Whitwell	93f913926e	- Remove (most of) old agp client memory hack. - Implement an accelerated version of glCopyPixels using the blitter.	2006-01-26 17:21:38 +00:00
Keith Whitwell	33ca04f379	Expand the buffer manager to include a notion of multiple pools within a memory space. Allow some pools to be excluded from the upload/evict processing, meaning that any buffers within those pools are effectively fixed. Add a mechanism to create buffers in those pools. This allows the legacy fixed front/depth/back buffers to be represented in this scheme and will allow other future pinned buffers to be allocated from fixed pools in such a way that they cannot fragment the rest of the texture memory.	2006-01-26 14:50:02 +00:00
Brian Paul	398cb30c72	Added _swrast_validate_texture_images() to make sure all textures have data resident for software rasterization. Relies on new swrast driver function: ValidateTextureImage()	2006-01-26 04:05:53 +00:00
Keith Whitwell	f67bb30314	Fix confusion over pitch. demos/texobj renders correctly.	2006-01-25 17:16:46 +00:00
Keith Whitwell	4578d7b9f0	Offset returns weren't being returned.	2006-01-25 16:57:36 +00:00
Keith Whitwell	dcdfc154c3	Remove silly debug.	2006-01-25 16:46:10 +00:00
Keith Whitwell	696ba32779	Another texture manager checkpoint: - Add code to validate textures before use. - Simplify vertex paths for now. - Make vertex paths validate textures. This is done with some pretty heavy-handed use of the dri lock - fixing this is a priority. - Add lots of debug statements demos/texobj renders, but textures are incorrect.	2006-01-25 15:40:50 +00:00
Keith Whitwell	dcfe55539f	Remove dead code. Gears runs.	2006-01-24 18:55:52 +00:00
Keith Whitwell	ff84b1f1b2	Checkpoint of texture manager rework for i915. Compiles but won't do any more than that.	2006-01-24 18:35:53 +00:00
Keith Whitwell	3bca9c47f4	Add another layer on top of the simple 2d regions in intel_regions.[ch] which keeps track of a whole, well-defined mipmap tree. These are a fixed layout on intel hardware and managing them is complicated in the face of GL's TexImage function calls where data can arrive in any order, making it difficult to guess a layout ahead of time. Wrapping mipmap trees up in a struct and programming interface like this reduces the burden elsewhere.	2006-01-24 16:38:43 +00:00
Keith Whitwell	d4d7fdb43b	layer 2d region semantics and blits on top of bufmgr buffers	2006-01-23 12:53:11 +00:00
Keith Whitwell	d65dab5777	import via texture semantics	2006-01-23 12:52:28 +00:00
Keith Whitwell	638ca019ef	Add a faked-out implementation of the buffer manager that uses the same techniques as the old dri memory manager behind the new interface. Will use this to port the i915 driver to this interface to hopefully get some easy insights from using the interface.	2006-01-16 15:30:45 +00:00