docs: Add release notes for 10.0.3 release.

Just before making the actual release.
Update version to 10.0.3
2014-02-03 11:21:23 -08:00 · 2014-02-03 11:17:06 -08:00 · 2014-02-03 09:59:52 -08:00 · 2014-02-03 09:59:52 -08:00 · 2014-02-03 09:59:37 -08:00 · 2014-01-31 13:01:44 -08:00
98 changed files with 1355 additions and 152 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-10.0.1
+10.0.3
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -11,3 +11,20 @@ f0f202e6b764be803470e27cba9102f14361ae22 glx: conditionaly build dri3 and presen
 # the 10.0 branch.  See
 # http://lists.freedesktop.org/archives/mesa-stable/2013-December/000500.html
 a057b837ddd1c725a7504eedc53c6df05a012773 egl: add HAVE_LIBDRM define, fix EGL X11 platform
+
+# Author requested skipping due to regressions
+# Picking it would require at least also picking:
+# 73c3c7e3, 3e0e9e3b, c59a605c
+b2d1c579bb84a88179072a6a783f8827e218db55 glcpp: Set extension defines after resolving the GLSL version.
+
+# These patches depend on other code not in stable branch.
+# (at least 3b22146dc714b6090f7423abbc4df53d7d1fdaa9)
+e190709119d8eb85c67bfbad5be699d39ad0118e mesa: Ensure that transform feedback refers to the correct program.
+43e77215b13b2f86e461cd8a62b542fc6854dd1c i965/gen7: Use to the correct program when uploading transform feedback state.
+
+# Author requested to ignore these four (since they depend on commits not in
+# stable).
+3313cc269bd428ca96a132d86da5fddc0f27386a i965: Add an option to ignore sample qualifier
+a92e5f7cf63d496ad7830b5cea4bbab287c25b8e i965: Use sample barycentric coordinates with per sample shading
+f5cfb4ae21df8eebfc6b86c0ce858b1c0a9160dd i965: Ignore 'centroid' interpolation qualifier in case of persample shading
+dc2f94bc786768329973403248820a2e5249f102 i965: Ignore 'centroid' interpolation qualifier in case of persample shading
--- a/common.py
+++ b/common.py
@@ -100,4 +100,4 @@ def AddOptions(opts):
 	opts.Add(BoolOption('quiet', 'DEPRECATED: profile build', 'yes'))
 	opts.Add(BoolOption('texture_float', 'enable floating-point textures and renderbuffers', 'no'))
 	if host_platform == 'windows':
-		opts.Add(EnumOption('MSVC_VERSION', 'MS Visual C++ version', None, allowed_values=('7.1', '8.0', '9.0', '10.0', '11.0')))
+		opts.Add(EnumOption('MSVC_VERSION', 'MS Visual C++ version', None, allowed_values=('7.1', '8.0', '9.0', '10.0', '11.0', '12.0')))
--- a/configure.ac
+++ b/configure.ac
@@ -227,6 +227,12 @@ esac
 AC_SUBST([VISIBILITY_CFLAGS])
 AC_SUBST([VISIBILITY_CXXFLAGS])

+dnl
+dnl Optional flags, check for compiler support
+dnl
+AX_CHECK_COMPILE_FLAG([-msse4.1], [SSE41_SUPPORTED=1], [SSE41_SUPPORTED=0])
+AM_CONDITIONAL([SSE41_SUPPORTED], [test x$SSE41_SUPPORTED = x1])
+
 dnl
 dnl Hacks to enable 32 or 64 bit build
 dnl
@@ -1527,9 +1533,9 @@ if test "x$enable_gallium_llvm" = xauto; then
 fi
 if test "x$enable_gallium_llvm" = xyes; then
    if test "x$llvm_prefix" != x; then
-        AC_PATH_PROG([LLVM_CONFIG], [llvm-config], [no], ["$llvm_prefix/bin"])
+        AC_PATH_TOOL([LLVM_CONFIG], [llvm-config], [no], ["$llvm_prefix/bin"])
    else
-        AC_PATH_PROG([LLVM_CONFIG], [llvm-config], [no])
+        AC_PATH_TOOL([LLVM_CONFIG], [llvm-config], [no])
    fi

    if test "x$LLVM_CONFIG" != xno; then
--- a/docs/relnotes/10.0.1.html
+++ b/docs/relnotes/10.0.1.html
@@ -31,6 +31,9 @@ because compatibility contexts not supported.

 <h2>MD5 checksums</h2>
 <pre>
+0a72ca5b36046a658bf6038326ff32ed  MesaLib-10.0.1.tar.bz2
+01bde35c912e504ba62caf1ef9f7022c  MesaLib-10.0.1.tar.gz
+59a174a11a89e6b1b8ee9c3f7e3c388c  MesaLib-10.0.1.zip
 </pre>


--- a/docs/relnotes/10.0.2.html
+++ b/docs/relnotes/10.0.2.html
@@ -0,0 +1,161 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.0.2 Release Notes / (January 9, 2014)</h1>
+
+<p>
+Mesa 10.0.2 is a bug fix release which fixes bugs found since the 10.0.1 release.
+</p>
+<p>
+Mesa 10.0.2 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>MD5 checksums</h2>
+<pre>
+de7d14baf0101b697c140d2f47ef27e9  MesaLib-10.0.2.tar.gz
+8544c0ab3e438a08b5103421ea15b6d2  MesaLib-10.0.2.tar.bz2
+181b0d6c1afca38e98a930d0e564ed90  MesaLib-10.0.2.zip
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=70740">Bug 70740</a> - HiZ on SNB causes GPU hang with WebGL web app</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=72026">Bug 72026</a> - SIGSEGV in fs_visitor::visit(ir_dereference_variable*)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=72264">Bug 72264</a> - GLSL error reporting</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=72369">Bug 72369</a> - glitches in serious sam 3 with the sb shader backend</li>
+
+</ul>
+
+<h2>Changes</h2>
+
+<p>The full set of changes can be viewed by using the following git command:</p>
+
+<pre>
+  git log mesa-10.0.1..mesa-10.0.2
+</pre>
+
+<p>Aaron Watry (8):</p>
+<ul>
+  <li>clover: Remove unused variable</li>
+  <li>pipe_loader/sw: close dev-&gt;lib when initialization fails</li>
+  <li>radeon/compute: Stop leaking LLVMContexts in radeon_llvm_parse_bitcode</li>
+  <li>r600/compute: Free compiled kernels when deleting compute state</li>
+  <li>r600/compute: Use the correct FREE macro when deleting compute state</li>
+  <li>radeon/llvm: Free target data at end of optimization</li>
+  <li>st/vdpau: Destroy context when initialization fails</li>
+  <li>r600/pipe: Stop leaking context-&gt;start_compute_cs_cmd.buf on EG/CM</li>
+</ul>
+
+<p>Alex Deucher (1):</p>
+<ul>
+  <li>r600g: fix SUMO2 pci id</li>
+</ul>
+
+<p>Alexander von Gluck IV (1):</p>
+<ul>
+  <li>Haiku: Add in public GL kit headers</li>
+</ul>
+
+<p>Anuj Phogat (1):</p>
+<ul>
+  <li>mesa: Fix error code generation in glBeginConditionalRender()</li>
+</ul>
+
+<p>Carl Worth (2):</p>
+<ul>
+  <li>docs: Add md5sums for the 10.0.1 release.</li>
+  <li>Update version to 10.0.2</li>
+</ul>
+
+<p>Chad Versace (1):</p>
+<ul>
+  <li>i965/gen6: Fix HiZ hang in WebGL Google Maps</li>
+</ul>
+
+<p>Erik Faye-Lund (1):</p>
+<ul>
+  <li>glcpp: error on multiple #else/#elif directives</li>
+</ul>
+
+<p>Henri Verbeet (1):</p>
+<ul>
+  <li>i915: Add support for gl_FragData[0] reads.</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>nv50: fix a small leak on context destroy</li>
+</ul>
+
+<p>Jonathan Liu (2):</p>
+<ul>
+  <li>st/mesa: use pipe_sampler_view_release()</li>
+  <li>llvmpipe: use pipe_sampler_view_release() to avoid segfault</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>i965: Fix 3DSTATE_PUSH_CONSTANT_ALLOC_PS packet creation.</li>
+  <li>Revert "mesa: Remove GLXContextID typedef from glx.h."</li>
+</ul>
+
+<p>Kevin Rogovin (1):</p>
+<ul>
+  <li>Use line number information from entire function expression</li>
+</ul>
+
+<p>Kristian Høgsberg (1):</p>
+<ul>
+  <li>dri_util: Don't assume __DRIcontext-&gt;driverPrivate is a gl_context</li>
+</ul>
+
+<p>Marek Olšák (2):</p>
+<ul>
+  <li>mesa: fix interpretation of glClearBuffer(drawbuffer)</li>
+  <li>st/mesa: fix glClear with multiple colorbuffers and different formats</li>
+</ul>
+
+<p>Paul Berry (2):</p>
+<ul>
+  <li>glsl: Teach ir_variable_refcount about ir_loop::counter variables.</li>
+  <li>glsl: Fix inconsistent assumptions about ir_loop::counter.</li>
+</ul>
+
+<p>Vadim Girlin (1):</p>
+<ul>
+  <li>r600g/sb: fix stack size computation on evergreen</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.0.3.html
+++ b/docs/relnotes/10.0.3.html
@@ -0,0 +1,203 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.0.3 Release Notes / (February 3, 2014)</h1>
+
+<p>
+Mesa 10.0.3 is a bug fix release which fixes bugs found since the 10.0.2 release.
+</p>
+<p>
+Mesa 10.0.3 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>MD5 checksums</h2>
+<pre>
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=72708">Bug 72708</a> - Master fails to build with older gcc due to -msse4.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=72926">Bug 72926</a> - [REGRESSION,swrast] Memory-related crash with anti-aliasing enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73096">Bug 73096</a> - Query GL_RGBA_SIGNED_COMPONENTS_EXT missing</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73100">Bug 73100</a> - Please use AC_PATH_TOOL instead of AC_PATH_PROG for llvm-config</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73418">Bug 73418</a> - OpenCL hangs graphics on CAYMAN</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73473">Bug 73473</a> - Potential crash bug in src/gallium/auxiliary/rtasm/rtasm_execmem.c</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73915">Bug 73915</a> - sample shading + centroid broken since f5cfb4a</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73956">Bug 73956</a> - SIGSEGV when passing GL_NONE to glReadBuffer</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=74026">Bug 74026</a> - Compiler rejects chained assignments involving array dereferences</li>
+
+</ul>
+
+<h2>Changes</h2>
+
+<p>The full set of changes can be viewed by using the following git command:</p>
+
+<pre>
+  git log mesa-10.0.2..mesa-10.0.3
+</pre>
+
+<p>Aaron Watry (2):</p>
+<ul>
+  <li>radeon: Move gfx/dma cs cleanup to r600_common_context_cleanup</li>
+  <li>st/dri: prevent leak of dri option default values</li>
+</ul>
+
+<p>Andreas Fänger (1):</p>
+<ul>
+  <li>swrast: fix delayed texel buffer allocation regression for OpenMP</li>
+</ul>
+
+<p>Anuj Phogat (3):</p>
+<ul>
+  <li>glsl: Disable ARB_texture_rectangle in shader version 100.</li>
+  <li>i965: Use sample barycentric coordinates with per sample shading</li>
+  <li>i965: Ignore 'centroid' interpolation qualifier in case of persample shading</li>
+</ul>
+
+<p>Brian Paul (3):</p>
+<ul>
+  <li>mesa: implement missing glGet(GL_RGBA_SIGNED_COMPONENTS_EXT) query</li>
+  <li>st/mesa: fix glReadBuffer(GL_NONE) segfault</li>
+  <li>draw: fix incorrect vertex size computation in LLVM drawing code</li>
+</ul>
+
+<p>Carl Worth (5):</p>
+<ul>
+  <li>Add md5sums for 10.0.2. release.</li>
+  <li>cherry-ignore: Ignore several patches not yet ready for the stable branch</li>
+  <li>Drop another couple of patches.</li>
+  <li>cherry-ignore: Ignore 4 patches at teh request of the author, (Anuj).</li>
+  <li>Update version to 10.0.3</li>
+</ul>
+
+<p>Chad Versace (1):</p>
+<ul>
+  <li>i965/gen6/blorp: Emit more flushes to workaround hangs</li>
+</ul>
+
+<p>Chris Forbes (1):</p>
+<ul>
+  <li>i965: fold offset into coord for textureOffset(gsampler2DRect)</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>mesa: use signed temporary variable to store _ColorDrawBufferIndexes</li>
+  <li>st/mesa: use signed temporary variable to store _ColorDrawBufferIndexes</li>
+  <li>nv50: access only the available amount of textures</li>
+  <li>nv50: access only the available amount of constbuf</li>
+  <li>gallium/rtasm: handle mmap failures appropriately</li>
+</ul>
+
+<p>Eric Anholt (2):</p>
+<ul>
+  <li>i965: Fix handling of MESA_pack_invert in blit (PBO) readpixels.</li>
+  <li>i965: Don't do the temporary-and-blit-copy for INVALIDATE_RANGE maps.</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>mesa: Add COMPRESSED_RGBA_S3TC_DXT1_EXT to COMPRESSED_TEXTURE_FORMATS for GLES</li>
+  <li>radeon / r200: Pass the API into _mesa_initialize_context</li>
+</ul>
+
+<p>Ilia Mirkin (2):</p>
+<ul>
+  <li>mesa: fix GL_COLOR_SUM enum for drivers without ARB_vertex_program</li>
+  <li>st/vdpau: don't return a device if the screen doesn't support NPOT</li>
+</ul>
+
+<p>José Fonseca (1):</p>
+<ul>
+  <li>mesa: Use IROUND instead of roundf.</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>glsl: Rename "expr" to "lhs_expr" in vector_extract munging code.</li>
+  <li>glsl: Fix chained assignments of vector channels.</li>
+</ul>
+
+<p>Lauri Kasanen (1):</p>
+<ul>
+  <li>mesa: Fix build to properly check for supported compiler flags</li>
+</ul>
+
+<p>Marek Olšák (2):</p>
+<ul>
+  <li>st/mesa: use sRGB formats for MSAA resolving if destination is sRGB</li>
+  <li>gallium/util: util_format_srgb should not return FORMAT_NONE for sRGB formats</li>
+</ul>
+
+<p>Matt Turner (2):</p>
+<ul>
+  <li>glcpp: Define GL_EXT_shader_integer_mix in both GL and ES.</li>
+  <li>glx: Update glxext.h to revision 24777.</li>
+</ul>
+
+<p>Michał Górny (1):</p>
+<ul>
+  <li>Use AC_PATH_TOOL instead of AC_PATH_PROG for llvm-config.</li>
+</ul>
+
+<p>Paul Berry (1):</p>
+<ul>
+  <li>i965: Ensure that all necessary state is re-emitted if we run out of aperture.</li>
+</ul>
+
+<p>Paul Seidler (1):</p>
+<ul>
+  <li>build: move ARCH_LIBS definition outside of ASM definition</li>
+</ul>
+
+<p>Thomas Sondergaard (4):</p>
+<ul>
+  <li>mesa: Preliminary support for MSVC_VERSION=12.0</li>
+  <li>mesa: Fix compile error with MSVC 2013</li>
+  <li>mesa: Work around internal compiler error</li>
+  <li>mesa: Namespace qualify fma to override ambiguity with fma from math.h</li>
+</ul>
+
+<p>Tom Stellard (1):</p>
+<ul>
+  <li>r600g/compute: Emit DEALLOC_STATE on cayman after dispatching a compute shader.</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/include/GL/glx.h
+++ b/include/GL/glx.h
@@ -168,6 +168,7 @@ typedef XID GLXDrawable;
 /* GLX 1.3 and later */
 typedef struct __GLXFBConfigRec *GLXFBConfig;
 typedef XID GLXFBConfigID;
+typedef XID GLXContextID;
 typedef XID GLXWindow;
 typedef XID GLXPbuffer;

--- a/include/GL/glxext.h
+++ b/include/GL/glxext.h
@@ -33,10 +33,10 @@ extern "C" {
 ** used to make the header, and the header can be found at
 **   http://www.opengl.org/registry/
 **
-** Khronos $Revision: 23422 $ on $Date: 2013-10-08 15:40:45 -0700 (Tue, 08 Oct 2013) $
+** Khronos $Revision: 24777 $ on $Date: 2014-01-14 14:02:32 -0800 (Tue, 14 Jan 2014) $
 */

-#define GLX_GLXEXT_VERSION 20131008
+#define GLX_GLXEXT_VERSION 20140114

 /* Generated C header for:
 * API: glx
@@ -49,6 +49,7 @@ extern "C" {

 #ifndef GLX_VERSION_1_3
 #define GLX_VERSION_1_3 1
+typedef XID GLXContextID;
 typedef struct __GLXFBConfigRec *GLXFBConfig;
 typedef XID GLXWindow;
 typedef XID GLXPbuffer;
@@ -272,7 +273,6 @@ __GLXextFuncPtr glXGetProcAddressARB (const GLubyte *procName);

 #ifndef GLX_EXT_import_context
 #define GLX_EXT_import_context 1
-typedef XID GLXContextID;
 #define GLX_SHARE_CONTEXT_EXT             0x800A
 #define GLX_VISUAL_ID_EXT                 0x800B
 #define GLX_SCREEN_EXT                    0x800C
@@ -433,6 +433,14 @@ void glXCopyImageSubDataNV (Display *dpy, GLXContext srcCtx, GLuint srcName, GLe
 #endif
 #endif /* GLX_NV_copy_image */

+#ifndef GLX_NV_delay_before_swap
+#define GLX_NV_delay_before_swap 1
+typedef Bool ( *PFNGLXDELAYBEFORESWAPNVPROC) (Display *dpy, GLXDrawable drawable, GLfloat seconds);
+#ifdef GLX_GLXEXT_PROTOTYPES
+Bool glXDelayBeforeSwapNV (Display *dpy, GLXDrawable drawable, GLfloat seconds);
+#endif
+#endif /* GLX_NV_delay_before_swap */
+
 #ifndef GLX_NV_float_buffer
 #define GLX_NV_float_buffer 1
 #define GLX_FLOAT_COMPONENTS_NV           0x20B0
--- a/include/HaikuGL/GLRenderer.h
+++ b/include/HaikuGL/GLRenderer.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2006, Philippe Houdoin. All rights reserved.
+ * Distributed under the terms of the MIT License.
+
+ * This header defines BGLRenderer, the base class making up
+ * the Haiku GL renderer add-ons (essentially self-contained C++
+ * shared libraries that do the actual rendering such as
+ * libswpipe.so and libswrast.so)
+ */
+#ifndef GLRENDERER_H
+#define GLRENDERER_H
+
+
+#include <BeBuild.h>
+#include <GLView.h>
+
+
+class BGLDispatcher;
+class GLRendererRoster;
+
+typedef unsigned long renderer_id;
+
+class BGLRenderer	// Base class for the Haiku GL renderer add-ons
+{
+							// Copy constructor and copy assignment: private and unimplemented
+							BGLRenderer(const BGLRenderer &);
+							BGLRenderer & operator=(const BGLRenderer &);
+	
+public:
+							BGLRenderer(BGLView *view, ulong bgl_options,
+								BGLDispatcher *dispatcher);
+	virtual					~BGLRenderer();
+
+	void 					Acquire();	// NOTE(review): presumably takes a reference (fRefCount) - confirm in implementation
+	void					Release();	// NOTE(review): presumably drops a reference (fRefCount) - confirm in implementation
+
+	virtual void			LockGL();
+	virtual void 			UnlockGL();
+	
+	virtual	void 			SwapBuffers(bool VSync = false);
+	virtual	void			Draw(BRect updateRect);
+	virtual status_t		CopyPixelsOut(BPoint source, BBitmap *dest);
+	virtual status_t    	CopyPixelsIn(BBitmap *source, BPoint dest);
+
+ 	virtual void			FrameResized(float width, float height);
+	
+	virtual void			DirectConnected(direct_buffer_info *info);
+	virtual void			EnableDirectMode(bool enabled);
+
+	inline	int32			ReferenceCount() const { return fRefCount; };	// Current value of fRefCount
+	inline	ulong			Options() const { return fOptions; };	// Current value of fOptions
+	inline	BGLView*		GLView() { return fView; };	// The BGLView stored in fView
+	inline	BGLDispatcher*	GLDispatcher() { return fDispatcher; };	// The dispatcher stored in fDispatcher
+
+private:
+	friend class GLRendererRoster;
+
+	virtual status_t		_Reserved_Renderer_0(int32, void *);	// Reserved virtual slots; presumably kept for binary compatibility - confirm
+	virtual status_t		_Reserved_Renderer_1(int32, void *);
+	virtual status_t		_Reserved_Renderer_2(int32, void *);
+	virtual status_t		_Reserved_Renderer_3(int32, void *);
+	virtual status_t		_Reserved_Renderer_4(int32, void *);
+
+	volatile int32			fRefCount;	// Reference count, as reported by ReferenceCount()
+	BGLView*				fView;		// View returned by GLView()
+	ulong					fOptions;	// Option flags returned by Options()
+	BGLDispatcher*			fDispatcher;// GL API call dispatcher returned by GLDispatcher()
+
+	GLRendererRoster*		fOwningRoster;
+	renderer_id				fID;
+};
+
+extern "C" _EXPORT BGLRenderer* instantiate_gl_renderer(BGLView *view, ulong options, BGLDispatcher *dispatcher);
+
+
+#endif	// GLRENDERER_H
--- a/include/HaikuGL/GLView.h
+++ b/include/HaikuGL/GLView.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright 2008-2013, Haiku, Inc. All Rights Reserved.
+ * Distributed under the terms of the MIT License.
+ *
+ * This header defines BGLView, the base class making up
+ * the Haiku GL Kit.
+ *
+ */
+#ifndef BGLVIEW_H
+#define BGLVIEW_H
+
+
+#include <GL/gl.h>
+
+#define BGL_RGB			0
+#define BGL_INDEX		1
+#define BGL_SINGLE		0
+#define BGL_DOUBLE		2
+#define BGL_DIRECT		0
+#define BGL_INDIRECT	4
+#define BGL_ACCUM		8
+#define BGL_ALPHA		16
+#define BGL_DEPTH		32
+#define BGL_OVERLAY		64
+#define BGL_UNDERLAY	128
+#define BGL_STENCIL		512
+
+#ifdef __cplusplus
+
+#include <AppKit.h>
+#include <Bitmap.h>
+#include <DirectWindow.h>
+#include <View.h>
+#include <Window.h>
+#include <WindowScreen.h>
+
+
+struct glview_direct_info;
+class BGLRenderer;
+class GLRendererRoster;
+
+class BGLView : public BView {	// BView subclass; the base class making up the Haiku GL Kit
+public:
+							BGLView(BRect rect, const char* name,
+								ulong resizingMode, ulong mode,
+								ulong options);
+	virtual					~BGLView();
+
+			void			LockGL();
+			void			UnlockGL();
+			void			SwapBuffers();
+			void			SwapBuffers(bool vSync);
+
+			BView*			EmbeddedView(); // deprecated, returns NULL
+			void*			GetGLProcAddress(const char* procName);
+
+			status_t		CopyPixelsOut(BPoint source, BBitmap *dest);
+			status_t		CopyPixelsIn(BBitmap *source, BPoint dest);
+
+	// Mesa's GLenum is uint, whereas Be's was ulong!
+	virtual	void			ErrorCallback(unsigned long errorCode);
+
+	virtual void			Draw(BRect updateRect);
+	virtual void			AttachedToWindow();
+	virtual void			AllAttached();
+	virtual void			DetachedFromWindow();
+	virtual void			AllDetached();
+
+	virtual void			FrameResized(float newWidth, float newHeight);
+	virtual status_t		Perform(perform_code d, void *arg);
+
+	virtual status_t		Archive(BMessage *data, bool deep = true) const;
+
+	virtual void			MessageReceived(BMessage *message);
+	virtual void			SetResizingMode(uint32 mode);
+
+	virtual void			Show();
+	virtual void			Hide();
+
+	virtual BHandler*		ResolveSpecifier(BMessage *msg, int32 index,
+								BMessage *specifier, int32 form,
+								const char *property);
+	virtual status_t		GetSupportedSuites(BMessage *data);
+
+			void			DirectConnected(direct_buffer_info *info);
+			void			EnableDirectMode(bool enabled);
+
+			void*			getGC()	{ return fGc; } // NOTE(review): exposes the raw fGc pointer; intended use unclear - confirm with callers
+
+	virtual void			GetPreferredSize(float* width, float* height);
+
+private:
+
+	virtual void			_ReservedGLView1();
+	virtual void			_ReservedGLView2();
+	virtual void			_ReservedGLView3();
+	virtual void			_ReservedGLView4();
+	virtual void			_ReservedGLView5();
+	virtual void			_ReservedGLView6();
+	virtual void			_ReservedGLView7();
+	virtual void			_ReservedGLView8();
+
+							BGLView(const BGLView &);	// private copy constructor: not callable from outside the class
+							BGLView &operator=(const BGLView &);	// private copy assignment: not callable from outside the class
+
+			void			_DitherFront();
+			bool			_ConfirmDither();
+			void			_Draw(BRect rect);
+			void			_CallDirectConnected();
+
+			void*			fGc;
+			uint32			fOptions;
+			uint32			fDitherCount;
+			BLocker			fDrawLock;
+			BLocker			fDisplayLock;
+			glview_direct_info* fClipInfo;
+
+			BGLRenderer*	fRenderer;
+			GLRendererRoster* fRoster;
+
+			BBitmap*		fDitherMap;
+			BRect			fBounds;
+			int16*			fErrorBuffer[2];
+			uint64			_reserved[8];
+
+			void			_LockDraw();
+			void			_UnlockDraw();
+
+// BeOS compatibility: private constructor overload taking a non-const name
+private:
+							BGLView(BRect rect, char* name,
+								ulong resizingMode, ulong mode,
+								ulong options);
+};
+
+
+class BGLScreen : public BWindowScreen {	// BWindowScreen subclass declaring GL lock/unlock/swap entry points
+public:
+							BGLScreen(char* name,
+								ulong screenMode, ulong options,
+								status_t *error, bool debug=false);
+							~BGLScreen();
+
+			void			LockGL();
+			void			UnlockGL();
+			void			SwapBuffers();
+							// Mesa's GLenum is uint, whereas Be's was ulong!
+	virtual	void			ErrorCallback(unsigned long errorCode);
+
+	virtual void			ScreenConnected(bool connected);
+	virtual void			FrameResized(float width, float height);
+	virtual status_t		Perform(perform_code code, void *arg);
+
+	virtual status_t		Archive(BMessage *data, bool deep = true) const;
+	virtual void			MessageReceived(BMessage *message);
+
+	virtual void			Show();
+	virtual void			Hide();
+
+	virtual BHandler*		ResolveSpecifier(BMessage *message,
+								int32 index,
+								BMessage *specifier,
+								int32 form,
+								const char *property);
+	virtual status_t		GetSupportedSuites(BMessage *data);
+
+private:
+
+	virtual void			_ReservedGLScreen1();
+	virtual void			_ReservedGLScreen2();
+	virtual void			_ReservedGLScreen3();
+	virtual void			_ReservedGLScreen4();
+	virtual void			_ReservedGLScreen5();
+	virtual void			_ReservedGLScreen6();
+	virtual void			_ReservedGLScreen7();
+	virtual void			_ReservedGLScreen8();
+
+							BGLScreen(const BGLScreen &);	// private copy constructor: not callable from outside the class
+							BGLScreen &operator=(const BGLScreen &);	// private copy assignment: not callable from outside the class
+
+			void*			fGc;
+			long			fOptions;
+			BLocker			fDrawLock;
+
+			int32			fColorSpace;
+			uint32			fScreenMode;
+
+			uint64			_reserved[7];
+};
+
+#endif // __cplusplus
+
+#endif // BGLVIEW_H
--- a/include/HaikuGL/OpenGLKit.h
+++ b/include/HaikuGL/OpenGLKit.h
@@ -0,0 +1,10 @@
+/*
+ * Master include file for the Haiku OpenGL Kit.
+ */
+
+#include <GL/gl.h>
+#include <GLView.h>
+
+// Projects needing GL/glu.h and GL/glut.h should now
+// include these headers independently as glu and glut
+// are no longer core parts of Mesa.
--- a/include/HaikuGL/README
+++ b/include/HaikuGL/README
@@ -0,0 +1,28 @@
+These headers make up the Haiku Op*nGL kit.
+
+Headers in this directory preserve some BeOS™
+compatibility, so changes should be mentioned to the Haiku
+project mailing list.
+
+http://haiku-os.org
+
+Normal Haiku Op*nGL layout:
+
+  * headers/os/OpenGLKit.h
+  * headers/os/opengl/GLView.h
+  * headers/os/opengl/GLRenderer.h
+  * headers/os/opengl/GL/gl.h
+  * headers/os/opengl/GL/gl_mangle.h
+  * headers/os/opengl/GL/glext.h
+  * headers/os/opengl/GL/osmesa.h (needed?)
+
+Extras:
+
+  * headers/os/opengl/GL/glu.h
+  * headers/os/opengl/GL/glut.h
+
+OpenGL™ is a trademark of SGI. The usage of this trademark
+in the Haiku GL Kit is not a sign of any certification or
+endorsement by SGI or its affiliates. Usage is purely to
+allow legacy compatibility with the BeOS™ and its 3D GL
+rendering subsystem.
--- a/include/pci_ids/r600_pci_ids.h
+++ b/include/pci_ids/r600_pci_ids.h
@@ -208,7 +208,7 @@ CHIPSET(0x9644, SUMO2_9644, SUMO2)
 CHIPSET(0x9645, SUMO2_9645, SUMO2)
 CHIPSET(0x9647, SUMO_9647,  SUMO)
 CHIPSET(0x9648, SUMO_9648,  SUMO)
-CHIPSET(0x9649, SUMO_9649,  SUMO)
+CHIPSET(0x9649, SUMO2_9649, SUMO2)
 CHIPSET(0x964a, SUMO_964A,  SUMO)
 CHIPSET(0x964b, SUMO_964B,  SUMO)
 CHIPSET(0x964c, SUMO_964C,  SUMO)
--- a/m4/ax_check_compile_flag.m4
+++ b/m4/ax_check_compile_flag.m4
@@ -0,0 +1,72 @@
+# ===========================================================================
+#   http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS])
+#
+# DESCRIPTION
+#
+#   Check whether the given FLAG works with the current language's compiler
+#   or gives an error.  (Warnings, however, are ignored)
+#
+#   ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
+#   success/failure.
+#
+#   If EXTRA-FLAGS is defined, it is added to the current language's default
+#   flags (e.g. CFLAGS) when the check is done.  The check is thus made with
+#   the flags: "CFLAGS EXTRA-FLAGS FLAG".  This can for example be used to
+#   force the compiler to issue an error when a bad flag is given.
+#
+#   NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
+#   macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de>
+#   Copyright (c) 2011 Maarten Bosmans <mkbosmans@gmail.com>
+#
+#   This program is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation, either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 2
+
+AC_DEFUN([AX_CHECK_COMPILE_FLAG],
+[AC_PREREQ(2.59)dnl for _AC_LANG_PREFIX
+AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
+AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
+  ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
+  _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
+  AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],
+    [AS_VAR_SET(CACHEVAR,[yes])],
+    [AS_VAR_SET(CACHEVAR,[no])])
+  _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
+AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes],
+  [m4_default([$2], :)],
+  [m4_default([$3], :)])
+AS_VAR_POPDEF([CACHEVAR])dnl
+])dnl AX_CHECK_COMPILE_FLAGS
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -72,8 +72,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,

   const unsigned gs_out_prim = (gs ? gs->output_primitive :
                                 u_assembled_prim(prim));
-   unsigned nr = MAX2( vs->info.num_inputs,
-		       draw_total_vs_outputs(draw) );
+   unsigned nr_vs_outputs = draw_total_vs_outputs(draw);
+   unsigned nr = MAX2(vs->info.num_inputs, nr_vs_outputs);

   if (gs) {
      nr = MAX2(nr, gs->info.num_outputs + 1);
@@ -129,6 +129,9 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
   /* No need to prepare the shader.
    */
   vs->prepare(vs, draw);
+
+   /* Make sure that the vertex size didn't change at any point above */
+   assert(nr_vs_outputs == draw_total_vs_outputs(draw));
 }


--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -141,19 +141,11 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
   struct draw_geometry_shader *gs = draw->gs.geometry_shader;
   const unsigned out_prim = gs ? gs->output_primitive :
      u_assembled_prim(in_prim);
-   const unsigned nr = MAX2(vs->info.num_inputs,
-                            draw_total_vs_outputs(draw));
+   unsigned nr;

   fpme->input_prim = in_prim;
   fpme->opt = opt;

-   /* Always leave room for the vertex header whether we need it or
-    * not.  It's hard to get rid of it in particular because of the
-    * viewport code in draw_pt_post_vs.c.
-    */
-   fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
-
-
   draw_pt_post_vs_prepare( fpme->post_vs,
                            draw->clip_xy,
                            draw->clip_z,
@@ -177,6 +169,30 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
      *max_vertices = 4096;
   }

+   /* Get the number of float[4] attributes per vertex.
+    * Note: this must be done after draw_pt_emit_prepare() since that
+    * can effect the vertex size.
+    */
+   nr = MAX2(vs->info.num_inputs, draw_total_vs_outputs(draw));
+
+   /* Always leave room for the vertex header whether we need it or
+    * not.  It's hard to get rid of it in particular because of the
+    * viewport code in draw_pt_post_vs.c.
+    */
+   fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
+
+   /* Get the number of float[4] attributes per vertex.
+    * Note: this must be done after draw_pt_emit_prepare() since that
+    * can effect the vertex size.
+    */
+   nr = MAX2(vs->info.num_inputs, draw_total_vs_outputs(draw));
+
+   /* Always leave room for the vertex header whether we need it or
+    * not.  It's hard to get rid of it in particular because of the
+    * viewport code in draw_pt_post_vs.c.
+    */
+   fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
+
   /* return even number */
   *max_vertices = *max_vertices & ~1;

--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -95,8 +95,11 @@ pipe_loader_sw_create_screen(struct pipe_loader_device *dev,
      return NULL;

   init = (void *)util_dl_get_proc_address(sdev->lib, "swrast_create_screen");
-   if (!init)
+   if (!init){
+      util_dl_close(sdev->lib);
+      sdev->lib = NULL;
      return NULL;
+   }

   return init(sdev->ws);
 }
--- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c
@@ -69,7 +69,7 @@ static struct mem_block *exec_heap = NULL;
 static unsigned char *exec_mem = NULL;


-static void
+static int
 init_heap(void)
 {
   if (!exec_heap)
@@ -79,6 +79,8 @@ init_heap(void)
      exec_mem = (unsigned char *) mmap(0, EXEC_HEAP_SIZE, 
 					PROT_EXEC | PROT_READ | PROT_WRITE, 
 					MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+   return (exec_mem != MAP_FAILED);
 }


@@ -90,7 +92,8 @@ rtasm_exec_malloc(size_t size)

   pipe_mutex_lock(exec_mutex);

-   init_heap();
+   if (!init_heap())
+      goto bail;

   if (exec_heap) {
      size = (size + 31) & ~31;  /* next multiple of 32 bytes */
@@ -101,7 +104,8 @@ rtasm_exec_malloc(size_t size)
      addr = exec_mem + block->ofs;
   else 
      debug_printf("rtasm_exec_malloc failed\n");
-   
+
+bail:
   pipe_mutex_unlock(exec_mutex);
   
   return addr;
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -872,6 +872,9 @@ util_format_get_component_bits(enum pipe_format format,
 static INLINE enum pipe_format
 util_format_srgb(enum pipe_format format)
 {
+   if (util_format_is_srgb(format))
+      return format;
+
   switch (format) {
   case PIPE_FORMAT_L8_UNORM:
      return PIPE_FORMAT_L8_SRGB;
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -136,6 +136,12 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe,

   /* set the new sampler views */
   for (i = 0; i < num; i++) {
+      /* Note: we're using pipe_sampler_view_release() here to work around
+       * a possible crash when the old view belongs to another context that
+       * was already destroyed.
+       */
+      pipe_sampler_view_release(pipe,
+                                &llvmpipe->sampler_views[shader][start + i]);
      pipe_sampler_view_reference(&llvmpipe->sampler_views[shader][start + i],
                                  views[i]);
   }
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -114,6 +114,8 @@ nv50_destroy(struct pipe_context *pipe)
   draw_destroy(nv50->draw);
 #endif

+   FREE(nv50->blit);
+
   nouveau_context_destroy(&nv50->base);
 }

@@ -163,7 +165,7 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
   }

   if (res->bind & PIPE_BIND_SAMPLER_VIEW) {
-      for (s = 0; s < 5; ++s) {
+      for (s = 0; s < 3; ++s) {
      for (i = 0; i < nv50->num_textures[s]; ++i) {
         if (nv50->textures[s][i] &&
             nv50->textures[s][i]->texture == res) {
@@ -177,7 +179,7 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
   }

   if (res->bind & PIPE_BIND_CONSTANT_BUFFER) {
-      for (s = 0; s < 5; ++s) {
+      for (s = 0; s < 3; ++s) {
      for (i = 0; i < nv50->num_vtxbufs; ++i) {
         if (!nv50->constbuf[s][i].user &&
             nv50->constbuf[s][i].u.buf == res) {
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -204,6 +204,8 @@ void *evergreen_create_compute_state(
 	const unsigned char * code;
 	unsigned i;

+	shader->llvm_ctx = LLVMContextCreate();
+
 	COMPUTE_DBG(ctx->screen, "*** evergreen_create_compute_state\n");

 	header = cso->prog;
@@ -216,13 +218,14 @@ void *evergreen_create_compute_state(
 	shader->input_size = cso->req_input_mem;

 #ifdef HAVE_OPENCL 
-	shader->num_kernels = radeon_llvm_get_num_kernels(code, header->num_bytes);
+	shader->num_kernels = radeon_llvm_get_num_kernels(shader->llvm_ctx, code,
+							header->num_bytes);
 	shader->kernels = CALLOC(sizeof(struct r600_kernel), shader->num_kernels);

 	for (i = 0; i < shader->num_kernels; i++) {
 		struct r600_kernel *kernel = &shader->kernels[i];
-		kernel->llvm_module = radeon_llvm_get_kernel_module(i, code,
-							header->num_bytes);
+		kernel->llvm_module = radeon_llvm_get_kernel_module(shader->llvm_ctx, i,
+							code, header->num_bytes);
 	}
 #endif
 	return shader;
@@ -232,7 +235,18 @@ void evergreen_delete_compute_state(struct pipe_context *ctx, void* state)
 {
 	struct r600_pipe_compute *shader = (struct r600_pipe_compute *)state;

-	free(shader);
+	if (!shader)
+		return;
+
+	FREE(shader->kernels);
+
+#ifdef HAVE_OPENCL
+	if (shader->llvm_ctx){
+		LLVMContextDispose(shader->llvm_ctx);
+	}
+#endif
+
+	FREE(shader);
 }

 static void evergreen_bind_compute_state(struct pipe_context *ctx_, void *state)
@@ -475,7 +489,14 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
 	ctx->b.flags = 0;

 	if (ctx->b.chip_class >= CAYMAN) {
-		ctx->skip_surface_sync_on_next_cs_flush = true;
+		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4);
+		/* DEALLOC_STATE prevents the GPU from hanging when a
+		 * SURFACE_SYNC packet is emitted some time after a DISPATCH_DIRECT
+		 * with any of the CB*_DEST_BASE_ENA or DB_DEST_BASE_ENA bits set.
+		 */
+		cs->buf[cs->cdw++] = PKT3C(PKT3_DEALLOC_STATE, 0, 0);
+		cs->buf[cs->cdw++] = 0;
 	}

 #if 0
--- a/src/gallium/drivers/r600/evergreen_compute_internal.h
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.h
@@ -47,6 +47,10 @@ struct r600_pipe_compute {
 	unsigned private_size;
 	unsigned input_size;
 	struct r600_resource *kernel_param;
+
+#ifdef HAVE_OPENCL
+	LLVMContextRef llvm_ctx;
+#endif
 };

 struct r600_resource* r600_compute_buffer_alloc_vram(struct r600_screen *screen, unsigned size);
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -63,6 +63,7 @@
 #define R600_TEXEL_PITCH_ALIGNMENT_MASK        0x7

 #define PKT3_NOP                               0x10
+#define PKT3_DEALLOC_STATE                     0x14
 #define PKT3_DISPATCH_DIRECT                   0x15
 #define PKT3_DISPATCH_INDIRECT                 0x16
 #define PKT3_INDIRECT_BUFFER_END               0x17
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -293,7 +293,7 @@ void r600_flush_emit(struct r600_context *rctx)
 				S_0085F0_SMX_ACTION_ENA(1);
 	}

-	if (cp_coher_cntl && !rctx->skip_surface_sync_on_next_cs_flush) {
+	if (cp_coher_cntl) {
 		cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
 		cs->buf[cs->cdw++] = cp_coher_cntl;   /* CP_COHER_CNTL */
 		cs->buf[cs->cdw++] = 0xffffffff;      /* CP_COHER_SIZE */
@@ -354,8 +354,6 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)

 	/* Flush the CS. */
 	ctx->b.ws->cs_flush(ctx->b.rings.gfx.cs, flags, ctx->screen->cs_count++);
-
-	ctx->skip_surface_sync_on_next_cs_flush = false;
 }

 void r600_begin_new_cs(struct r600_context *ctx)
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -192,12 +192,7 @@ static void r600_destroy_context(struct pipe_context *context)

 	r600_release_command_buffer(&rctx->start_cs_cmd);

-	if (rctx->b.rings.gfx.cs) {
-		rctx->b.ws->cs_destroy(rctx->b.rings.gfx.cs);
-	}
-	if (rctx->b.rings.dma.cs) {
-		rctx->b.ws->cs_destroy(rctx->b.rings.dma.cs);
-	}
+	FREE(rctx->start_compute_cs_cmd.buf);

 	r600_common_context_cleanup(&rctx->b);
 	FREE(rctx);
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -507,16 +507,6 @@ struct r600_context {

 	void				*sb_context;
 	struct r600_isa		*isa;
-
-	/* Work-around for flushing problems with compute shaders on Cayman:
-	 * Emitting a SURFACE_SYNC packet with any of the CB*_DEST_BASE_ENA
-	 * or DB_DEST_BASE_ENA bits set after dispatching a compute shader
-	 * hangs the GPU.
-	 *
-	 * Setting this to true will prevent r600_flush_emit() from emitting
-	 * a SURFACE_SYNC packet.  This field will be cleared by
-	 * by r600_context_flush() after flushing the command stream. */
-	boolean				skip_surface_sync_on_next_cs_flush;
 };

 static INLINE void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -770,7 +770,6 @@ void bc_finalizer::update_ngpr(unsigned gpr) {
 unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
                                           unsigned &ifs, unsigned add) {
 	unsigned stack_elements = add;
-	bool has_non_wqm_push_with_loops_on_stack = false;
 	bool has_non_wqm_push = (add != 0);
 	region_node *r = n->is_region() ?
 			static_cast<region_node*>(n) : n->get_parent_region();
@@ -781,8 +780,6 @@ unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
 	while (r) {
 		if (r->is_loop()) {
 			++loops;
-			if (has_non_wqm_push)
-				has_non_wqm_push_with_loops_on_stack = true;
 		} else {
 			++ifs;
 			has_non_wqm_push = true;
@@ -795,15 +792,26 @@ unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
 	switch (ctx.hw_class) {
 	case HW_CLASS_R600:
 	case HW_CLASS_R700:
+		// If any non-WQM push is invoked, 2 elements should be reserved.
 		if (has_non_wqm_push)
 			stack_elements += 2;
 		break;
 	case HW_CLASS_CAYMAN:
+		// If any stack operation is invoked, 2 elements should be reserved
 		if (stack_elements)
 			stack_elements += 2;
 		break;
 	case HW_CLASS_EVERGREEN:
-		if (has_non_wqm_push_with_loops_on_stack)
+		// According to the docs we need to reserve 1 element for each of the
+		// following cases:
+		//   1) non-WQM push is used with WQM/LOOP frames on stack
+		//   2) ALU_ELSE_AFTER is used at the point of max stack usage
+		// NOTE:
+		// It was found that the conditions above are not sufficient, there are
+		// other cases where we also need to reserve stack space, that's why
+		// we always reserve 1 stack element if we have non-WQM push on stack.
+		// Condition 2 is ignored for now because we don't use this instruction.
+		if (has_non_wqm_push)
 			++stack_elements;
 		break;
 	}
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -234,6 +234,13 @@ bool r600_common_context_init(struct r600_common_context *rctx,

 void r600_common_context_cleanup(struct r600_common_context *rctx)
 {
+	if (rctx->rings.gfx.cs) {
+		rctx->ws->cs_destroy(rctx->rings.gfx.cs);
+	}
+	if (rctx->rings.dma.cs) {
+		rctx->ws->cs_destroy(rctx->rings.dma.cs);
+	}
+
 	if (rctx->allocator_so_filled_size) {
 		u_suballocator_destroy(rctx->allocator_so_filled_size);
 	}
--- a/src/gallium/drivers/radeon/radeon_llvm_util.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_util.c
@@ -33,11 +33,10 @@
 #include <llvm-c/Transforms/IPO.h>
 #include <llvm-c/Transforms/PassManagerBuilder.h>

-LLVMModuleRef radeon_llvm_parse_bitcode(const unsigned char * bitcode,
-							unsigned bitcode_len)
+LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
+							const unsigned char * bitcode, unsigned bitcode_len)
 {
 	LLVMMemoryBufferRef buf;
-	LLVMContextRef ctx = LLVMContextCreate();
 	LLVMModuleRef module;

 	buf = LLVMCreateMemoryBufferWithMemoryRangeCopy((const char*)bitcode,
@@ -47,10 +46,10 @@ LLVMModuleRef radeon_llvm_parse_bitcode(const unsigned char * bitcode,
 	return module;
 }

-unsigned radeon_llvm_get_num_kernels(const unsigned char *bitcode,
-				unsigned bitcode_len)
+unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
+				const unsigned char *bitcode, unsigned bitcode_len)
 {
-	LLVMModuleRef mod = radeon_llvm_parse_bitcode(bitcode, bitcode_len);
+	LLVMModuleRef mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
 	return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
 }

@@ -85,9 +84,10 @@ static void radeon_llvm_optimize(LLVMModuleRef mod)
 	LLVMRunPassManager(pass_manager, mod);
 	LLVMPassManagerBuilderDispose(builder);
 	LLVMDisposePassManager(pass_manager);
+	LLVMDisposeTargetData(TD);
 }

-LLVMModuleRef radeon_llvm_get_kernel_module(unsigned index,
+LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
 		const unsigned char *bitcode, unsigned bitcode_len)
 {
 	LLVMModuleRef mod;
@@ -95,7 +95,7 @@ LLVMModuleRef radeon_llvm_get_kernel_module(unsigned index,
 	LLVMValueRef *kernel_metadata;
 	unsigned i;

-	mod = radeon_llvm_parse_bitcode(bitcode, bitcode_len);
+	mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
 	num_kernels = LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
 	kernel_metadata = MALLOC(num_kernels * sizeof(LLVMValueRef));
 	LLVMGetNamedMetadataOperands(mod, "opencl.kernels", kernel_metadata);
--- a/src/gallium/drivers/radeon/radeon_llvm_util.h
+++ b/src/gallium/drivers/radeon/radeon_llvm_util.h
@@ -29,10 +29,11 @@

 #include <llvm-c/Core.h>

-LLVMModuleRef radeon_llvm_parse_bitcode(const unsigned char * bitcode,
-							unsigned bitcode_len);
-unsigned radeon_llvm_get_num_kernels(const unsigned char *bitcode, unsigned bitcode_len);
-LLVMModuleRef radeon_llvm_get_kernel_module(unsigned index,
+LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
+			const unsigned char * bitcode, unsigned bitcode_len);
+unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
+			const unsigned char *bitcode, unsigned bitcode_len);
+LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
 			const unsigned char *bitcode, unsigned bitcode_len);

 #endif
--- a/src/gallium/drivers/radeonsi/radeonsi_compute.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_compute.c
@@ -20,6 +20,7 @@ struct si_pipe_compute {

        struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS];

+	LLVMContextRef llvm_ctx;
 };

 static void *radeonsi_create_compute_state(
@@ -33,6 +34,8 @@ static void *radeonsi_create_compute_state(
 	const unsigned char *code;
 	unsigned i;

+	program->llvm_ctx = LLVMContextCreate();
+
 	header = cso->prog;
 	code = cso->prog + sizeof(struct pipe_llvm_program_header);

@@ -41,13 +44,13 @@ static void *radeonsi_create_compute_state(
 	program->private_size = cso->req_private_mem;
 	program->input_size = cso->req_input_mem;

-	program->num_kernels = radeon_llvm_get_num_kernels(code,
+	program->num_kernels = radeon_llvm_get_num_kernels(program->llvm_ctx, code,
 							header->num_bytes);
 	program->kernels = CALLOC(sizeof(struct si_pipe_shader),
 							program->num_kernels);
 	for (i = 0; i < program->num_kernels; i++) {
-		LLVMModuleRef mod = radeon_llvm_get_kernel_module(i, code,
-							header->num_bytes);
+		LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i,
+							code, header->num_bytes);
 		si_compile_llvm(rctx, &program->kernels[i], mod);
 		LLVMDisposeModule(mod);
 	}
@@ -272,6 +275,10 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){
 		FREE(program->kernels);
 	}

+	if (program->llvm_ctx){
+		LLVMContextDispose(program->llvm_ctx);
+	}
+
 	//And then free the program itself.
 	FREE(program);
 }
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -860,8 +860,8 @@ clamp_colors(float (*quadColor)[4])
 {
   unsigned i, j;

-   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-      for (i = 0; i < 4; i++) {
+   for (i = 0; i < 4; i++) {
+      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
         quadColor[i][j] = CLAMP(quadColor[i][j], 0.0F, 1.0F);
      }
   }
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -117,7 +117,6 @@ namespace {
           const std::string &opts, clang::LangAS::Map& address_spaces) {

      clang::CompilerInstance c;
-      clang::CompilerInvocation invocation;
      clang::EmitLLVMOnlyAction act(&llvm::getGlobalContext());
      std::string log;
      llvm::raw_string_ostream s_log(log);
--- a/src/gallium/state_trackers/dri/common/dri_screen.c
+++ b/src/gallium/state_trackers/dri/common/dri_screen.c
@@ -360,6 +360,12 @@ dri_destroy_option_cache(struct dri_screen * screen)
   }

   free(screen->optionCache.values);
+
+   /* Default values are copied to screen->optionCache.values in
+    * initOptionCache. The info field, however, is a pointer copy, so don't free
+    * that twice.
+    */
+   free(screen->optionCacheDefaults.values);
 }

 void
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -72,6 +72,11 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device,
      goto no_context;
   }

+   if (!pscreen->get_param(pscreen, PIPE_CAP_NPOT_TEXTURES)) {
+      ret = VDP_STATUS_NO_IMPLEMENTATION;
+      goto no_context;
+   }
+
   *device = vlAddDataHTAB(dev);
   if (*device == 0) {
      ret = VDP_STATUS_ERROR;
@@ -86,6 +91,7 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device,
   return VDP_STATUS_OK;

 no_handle:
+   dev->context->destroy(dev->context);
   /* Destroy vscreen */
 no_context:
   vl_screen_destroy(dev->vscreen);
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -1651,7 +1651,7 @@ ast_function_expression::hir(exec_list *instructions,
   } else {
      const ast_expression *id = subexpressions[0];
      const char *func_name = id->primary_expression.identifier;
-      YYLTYPE loc = id->get_location();
+      YYLTYPE loc = get_location();
      exec_list actual_parameters;

      process_parameters(instructions, &actual_parameters, &this->expressions,
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -734,14 +734,15 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
 {
   void *ctx = state;
   bool error_emitted = (lhs->type->is_error() || rhs->type->is_error());
+   ir_rvalue *extract_channel = NULL;

   /* If the assignment LHS comes back as an ir_binop_vector_extract
    * expression, move it to the RHS as an ir_triop_vector_insert.
    */
   if (lhs->ir_type == ir_type_expression) {
-      ir_expression *const expr = lhs->as_expression();
+      ir_expression *const lhs_expr = lhs->as_expression();

-      if (unlikely(expr->operation == ir_binop_vector_extract)) {
+      if (unlikely(lhs_expr->operation == ir_binop_vector_extract)) {
         ir_rvalue *new_rhs =
            validate_assignment(state, lhs_loc, lhs->type,
                                rhs, is_initializer);
@@ -749,12 +750,24 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
         if (new_rhs == NULL) {
            return lhs;
         } else {
+            /* This converts:
+             * - LHS: (expression float vector_extract <vec> <channel>)
+             * - RHS: <scalar>
+             * into:
+             * - LHS: <vec>
+             * - RHS: (expression vec2 vector_insert <vec> <channel> <scalar>)
+             *
+             * The LHS type is now a vector instead of a scalar.  Since GLSL
+             * allows assignments to be used as rvalues, we need to re-extract
+             * the channel from assignment_temp when returning the rvalue.
+             */
+            extract_channel = lhs_expr->operands[1];
            rhs = new(ctx) ir_expression(ir_triop_vector_insert,
-                                         expr->operands[0]->type,
-                                         expr->operands[0],
+                                         lhs_expr->operands[0]->type,
+                                         lhs_expr->operands[0],
                                         new_rhs,
-                                         expr->operands[1]);
-            lhs = expr->operands[0]->clone(ctx, NULL);
+                                         extract_channel);
+            lhs = lhs_expr->operands[0]->clone(ctx, NULL);
         }
      }
   }
@@ -848,6 +861,11 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
   if (!error_emitted)
      instructions->push_tail(new(ctx) ir_assignment(lhs, deref_var));

+   if (extract_channel) {
+      return new(ctx) ir_expression(ir_binop_vector_extract,
+                                    new(ctx) ir_dereference_variable(var),
+                                    extract_channel->clone(ctx, NULL));
+   }
   return new(ctx) ir_dereference_variable(var);
 }

--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -3864,7 +3864,7 @@ builtin_builder::_fma(const glsl_type *type)
   ir_variable *c = in_var(type, "c");
   MAKE_SIG(type, gpu_shader5, 3, a, b, c);

-   body.emit(ret(fma(a, b, c)));
+   body.emit(ret(ir_builder::fma(a, b, c)));

   return sig;
 }
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -310,6 +310,11 @@ control_line:
 			_glcpp_parser_expand_and_lex_from (parser,
 							   ELIF_EXPANDED, $2);
 		}
+		else if (parser->skip_stack &&
+		    parser->skip_stack->has_else)
+		{
+			glcpp_error(& @1, parser, "#elif after #else");
+		}
 		else
 		{
 			_glcpp_parser_skip_stack_change_if (parser, & @1,
@@ -324,6 +329,11 @@ control_line:
 		{
 			glcpp_error(& @1, parser, "#elif with no expression");
 		}
+		else if (parser->skip_stack &&
+		    parser->skip_stack->has_else)
+		{
+			glcpp_error(& @1, parser, "#elif after #else");
+		}
 		else
 		{
 			_glcpp_parser_skip_stack_change_if (parser, & @1,
@@ -332,7 +342,17 @@ control_line:
 		}
 	}
 |	HASH_ELSE {
-		_glcpp_parser_skip_stack_change_if (parser, & @1, "else", 1);
+		if (parser->skip_stack &&
+		    parser->skip_stack->has_else)
+		{
+			glcpp_error(& @1, parser, "multiple #else");
+		}
+		else
+		{
+			_glcpp_parser_skip_stack_change_if (parser, & @1, "else", 1);
+			if (parser->skip_stack)
+				parser->skip_stack->has_else = true;
+		}
 	} NEWLINE
 |	HASH_ENDIF {
 		_glcpp_parser_skip_stack_pop (parser, & @1);
@@ -1252,9 +1272,6 @@ glcpp_parser_create (const struct gl_extensions *extensions, int api)
 	      if (extensions->ARB_sample_shading)
 	         add_builtin_define(parser, "GL_ARB_sample_shading", 1);

-	      if (extensions->EXT_shader_integer_mix)
-	         add_builtin_define(parser, "GL_EXT_shader_integer_mix", 1);
-
 	      if (extensions->ARB_texture_gather)
 	         add_builtin_define(parser, "GL_ARB_texture_gather", 1);

@@ -1263,6 +1280,11 @@ glcpp_parser_create (const struct gl_extensions *extensions, int api)
 	   }
 	}

+	if (extensions != NULL) {
+	   if (extensions->EXT_shader_integer_mix)
+	      add_builtin_define(parser, "GL_EXT_shader_integer_mix", 1);
+	}
+
 	language_version = 110;
 	add_builtin_define(parser, "__VERSION__", language_version);

@@ -2024,6 +2046,7 @@ _glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, YYLTYPE *loc,
 		node->type = SKIP_TO_ENDIF;
 	}

+	node->has_else = false;
 	node->next = parser->skip_stack;
 	parser->skip_stack = node;
 }
--- a/src/glsl/glcpp/glcpp.h
+++ b/src/glsl/glcpp/glcpp.h
@@ -153,6 +153,7 @@ typedef enum skip_type {

 typedef struct skip_node {
 	skip_type_t type;
+	bool has_else;
 	YYLTYPE loc; /* location of the initial #if/#elif/... */
 	struct skip_node *next;
 } skip_node_t;
--- a/src/glsl/glcpp/tests/118-multiple-else.c
+++ b/src/glsl/glcpp/tests/118-multiple-else.c
@@ -0,0 +1,6 @@
+#if 0
+#else
+int foo;
+#else
+int bar;
+#endif
--- a/src/glsl/glcpp/tests/118-multiple-else.c.expected
+++ b/src/glsl/glcpp/tests/118-multiple-else.c.expected
@@ -0,0 +1,8 @@
+0:4(1): preprocessor error: multiple #else
+
+
+int foo;
+
+int bar;
+
+
--- a/src/glsl/glcpp/tests/119-elif-after-else.c
+++ b/src/glsl/glcpp/tests/119-elif-after-else.c
@@ -0,0 +1,6 @@
+#if 0
+#else
+int foo;
+#elif 0
+int bar;
+#endif
--- a/src/glsl/glcpp/tests/119-elif-after-else.c.expected
+++ b/src/glsl/glcpp/tests/119-elif-after-else.c.expected
@@ -0,0 +1,8 @@
+0:4(1): preprocessor error: #elif after #else
+
+
+int foo;
+
+int bar;
+
+
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -292,6 +292,10 @@ _mesa_glsl_parse_state::process_version_directive(YYLTYPE *locp, int version,
      }
   }

+   if (this->es_shader) {
+      this->ARB_texture_rectangle_enable = false;
+   }
+
   this->language_version = version;

   bool supported = false;
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -163,7 +163,8 @@ ir_loop::clone(void *mem_ctx, struct hash_table *ht) const
      new_loop->to = this->to->clone(mem_ctx, ht);
   if (this->increment)
      new_loop->increment = this->increment->clone(mem_ctx, ht);
-   new_loop->counter = counter;
+   if (this->counter)
+      new_loop->counter = this->counter->clone(mem_ctx, ht);

   foreach_iter(exec_list_iterator, iter, this->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -1402,7 +1402,7 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
         data.f[c] = ldexp(op[0]->value.f[c], op[1]->value.i[c]);
         /* Flush subnormal values to zero. */
         if (!isnormal(data.f[c]))
-            data.f[c] = copysign(0.0, op[0]->value.f[c]);
+            data.f[c] = copysign(0.0f, op[0]->value.f[c]);
      }
      break;

--- a/src/glsl/ir_hv_accept.cpp
+++ b/src/glsl/ir_hv_accept.cpp
@@ -87,6 +87,12 @@ ir_loop::accept(ir_hierarchical_visitor *v)
   if (s != visit_continue)
      return (s == visit_continue_with_parent) ? visit_continue : s;

+   if (this->counter) {
+      s = this->counter->accept(v);
+      if (s != visit_continue)
+         return (s == visit_continue_with_parent) ? visit_continue : s;
+   }
+
   s = visit_list_elements(v, &this->body_instructions);
   if (s == visit_stop)
      return s;
--- a/src/glsl/ir_variable_refcount.cpp
+++ b/src/glsl/ir_variable_refcount.cpp
@@ -132,3 +132,24 @@ ir_variable_refcount_visitor::visit_leave(ir_assignment *ir)

   return visit_continue;
 }
+
+
+ir_visitor_status
+ir_variable_refcount_visitor::visit_leave(ir_loop *ir)
+{
+   /* If the loop has a counter variable, it is implicitly referenced and
+    * assigned to.  Note that since the LHS of an assignment is counted as a
+    * reference, we actually have to increment referenced_count by 2 so that
+    * later code will know that the variable isn't just assigned to.
+    */
+   if (ir->counter != NULL) {
+      ir_variable_refcount_entry *entry =
+         this->get_variable_entry(ir->counter);
+      if (entry) {
+         entry->referenced_count += 2;
+         entry->assigned_count++;
+      }
+   }
+
+   return visit_continue;
+}
--- a/src/glsl/ir_variable_refcount.h
+++ b/src/glsl/ir_variable_refcount.h
@@ -60,6 +60,7 @@ public:

   virtual ir_visitor_status visit_enter(ir_function_signature *);
   virtual ir_visitor_status visit_leave(ir_assignment *);
+   virtual ir_visitor_status visit_leave(ir_loop *);

   ir_variable_refcount_entry *get_variable_entry(ir_variable *var);

--- a/src/glsl/loop_controls.cpp
+++ b/src/glsl/loop_controls.cpp
@@ -254,7 +254,7 @@ loop_control_visitor::visit_leave(ir_loop *ir)
 		     ir->from = init->clone(ir, NULL);
 		     ir->to = limit->clone(ir, NULL);
 		     ir->increment = lv->increment->clone(ir, NULL);
-		     ir->counter = lv->var;
+		     ir->counter = lv->var->clone(ir, NULL);
 		     ir->cmp = cmp;

 		     max_iterations = iterations;
--- a/src/mesa/Makefile.am
+++ b/src/mesa/Makefile.am
@@ -95,6 +95,12 @@ AM_CPPFLAGS = $(DEFINES) $(INCLUDE_DIRS)
 AM_CFLAGS = $(LLVM_CFLAGS) $(VISIBILITY_CFLAGS)
 AM_CXXFLAGS = $(LLVM_CFLAGS) $(VISIBILITY_CXXFLAGS)

+ARCH_LIBS =
+
+if SSE41_SUPPORTED
+ARCH_LIBS += libmesa_sse41.la
+endif
+
 MESA_ASM_FILES_FOR_ARCH =

 if HAVE_X86_ASM
@@ -103,8 +109,6 @@ noinst_PROGRAMS = gen_matypes
 gen_matypes_SOURCES = x86/gen_matypes.c
 BUILT_SOURCES += matypes.h

-ARCH_LIBS = libmesa_sse41.la
-
 if HAVE_X86_64_ASM
 MESA_ASM_FILES_FOR_ARCH += $(X86_64_FILES)
 AM_CPPFLAGS += -I$(builddir)/x86-64 -I$(srcdir)/x86-64
--- a/src/mesa/drivers/dri/common/dri_util.c
+++ b/src/mesa/drivers/dri/common/dri_util.c
@@ -438,16 +438,19 @@ driCreateContextAttribs(__DRIscreen *screen, int api,
        return NULL;
    }

-    struct gl_context *ctx = context->driverPrivate;
+    *error = __DRI_CTX_ERROR_SUCCESS;
+    return context;
+}
+
+void
+driContextSetFlags(struct gl_context *ctx, uint32_t flags)
+{
    if ((flags & __DRI_CTX_FLAG_FORWARD_COMPATIBLE) != 0)
        ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT;
    if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
        ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_DEBUG_BIT;
        ctx->Debug.DebugOutput = GL_TRUE;
    }
-
-    *error = __DRI_CTX_ERROR_SUCCESS;
-    return context;
 }

 static __DRIcontext *
--- a/src/mesa/drivers/dri/common/dri_util.h
+++ b/src/mesa/drivers/dri/common/dri_util.h
@@ -289,6 +289,9 @@ dri2InvalidateDrawable(__DRIdrawable *drawable);
 extern void
 driUpdateFramebufferSize(struct gl_context *ctx, const __DRIdrawable *dPriv);

+extern void
+driContextSetFlags(struct gl_context *ctx, uint32_t flags);
+
 extern const __DRIimageDriverExtension driImageDriverExtension;

 #endif /* _DRI_UTIL_H_ */
--- a/src/mesa/drivers/dri/i915/i830_context.c
+++ b/src/mesa/drivers/dri/i915/i830_context.c
@@ -56,6 +56,7 @@ i830CreateContext(int api,
                  __DRIcontext * driContextPriv,
                  unsigned major_version,
                  unsigned minor_version,
+                  uint32_t flags,
                  unsigned *error,
                  void *sharedContextPrivate)
 {
@@ -73,7 +74,7 @@ i830CreateContext(int api,
   i830InitDriverFunctions(&functions);

   if (!intelInitContext(intel, __DRI_API_OPENGL,
-                         major_version, minor_version,
+                         major_version, minor_version, flags,
                         mesaVis, driContextPriv,
                         sharedContextPrivate, &functions,
                         error)) {
--- a/src/mesa/drivers/dri/i915/i830_context.h
+++ b/src/mesa/drivers/dri/i915/i830_context.h
@@ -183,6 +183,7 @@ i830CreateContext(int api,
                  __DRIcontext * driContextPriv,
                  unsigned major_version,
                  unsigned minor_version,
+                  uint32_t flags,
                  unsigned *error,
                  void *sharedContextPrivate);

--- a/src/mesa/drivers/dri/i915/i915_context.c
+++ b/src/mesa/drivers/dri/i915/i915_context.c
@@ -151,6 +151,7 @@ i915CreateContext(int api,
                  __DRIcontext * driContextPriv,
                  unsigned major_version,
                  unsigned minor_version,
+                  uint32_t flags,
                  unsigned *error,
                  void *sharedContextPrivate)
 {
@@ -168,7 +169,7 @@ i915CreateContext(int api,

   i915InitDriverFunctions(&functions);

-   if (!intelInitContext(intel, api, major_version, minor_version,
+   if (!intelInitContext(intel, api, major_version, minor_version, flags,
                         mesaVis, driContextPriv,
                         sharedContextPrivate, &functions,
                         error)) {
--- a/src/mesa/drivers/dri/i915/i915_context.h
+++ b/src/mesa/drivers/dri/i915/i915_context.h
@@ -324,6 +324,7 @@ extern bool i915CreateContext(int api,
 			      __DRIcontext * driContextPriv,
                              unsigned major_version,
                              unsigned minor_version,
+                              uint32_t flags,
                              unsigned *error,
 			      void *sharedContextPrivate);

--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -146,6 +146,7 @@ src_vector(struct i915_fragment_program *p,
   case PROGRAM_OUTPUT:
      switch (source->Index) {
      case FRAG_RESULT_COLOR:
+      case FRAG_RESULT_DATA0:
 	 src = UREG(REG_TYPE_OC, 0);
 	 break;
      case FRAG_RESULT_DEPTH:
--- a/src/mesa/drivers/dri/i915/intel_context.c
+++ b/src/mesa/drivers/dri/i915/intel_context.c
@@ -409,6 +409,7 @@ intelInitContext(struct intel_context *intel,
                 int api,
                 unsigned major_version,
                 unsigned minor_version,
+                 uint32_t flags,
                 const struct gl_config * mesaVis,
                 __DRIcontext * driContextPriv,
                 void *sharedContextPrivate,
--- a/src/mesa/drivers/dri/i915/intel_context.h
+++ b/src/mesa/drivers/dri/i915/intel_context.h
@@ -401,6 +401,7 @@ extern bool intelInitContext(struct intel_context *intel,
                             int api,
                             unsigned major_version,
                             unsigned minor_version,
+                             uint32_t flags,
                             const struct gl_config * mesaVis,
                             __DRIcontext * driContextPriv,
                             void *sharedContextPrivate,
--- a/src/mesa/drivers/dri/i915/intel_screen.c
+++ b/src/mesa/drivers/dri/i915/intel_screen.c
@@ -930,6 +930,7 @@ i830CreateContext(int api,
 		  __DRIcontext *driContextPriv,
 		  unsigned major_version,
 		  unsigned minor_version,
+                  uint32_t flags,
 		  unsigned *error,
 		  void *sharedContextPrivate);

@@ -939,6 +940,7 @@ i915CreateContext(int api,
 		  __DRIcontext *driContextPriv,
                  unsigned major_version,
                  unsigned minor_version,
+                  uint32_t flags,
                  unsigned *error,
 		  void *sharedContextPrivate);

@@ -970,13 +972,13 @@ intelCreateContext(gl_api api,

   if (IS_9XX(intelScreen->deviceID)) {
      success = i915CreateContext(api, mesaVis, driContextPriv,
-                                  major_version, minor_version, error,
-                                  sharedContextPrivate);
+                                  major_version, minor_version, flags,
+                                  error, sharedContextPrivate);
   } else {
      intelScreen->no_vbo = true;
      success = i830CreateContext(api, mesaVis, driContextPriv,
-                                  major_version, minor_version, error,
-                                  sharedContextPrivate);
+                                  major_version, minor_version, flags,
+                                  error, sharedContextPrivate);
   }

   if (success)
--- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
@@ -266,7 +266,6 @@ retry:
    */
   brw->state.dirty.brw = ~0;
   brw->state.dirty.cache = ~0;
-   brw->batch.need_workaround_flush = true;
   brw->ib.type = -1;
   intel_batchbuffer_clear_cache(brw);

--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -662,6 +662,8 @@ brwCreateContext(gl_api api,
      return false;
   }

+   driContextSetFlags(ctx, flags);
+
   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -445,6 +445,12 @@ retry:
 	    }
 	 }
      }
+
+      /* Now that we know we haven't run out of aperture space, we can safely
+       * reset the dirty bits.
+       */
+      if (brw->state.dirty.brw)
+         brw_clear_dirty_bits(brw);
   }

   if (brw->always_flush_batch)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1008,7 +1008,7 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
 fs_inst *
 fs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp,
                         glsl_interp_qualifier interpolation_mode,
-                         bool is_centroid)
+                         bool is_centroid, bool is_sample)
 {
   brw_wm_barycentric_interp_mode barycoord_mode;
   if (brw->gen >= 6) {
@@ -1017,6 +1017,11 @@ fs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp,
            barycoord_mode = BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
         else
            barycoord_mode = BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
+      } else if (is_sample) {
+          if (interpolation_mode == INTERP_QUALIFIER_SMOOTH)
+            barycoord_mode = BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC;
+         else
+            barycoord_mode = BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC;
      } else {
         if (interpolation_mode == INTERP_QUALIFIER_SMOOTH)
            barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
@@ -1094,7 +1099,9 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
 		*/
               struct brw_reg interp = interp_reg(location, k);
               emit_linterp(attr, fs_reg(interp), interpolation_mode,
-                            ir->centroid);
+                            ir->centroid && !c->key.persample_shading,
+                            c->key.persample_shading);
+
               if (brw->needs_unlit_centroid_workaround && ir->centroid) {
                  /* Get the pixel/sample mask into f0 so that we know
                   * which pixels are lit.  Then, for each channel that is
@@ -1103,7 +1110,8 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
                   */
                  emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
                  fs_inst *inst = emit_linterp(attr, fs_reg(interp),
-                                               interpolation_mode, false);
+                                               interpolation_mode,
+                                               false, false);
                  inst->predicate = BRW_PREDICATE_NORMAL;
                  inst->predicate_inverse = true;
               }
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -338,7 +338,7 @@ public:
   fs_reg *emit_fragcoord_interpolation(ir_variable *ir);
   fs_inst *emit_linterp(const fs_reg &attr, const fs_reg &interp,
                         glsl_interp_qualifier interpolation_mode,
-                         bool is_centroid);
+                         bool is_centroid, bool is_sample);
   fs_reg *emit_frontfacing_interpolation(ir_variable *ir);
   fs_reg *emit_samplepos_setup(ir_variable *ir);
   fs_reg *emit_sampleid_setup(ir_variable *ir);
--- a/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp
+++ b/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp
@@ -55,7 +55,7 @@ brw_lower_unnormalized_offset_visitor::visit_leave(ir_texture *ir)
   if (!ir->offset)
      return visit_continue;

-   if (ir->op == ir_tg4) {
+   if (ir->op == ir_tg4 || ir->op == ir_tex) {
      if (ir->sampler->type->sampler_dimensionality != GLSL_SAMPLER_DIM_RECT)
         return visit_continue;
   }
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -143,6 +143,7 @@ brw_depthbuffer_format(struct brw_context *brw);
 * brw_state.c
 */
 void brw_upload_state(struct brw_context *brw);
+void brw_clear_dirty_bits(struct brw_context *brw);
 void brw_init_state(struct brw_context *brw);
 void brw_destroy_state(struct brw_context *brw);

--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -573,6 +573,20 @@ void brw_upload_state(struct brw_context *brw)
 	 fprintf(stderr, "\n");
      }
   }
+}

+
+/**
+ * Clear dirty bits to account for the fact that the state emitted by
+ * brw_upload_state() has been committed to the hardware.  This zeroes all of
+ * brw->state.dirty.  It is a separate call from brw_upload_state() because
+ * it's possible that after the call to brw_upload_state(), we will discover
+ * that we've run out of aperture space, and need to rewind the batch buffer
+ * to the state it had before the brw_upload_state() call.
+ */
+void
+brw_clear_dirty_bits(struct brw_context *brw)
+{
+   struct brw_state_flags *state = &brw->state.dirty;
   memset(state, 0, sizeof(*state));
 }
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -48,6 +48,7 @@
 static unsigned
 brw_compute_barycentric_interp_modes(struct brw_context *brw,
                                     bool shade_model_flat,
+                                     bool persample_shading,
                                     const struct gl_fragment_program *fprog)
 {
   unsigned barycentric_interp_modes = 0;
@@ -60,7 +61,9 @@ brw_compute_barycentric_interp_modes(struct brw_context *brw,
   for (attr = 0; attr < VARYING_SLOT_MAX; ++attr) {
      enum glsl_interp_qualifier interp_qualifier =
         fprog->InterpQualifier[attr];
-      bool is_centroid = fprog->IsCentroid & BITFIELD64_BIT(attr);
+      bool is_centroid = (fprog->IsCentroid & BITFIELD64_BIT(attr)) &&
+         !persample_shading;
+      bool is_sample = persample_shading;
      bool is_gl_Color = attr == VARYING_SLOT_COL0 || attr == VARYING_SLOT_COL1;

      /* Ignore unused inputs. */
@@ -81,8 +84,12 @@ brw_compute_barycentric_interp_modes(struct brw_context *brw,
         if (is_centroid) {
            barycentric_interp_modes |=
               1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
+         } else if (is_sample) {
+            barycentric_interp_modes |=
+               1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC;
         }
-         if (!is_centroid || brw->needs_unlit_centroid_workaround) {
+         if ((!is_centroid && !is_sample) ||
+             brw->needs_unlit_centroid_workaround) {
            barycentric_interp_modes |=
               1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
         }
@@ -92,8 +99,12 @@ brw_compute_barycentric_interp_modes(struct brw_context *brw,
         if (is_centroid) {
            barycentric_interp_modes |=
               1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
+         } else if (is_sample) {
+            barycentric_interp_modes |=
+               1 << BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC;
         }
-         if (!is_centroid || brw->needs_unlit_centroid_workaround) {
+         if ((!is_centroid && !is_sample) ||
+             brw->needs_unlit_centroid_workaround) {
            barycentric_interp_modes |=
               1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
         }
@@ -170,6 +181,7 @@ bool do_wm_prog(struct brw_context *brw,

   c->prog_data.barycentric_interp_modes =
      brw_compute_barycentric_interp_modes(brw, c->key.flat_shade,
+                                           c->key.persample_shading,
                                           &fp->program);

   program = brw_wm_fs_emit(brw, c, &fp->program, prog, &program_size);
@@ -490,6 +502,9 @@ static void brw_wm_populate_key( struct brw_context *brw,
      (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled);

   /* _NEW_BUFFERS _NEW_MULTISAMPLE */
+   key->persample_shading =
+      _mesa_get_min_invocations_per_fragment(ctx, &fp->program) > 1;
+
   key->compute_pos_offset =
      _mesa_get_min_invocations_per_fragment(ctx, &fp->program) > 1 &&
      fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS;
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -61,6 +61,7 @@ struct brw_wm_prog_key {
   uint8_t iz_lookup;
   GLuint stats_wm:1;
   GLuint flat_shade:1;
+   GLuint persample_shading:1;
   GLuint nr_color_regions:5;
   GLuint replicate_alpha:1;
   GLuint render_to_fbo:1;
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -1010,8 +1010,10 @@ gen6_blorp_emit_primitive(struct brw_context *brw,
   OUT_BATCH(0);
   OUT_BATCH(0);
   ADVANCE_BATCH();
-}

+   /* Only used on Sandybridge; harmless to set elsewhere. */
+   brw->batch.need_workaround_flush = true;
+}

 /**
 * \brief Execute a blit or render pass operation.
@@ -1034,6 +1036,10 @@ gen6_blorp_exec(struct brw_context *brw,
   uint32_t wm_bind_bo_offset = 0;

   uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
+
+   /* Emit workaround flushes when we switch from drawing to blorping. */
+   brw->batch.need_workaround_flush = true;
+
   gen6_emit_3dstate_multisample(brw, params->num_samples);
   gen6_emit_3dstate_sample_mask(brw, params->num_samples, 1.0, false, ~0u);
   gen6_blorp_emit_state_base_address(brw, params);
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -112,7 +112,7 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
   offset += gs_size;

   OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2));
-   OUT_BATCH(offset | fs_size << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
+   OUT_BATCH(fs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
   ADVANCE_BATCH();

   /* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS):
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -357,7 +357,8 @@ intel_bufferobj_map_range(struct gl_context * ctx,
    * BO, and we'll copy what they put in there out at unmap or
    * FlushRange time.
    */
-   if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
+   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
+       (access & GL_MAP_INVALIDATE_RANGE_BIT) &&
       drm_intel_bo_busy(intel_obj->buffer)) {
      if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
 	 intel_obj->range_map_buffer = malloc(length);
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -106,13 +106,15 @@ do_blit_readpixels(struct gl_context * ctx,
   /* Mesa flips the dst_stride for pack->Invert, but we want our mt to have a
    * normal dst_stride.
    */
+   struct gl_pixelstore_attrib uninverted_pack = *pack;
   if (pack->Invert) {
      dst_stride = -dst_stride;
      dst_flip = true;
+      uninverted_pack.Invert = false;
   }

   dst_offset = (GLintptr)pixels;
-   dst_offset += _mesa_image_offset(2, pack, width, height,
+   dst_offset += _mesa_image_offset(2, &uninverted_pack, width, height,
 				    format, type, 0, 0, 0);

   if (!_mesa_clip_copytexsubimage(ctx,
--- a/src/mesa/drivers/dri/nouveau/nouveau_context.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c
@@ -78,6 +78,8 @@ nouveau_context_create(gl_api api,
 		return GL_FALSE;
 	}

+	driContextSetFlags(ctx, flags);
+
 	nctx = to_nouveau_context(ctx);
 	nctx->dri_context = dri_ctx;
 	dri_ctx->driverPrivate = ctx;
--- a/src/mesa/drivers/dri/r200/r200_context.c
+++ b/src/mesa/drivers/dri/r200/r200_context.c
@@ -271,7 +271,7 @@ GLboolean r200CreateContext( gl_api api,
   r200InitShaderFuncs(&functions);
   radeonInitQueryObjFunctions(&functions);

-   if (!radeonInitContext(&rmesa->radeon, &functions,
+   if (!radeonInitContext(&rmesa->radeon, api, &functions,
 			  glVisual, driContextPriv,
 			  sharedContextPrivate)) {
     free(rmesa);
@@ -279,6 +279,8 @@ GLboolean r200CreateContext( gl_api api,
     return GL_FALSE;
   }

+   driContextSetFlags(ctx, flags);
+
   rmesa->radeon.swtcl.RenderIndex = ~0;
   rmesa->radeon.hw.all_dirty = 1;

--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -126,6 +126,7 @@ static void radeonInitDriverFuncs(struct dd_function_table *functions)
 * including the Mesa context itself.
 */
 GLboolean radeonInitContext(radeonContextPtr radeon,
+                            gl_api api,
 			    struct dd_function_table* functions,
 			    const struct gl_config * glVisual,
 			    __DRIcontext * driContextPriv,
@@ -147,7 +148,7 @@ GLboolean radeonInitContext(radeonContextPtr radeon,
 	else
 		shareCtx = NULL;

-	if (!_mesa_initialize_context(&radeon->glCtx, API_OPENGL_COMPAT,
+	if (!_mesa_initialize_context(&radeon->glCtx, api,
 				      glVisual, shareCtx,
 				      functions))
 		return GL_FALSE;
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
@@ -515,6 +515,7 @@ static inline __DRIdrawable* radeon_get_readable(radeonContextPtr radeon)
 }

 GLboolean radeonInitContext(radeonContextPtr radeon,
+                            gl_api api,
 			    struct dd_function_table* functions,
 			    const struct gl_config * glVisual,
 			    __DRIcontext * driContextPriv,
--- a/src/mesa/drivers/dri/radeon/radeon_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@@ -234,7 +234,7 @@ r100CreateContext( gl_api api,
   radeonInitTextureFuncs( &rmesa->radeon, &functions );
   radeonInitQueryObjFunctions(&functions);

-   if (!radeonInitContext(&rmesa->radeon, &functions,
+   if (!radeonInitContext(&rmesa->radeon, api, &functions,
 			  glVisual, driContextPriv,
 			  sharedContextPrivate)) {
     free(rmesa);
@@ -242,6 +242,8 @@ r100CreateContext( gl_api api,
     return GL_FALSE;
   }

+   driContextSetFlags(ctx, flags);
+
   rmesa->radeon.swtcl.RenderIndex = ~0;
   rmesa->radeon.hw.all_dirty = GL_TRUE;

--- a/src/mesa/drivers/dri/swrast/swrast.c
+++ b/src/mesa/drivers/dri/swrast/swrast.c
@@ -705,6 +705,8 @@ dri_create_context(gl_api api,
 	goto context_fail;
    }

+    driContextSetFlags(ctx, flags);
+
    /* do bounds checking to prevent segfaults and server crashes! */
    mesaCtx->Const.CheckArrayBounds = GL_TRUE;

--- a/src/mesa/main/clear.c
+++ b/src/mesa/main/clear.c
@@ -219,7 +219,25 @@ make_color_buffer_mask(struct gl_context *ctx, GLint drawbuffer)
   const struct gl_renderbuffer_attachment *att = ctx->DrawBuffer->Attachment;
   GLbitfield mask = 0x0;

-   switch (drawbuffer) {
+   /* From the GL 4.0 specification:
+    *	If buffer is COLOR, a particular draw buffer DRAW_BUFFERi is
+    *	specified by passing i as the parameter drawbuffer, and value
+    *	points to a four-element vector specifying the R, G, B, and A
+    *	color to clear that draw buffer to. If the draw buffer is one
+    *	of FRONT, BACK, LEFT, RIGHT, or FRONT_AND_BACK, identifying
+    *	multiple buffers, each selected buffer is cleared to the same
+    *	value.
+    *
+    * Note that "drawbuffer" and "draw buffer" have different meanings.
+    * "drawbuffer" specifies DRAW_BUFFERi, while "draw buffer" is what's
+    * assigned to DRAW_BUFFERi. It could be COLOR_ATTACHMENT0, FRONT, BACK,
+    * etc.
+    */
+   if (drawbuffer < 0 || drawbuffer >= (GLint)ctx->Const.MaxDrawBuffers) {
+      return INVALID_MASK;
+   }
+
+   switch (ctx->DrawBuffer->ColorDrawBuffer[drawbuffer]) {
   case GL_FRONT:
      if (att[BUFFER_FRONT_LEFT].Renderbuffer)
         mask |= BUFFER_BIT_FRONT_LEFT;
@@ -255,11 +273,12 @@ make_color_buffer_mask(struct gl_context *ctx, GLint drawbuffer)
         mask |= BUFFER_BIT_BACK_RIGHT;
      break;
   default:
-      if (drawbuffer < 0 || drawbuffer >= (GLint)ctx->Const.MaxDrawBuffers) {
-         mask = INVALID_MASK;
-      }
-      else if (att[BUFFER_COLOR0 + drawbuffer].Renderbuffer) {
-         mask |= (BUFFER_BIT_COLOR0 << drawbuffer);
+      {
+         GLint buf = ctx->DrawBuffer->_ColorDrawBufferIndexes[drawbuffer];
+
+         if (buf >= 0 && att[buf].Renderbuffer) {
+            mask |= 1 << buf;
+         }
      }
   }

--- a/src/mesa/main/condrender.c
+++ b/src/mesa/main/condrender.c
@@ -72,7 +72,9 @@ _mesa_BeginConditionalRender(GLuint queryId, GLenum mode)
   }
   ASSERT(q->Id == queryId);

-   if (q->Target != GL_SAMPLES_PASSED || q->Active) {
+   if ((q->Target != GL_SAMPLES_PASSED &&
+        q->Target != GL_ANY_SAMPLES_PASSED &&
+        q->Target != GL_ANY_SAMPLES_PASSED_CONSERVATIVE) || q->Active) {
      _mesa_error(ctx, GL_INVALID_OPERATION, "glBeginConditionalRender()");
      return;
   }
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -762,7 +762,6 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
      case GL_COLOR_SUM_EXT:
         if (ctx->API != API_OPENGL_COMPAT)
            goto invalid_enum_error;
-         CHECK_EXTENSION(ARB_vertex_program, cap);
         if (ctx->Fog.ColorSumEnabled == state)
            return;
         FLUSH_VERTICES(ctx, _NEW_FOG);
@@ -1462,7 +1461,6 @@ _mesa_IsEnabled( GLenum cap )
      case GL_COLOR_SUM_EXT:
         if (ctx->API != API_OPENGL_COMPAT)
            goto invalid_enum_error;
-         CHECK_EXTENSION(ARB_vertex_program);
         return ctx->Fog.ColorSumEnabled;

      /* GL_ARB_multisample */
--- a/src/mesa/main/formats.c
+++ b/src/mesa/main/formats.c
@@ -1966,6 +1966,26 @@ _mesa_is_format_unsigned(gl_format format)
 }


+/**
+ * Does the given format store signed values (signed-normalized, int, float)?
+ */
+GLboolean
+_mesa_is_format_signed(gl_format format)
+{
+   if (format == MESA_FORMAT_R11_G11_B10_FLOAT || 
+       format == MESA_FORMAT_RGB9_E5_FLOAT) {
+      /* These packed float formats only store unsigned values. */
+      return GL_FALSE;
+   }
+   else {
+      const struct gl_format_info *info = _mesa_get_format_info(format);
+      return (info->DataType == GL_SIGNED_NORMALIZED ||
+              info->DataType == GL_INT ||
+              info->DataType == GL_FLOAT);
+   }
+}
+
+
 /**
 * Return color encoding for given format.
 * \return GL_LINEAR or GL_SRGB
--- a/src/mesa/main/formats.h
+++ b/src/mesa/main/formats.h
@@ -341,6 +341,9 @@ _mesa_is_format_integer_color(gl_format format);
 extern GLboolean
 _mesa_is_format_unsigned(gl_format format);

+extern GLboolean
+_mesa_is_format_signed(gl_format format);
+
 extern GLenum
 _mesa_get_format_color_encoding(gl_format format);

--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -327,6 +327,12 @@ static const int extra_EXT_framebuffer_sRGB_and_new_buffers[] = {
   EXTRA_END
 };

+static const int extra_EXT_packed_float[] = {
+   EXT(EXT_packed_float),
+   EXTRA_NEW_BUFFERS,
+   EXTRA_END
+};
+
 static const int extra_MESA_texture_array_es3[] = {
   EXT(MESA_texture_array),
   EXTRA_API_ES3,
@@ -758,6 +764,45 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
 	 ctx->Texture.Unit[unit].CurrentTex[d->offset]->Name;
      break;

+   /* GL_EXT_packed_float */
+   case GL_RGBA_SIGNED_COMPONENTS_EXT:
+      {
+         /* Note: we only check the 0th color attachment. */
+         const struct gl_renderbuffer *rb =
+            ctx->DrawBuffer->_ColorDrawBuffers[0];
+         if (rb && _mesa_is_format_signed(rb->Format)) {
+            /* Issue 17 of GL_EXT_packed_float:  If a component (such as
+             * alpha) has zero bits, the component should not be considered
+             * signed and so the bit for the respective component should be
+             * zeroed.
+             */
+            GLint r_bits =
+               _mesa_get_format_bits(rb->Format, GL_RED_BITS);
+            GLint g_bits =
+               _mesa_get_format_bits(rb->Format, GL_GREEN_BITS);
+            GLint b_bits =
+               _mesa_get_format_bits(rb->Format, GL_BLUE_BITS);
+            GLint a_bits =
+               _mesa_get_format_bits(rb->Format, GL_ALPHA_BITS);
+            GLint l_bits =
+               _mesa_get_format_bits(rb->Format, GL_TEXTURE_LUMINANCE_SIZE);
+            GLint i_bits =
+               _mesa_get_format_bits(rb->Format, GL_TEXTURE_INTENSITY_SIZE);
+
+            v->value_int_4[0] = r_bits + l_bits + i_bits > 0;
+            v->value_int_4[1] = g_bits + l_bits + i_bits > 0;
+            v->value_int_4[2] = b_bits + l_bits + i_bits > 0;
+            v->value_int_4[3] = a_bits + i_bits > 0;
+         }
+         else {
+            v->value_int_4[0] =
+            v->value_int_4[1] =
+            v->value_int_4[2] =
+            v->value_int_4[3] = 0;
+         }
+      }
+      break;
+
   /* GL_ARB_vertex_buffer_object */
   case GL_VERTEX_ARRAY_BUFFER_BINDING_ARB:
   case GL_NORMAL_ARRAY_BUFFER_BINDING_ARB:
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -543,7 +543,7 @@ descriptor=[
  [ "TRANSPOSE_TEXTURE_MATRIX_ARB", "CONTEXT_MATRIX_T(TextureMatrixStack), NO_EXTRA" ],

 # GL_EXT_secondary_color
-  [ "COLOR_SUM", "CONTEXT_BOOL(Fog.ColorSumEnabled), extra_ARB_vertex_program" ],
+  [ "COLOR_SUM", "CONTEXT_BOOL(Fog.ColorSumEnabled), NO_EXTRA" ],
  [ "CURRENT_SECONDARY_COLOR", "CONTEXT_FIELD(Current.Attrib[VERT_ATTRIB_COLOR1][0], TYPE_FLOATN_4), extra_flush_current" ],
  [ "SECONDARY_COLOR_ARRAY", "ARRAY_BOOL(VertexAttrib[VERT_ATTRIB_COLOR1].Enabled), NO_EXTRA" ],
  [ "SECONDARY_COLOR_ARRAY_TYPE", "ARRAY_ENUM(VertexAttrib[VERT_ATTRIB_COLOR1].Type), NO_EXTRA" ],
@@ -613,6 +613,9 @@ descriptor=[
 # GL_ARB_fragment_program
  [ "FRAGMENT_PROGRAM_ARB", "CONTEXT_BOOL(FragmentProgram.Enabled), extra_ARB_fragment_program" ],

+# GL_EXT_packed_float
+  [ "RGBA_SIGNED_COMPONENTS_EXT", "LOC_CUSTOM, TYPE_INT_4, 0, extra_EXT_packed_float" ],
+
 # GL_EXT_depth_bounds_test
  [ "DEPTH_BOUNDS_TEST_EXT", "CONTEXT_BOOL(Depth.BoundsTest), extra_EXT_depth_bounds_test" ],
  [ "DEPTH_BOUNDS_EXT", "CONTEXT_FLOAT2(Depth.BoundsMin), extra_EXT_depth_bounds_test" ],
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -263,6 +263,43 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats)
      else {
         n += 3;
      }
+
+      /* The ES and desktop GL specs diverge here.
+       *
+       * In desktop OpenGL, the driver can perform online compression of
+       * uncompressed texture data.  GL_NUM_COMPRESSED_TEXTURE_FORMATS and
+       * GL_COMPRESSED_TEXTURE_FORMATS give the application a list of
+       * formats that it could ask the driver to compress with some
+       * expectation of quality.  The GL_ARB_texture_compression spec
+       * calls this "suitable for general-purpose usage."  As noted
+       * above, this means GL_COMPRESSED_RGBA_S3TC_DXT1_EXT is not
+       * included in the list.
+       *
+       * In OpenGL ES, the driver never performs compression.
+       * GL_NUM_COMPRESSED_TEXTURE_FORMATS and
+       * GL_COMPRESSED_TEXTURE_FORMATS give the application a list of
+       * formats that the driver can receive from the application.  It
+       * is the *complete* list of formats.  The
+       * GL_EXT_texture_compression_s3tc spec says:
+       *
+       *     "New State for OpenGL ES 2.0.25 and 3.0.2 Specifications
+       *
+       *         The queries for NUM_COMPRESSED_TEXTURE_FORMATS and
+       *         COMPRESSED_TEXTURE_FORMATS include
+       *         COMPRESSED_RGB_S3TC_DXT1_EXT,
+       *         COMPRESSED_RGBA_S3TC_DXT1_EXT,
+       *         COMPRESSED_RGBA_S3TC_DXT3_EXT, and
+       *         COMPRESSED_RGBA_S3TC_DXT5_EXT."
+       *
+       * Note that the addition is only to the OpenGL ES specification!
+       */
+      if (_mesa_is_gles(ctx)) {
+         if (formats) {
+            formats[n++] = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
+         } else {
+            n += 1;
+         }
+      }
   }

   /* The GL_OES_compressed_ETC1_RGB8_texture spec says:
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -1704,7 +1704,7 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
         /* GL spec 'Data Conversions' section specifies that floating-point
          * value in integer Get function is rounded to nearest integer
          */
-         *params = (GLint) roundf(obj->Sampler.LodBias);
+         *params = IROUND(obj->Sampler.LodBias);
         break;
      case GL_TEXTURE_CROP_RECT_OES:
         if (ctx->API != API_OPENGLES || !ctx->Extensions.OES_draw_texture)
--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -255,7 +255,7 @@ update_single_texture(struct st_context *st,
 				stObj->base.DepthMode) ||
 	  (view_format != stObj->sampler_view->format) ||
 	  stObj->base.BaseLevel != stObj->sampler_view->u.tex.first_level) {
-	 pipe_sampler_view_reference(&stObj->sampler_view, NULL);
+	 pipe_sampler_view_release(pipe, &stObj->sampler_view);
      }
   }

--- a/src/mesa/state_tracker/st_cb_blit.c
+++ b/src/mesa/state_tracker/st_cb_blit.c
@@ -43,6 +43,34 @@
 #include "util/u_format.h"


+static void
+st_adjust_blit_for_msaa_resolve(struct pipe_blit_info *blit)
+{
+   /* Even though we do multisample resolves at the time of the blit, the
+    * OpenGL specification defines them as if they happen at the time of
+    * rendering, so the type of averaging done during the resolve should
+    * depend only on the source format; the destination format should be
+    * ignored. However, the specification doesn't seem to be strict about it.
+    *
+    * It has been observed that multisample resolves produce slightly better
+    * looking images when averaging is done using the destination format.
+    * NVIDIA's proprietary OpenGL driver also follows this approach.
+    *
+    * So when resolving (multisampled source, single-sampled destination) we
+    * use the destination resource's own format, and convert the source format
+    * with util_format_srgb() or util_format_linear() to match its color space.
+    */
+   if (blit->src.resource->nr_samples > 1 &&
+       blit->dst.resource->nr_samples <= 1) {
+      blit->dst.format = blit->dst.resource->format;
+
+      if (util_format_is_srgb(blit->dst.resource->format))
+         blit->src.format = util_format_srgb(blit->src.resource->format);
+      else
+         blit->src.format = util_format_linear(blit->src.resource->format);
+   }
+}
+
 static void
 st_BlitFramebuffer(struct gl_context *ctx,
                   GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
@@ -192,6 +220,8 @@ st_BlitFramebuffer(struct gl_context *ctx,
                  blit.src.box.z = srcAtt->Zoffset + srcAtt->CubeMapFace;
                  blit.src.format = util_format_linear(srcObj->pt->format);

+                  st_adjust_blit_for_msaa_resolve(&blit);
+
                  st->pipe->blit(st->pipe, &blit);
               }
            }
@@ -227,6 +257,8 @@ st_BlitFramebuffer(struct gl_context *ctx,
                  blit.src.box.z = srcSurf->u.tex.first_layer;
                  blit.src.format = util_format_linear(srcSurf->format);

+                  st_adjust_blit_for_msaa_resolve(&blit);
+
                  st->pipe->blit(st->pipe, &blit);
               }
            }
--- a/src/mesa/state_tracker/st_cb_clear.c
+++ b/src/mesa/state_tracker/st_cb_clear.c
@@ -205,7 +205,6 @@ clear_with_quad(struct gl_context *ctx,
   const GLfloat x1 = (GLfloat) ctx->DrawBuffer->_Xmax / fb_width * 2.0f - 1.0f;
   const GLfloat y0 = (GLfloat) ctx->DrawBuffer->_Ymin / fb_height * 2.0f - 1.0f;
   const GLfloat y1 = (GLfloat) ctx->DrawBuffer->_Ymax / fb_height * 2.0f - 1.0f;
-   union pipe_color_union clearColor;

   /*
   printf("%s %s%s%s %f,%f %f,%f\n", __FUNCTION__, 
@@ -308,18 +307,13 @@ clear_with_quad(struct gl_context *ctx,
   set_vertex_shader(st);
   cso_set_geometry_shader_handle(st->cso_context, NULL);

-   if (ctx->DrawBuffer->_ColorDrawBuffers[0]) {
-      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
-      GLboolean is_integer = _mesa_is_enum_format_integer(rb->InternalFormat);
-
-      st_translate_color(&ctx->Color.ClearColor,
-                         &clearColor,
-                         ctx->DrawBuffer->_ColorDrawBuffers[0]->_BaseFormat,
-                         is_integer);
-   }
+   /* We can't translate the clear color to the colorbuffer format,
+    * because different colorbuffers may have different formats.
+    */

   /* draw quad matching scissor rect */
-   draw_quad(st, x0, y0, x1, y1, (GLfloat) ctx->Depth.Clear, &clearColor);
+   draw_quad(st, x0, y0, x1, y1, (GLfloat) ctx->Depth.Clear,
+             (union pipe_color_union*)&ctx->Color.ClearColor);

   /* Restore pipe state */
   cso_restore_blend(st->cso_context);
@@ -397,7 +391,7 @@ st_Clear(struct gl_context *ctx, GLbitfield mask)

   if (mask & BUFFER_BITS_COLOR) {
      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
-         GLuint b = ctx->DrawBuffer->_ColorDrawBufferIndexes[i];
+         GLint b = ctx->DrawBuffer->_ColorDrawBufferIndexes[i];

         if (mask & (1 << b)) {
            struct gl_renderbuffer *rb
@@ -450,19 +444,11 @@ st_Clear(struct gl_context *ctx, GLbitfield mask)
                      quad_buffers & PIPE_CLEAR_DEPTH,
                      quad_buffers & PIPE_CLEAR_STENCIL);
   } else if (clear_buffers) {
-      union pipe_color_union clearColor;
-
-      if (ctx->DrawBuffer->_ColorDrawBuffers[0]) {
-         struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
-         GLboolean is_integer = _mesa_is_enum_format_integer(rb->InternalFormat);
-
-         st_translate_color(&ctx->Color.ClearColor,
-                            &clearColor,
-			    ctx->DrawBuffer->_ColorDrawBuffers[0]->_BaseFormat,
-			    is_integer);
-      }
-
-      st->pipe->clear(st->pipe, clear_buffers, &clearColor,
+      /* We can't translate the clear color to the colorbuffer format,
+       * because different colorbuffers may have different formats.
+       */
+      st->pipe->clear(st->pipe, clear_buffers,
+                      (union pipe_color_union*)&ctx->Color.ClearColor,
                      ctx->Depth.Clear, ctx->Stencil.Clear);
   }
   if (mask & BUFFER_BIT_ACCUM)
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -648,7 +648,8 @@ st_ReadBuffer(struct gl_context *ctx, GLenum buffer)
   (void) buffer;

   /* add the renderbuffer on demand */
-   st_manager_add_color_renderbuffer(st, fb, fb->_ColorReadBufferIndex);
+   if (fb->_ColorReadBufferIndex >= 0)
+      st_manager_add_color_renderbuffer(st, fb, fb->_ColorReadBufferIndex);
 }


--- a/src/mesa/swrast/s_texcombine.c
+++ b/src/mesa/swrast/s_texcombine.c
@@ -602,6 +602,14 @@ _swrast_texture_span( struct gl_context *ctx, SWspan *span )
   if (!swrast->TexelBuffer) {
 #ifdef _OPENMP
      const GLint maxThreads = omp_get_max_threads();
+
+      /* TexelBuffer memory allocation needs to be done in a critical section
+       * as this code runs in a parallel loop.
+       * When entering the section, first check if TexelBuffer has been
+       * initialized already by another thread while this thread was waiting.
+       */
+      #pragma omp critical
+      if (!swrast->TexelBuffer) {
 #else
      const GLint maxThreads = 1;
 #endif
@@ -613,6 +621,10 @@ _swrast_texture_span( struct gl_context *ctx, SWspan *span )
      swrast->TexelBuffer =
 	 malloc(ctx->Const.FragmentProgram.MaxTextureImageUnits * maxThreads *
 			    SWRAST_MAX_WIDTH * 4 * sizeof(GLfloat));
+#ifdef _OPENMP
+      } /* critical section */
+#endif
+
      if (!swrast->TexelBuffer) {
 	 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
 	 return;
@@ -1 +1 @@
-10.0.1
+10.0.3