docs: add release notes for 17.0.7

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Update version to 17.0.7
2017-06-01 11:34:38 +01:00 · 2017-06-01 11:30:44 +01:00 · 2017-05-31 01:46:21 +01:00 · 2017-05-31 01:43:36 +01:00 · 2017-05-31 01:42:22 +01:00 · 2017-05-31 01:40:02 +01:00
27 changed files with 557 additions and 257 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-17.0.6
+17.0.7
--- a/docs/relnotes/17.0.6.html
+++ b/docs/relnotes/17.0.6.html
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD
+f1b2497d553e9a584f0caa3a2d9d310e27ead15fb0af170da69f6e70fb5031cd  mesa-17.0.6.tar.gz
+89ecf3bcd0f18dcca5aaa42bf36bb52a2df33be89889f94aaaad91f7a504a69d  mesa-17.0.6.tar.xz
 </pre>


--- a/docs/relnotes/17.0.7.html
+++ b/docs/relnotes/17.0.7.html
@@ -0,0 +1,144 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.0.7 Release Notes / June 1, 2017</h1>
+
+<p>
+Mesa 17.0.7 is a bug fix release which fixes bugs found since the 17.0.6 release.
+</p>
+<p>
+Mesa 17.0.7 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98833">Bug 98833</a> - [REGRESSION, bisected] Wayland revert commit breaks non-Vsync fullscreen frame updates</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100741">Bug 100741</a> - Chromium - Memory leak</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100925">Bug 100925</a> - [HSW/BSW/BDW/SKL] Google Earth is not resolving all the details in the map correctly</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andres Gomez (1):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.0.6</li>
+</ul>
+
+<p>Bartosz Tomczyk (1):</p>
+<ul>
+  <li>mesa: Avoid leaking surface in st_renderbuffer_delete</li>
+</ul>
+
+<p>Chad Versace (1):</p>
+<ul>
+  <li>egl: Partially revert 23c86c74, fix eglMakeCurrent</li>
+</ul>
+
+<p>Daniel Stone (7):</p>
+<ul>
+  <li>vulkan: Fix Wayland uninitialised registry</li>
+  <li>vulkan/wsi/wayland: Remove roundtrip when creating image</li>
+  <li>vulkan/wsi/wayland: Use per-display event queue</li>
+  <li>vulkan/wsi/wayland: Use proxy wrappers for swapchain</li>
+  <li>egl/wayland: Don't open-code roundtrip</li>
+  <li>egl/wayland: Use per-surface event queues</li>
+  <li>egl/wayland: Ensure we get a back buffer</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>st/va: fix misplaced closing bracket</li>
+  <li>anv: automake: list shared libraries after the static ones</li>
+  <li>radv: automake: list shared libraries after the static ones</li>
+  <li>egl/wayland: select the format based on the interface used</li>
+  <li>Update version to 17.0.7</li>
+</ul>
+
+<p>Eric Anholt (2):</p>
+<ul>
+  <li>renderonly: Initialize fields of struct winsys_handle.</li>
+  <li>vc4: Don't allocate new BOs to avoid synchronization when they're shared.</li>
+</ul>
+
+<p>Hans de Goede (1):</p>
+<ul>
+  <li>glxglvnddispatch: Add missing dispatch for GetDriverConfig</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>nvc0/ir: SHLADD's middle source must be an immediate</li>
+</ul>
+
+<p>Jason Ekstrand (2):</p>
+<ul>
+  <li>i965/blorp: Do and end-of-pipe sync on both sides of fast-clear ops</li>
+  <li>i965: Round copy size to the nearest block in intel_miptree_copy</li>
+</ul>
+
+<p>Lucas Stach (1):</p>
+<ul>
+  <li>etnaviv: stop oversizing buffer resources</li>
+</ul>
+
+<p>Nanley Chery (2):</p>
+<ul>
+  <li>anv/formats: Update the three-channel BC1 mappings</li>
+  <li>i965/formats: Update the three-channel DXT1 mappings</li>
+</ul>
+
+<p>Pohjolainen, Topi (1):</p>
+<ul>
+  <li>intel/isl/gen7: Use stencil vertical alignment of 8 instead of 4</li>
+</ul>
+
+<p>Samuel Iglesias Gonsálvez (3):</p>
+<ul>
+  <li>i965/vec4/gs: restore the uniform values which was overwritten by failed vec4_gs_visitor execution</li>
+  <li>i965/vec4: fix swizzle and writemask when loading an uniform with constant offset</li>
+  <li>i965/vec4: load dvec3/4 uniforms first in the push constant buffer</li>
+</ul>
+
+<p>Tom Stellard (1):</p>
+<ul>
+  <li>gallivm: Make sure module has the correct data layout when pass manager runs</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/src/amd/vulkan/Makefile.am
+++ b/src/amd/vulkan/Makefile.am
@@ -57,8 +57,21 @@ VULKAN_SOURCES = \
 	$(VULKAN_GENERATED_FILES) \
 	$(VULKAN_FILES)

-VULKAN_LIB_DEPS =
-
+VULKAN_LIB_DEPS = \
+	libvulkan_common.la \
+	$(top_builddir)/src/vulkan/wsi/libvulkan_wsi.la \
+	$(top_builddir)/src/amd/common/libamd_common.la \
+	$(top_builddir)/src/amd/addrlib/libamdgpu_addrlib.la \
+	$(top_builddir)/src/compiler/nir/libnir.la \
+	$(top_builddir)/src/util/libmesautil.la \
+	$(LLVM_LIBS) \
+	$(LIBELF_LIBS) \
+	$(PTHREAD_LIBS) \
+	$(AMDGPU_LIBS) \
+	$(LIBDRM_LIBS) \
+	$(PTHREAD_LIBS) \
+	$(DLOPEN_LIBS) \
+	-lm

 if HAVE_PLATFORM_X11
 AM_CPPFLAGS += \
@@ -90,22 +103,6 @@ endif
 noinst_LTLIBRARIES = libvulkan_common.la
 libvulkan_common_la_SOURCES = $(VULKAN_SOURCES)

-VULKAN_LIB_DEPS += \
-	libvulkan_common.la \
-	$(top_builddir)/src/vulkan/wsi/libvulkan_wsi.la \
-	$(top_builddir)/src/amd/common/libamd_common.la \
-	$(top_builddir)/src/amd/addrlib/libamdgpu_addrlib.la \
-	$(top_builddir)/src/compiler/nir/libnir.la \
-	$(top_builddir)/src/util/libmesautil.la \
-	$(LLVM_LIBS) \
-	$(LIBELF_LIBS) \
-	$(PTHREAD_LIBS) \
-	$(AMDGPU_LIBS) \
-	$(LIBDRM_LIBS) \
-	$(PTHREAD_LIBS) \
-	$(DLOPEN_LIBS) \
-	-lm
-
 nodist_EXTRA_libvulkan_radeon_la_SOURCES = dummy.cpp
 libvulkan_radeon_la_SOURCES = $(VULKAN_GEM_FILES)

--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -264,6 +264,10 @@ struct dri2_egl_surface
   struct wl_egl_window  *wl_win;
   int                    dx;
   int                    dy;
+   struct wl_event_queue *wl_queue;
+   struct wl_surface     *wl_surface_wrapper;
+   struct wl_display     *wl_dpy_wrapper;
+   struct wl_drm         *wl_drm_wrapper;
   struct wl_callback    *throttle_callback;
   int                    format;
 #endif
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -55,34 +55,10 @@ static EGLBoolean
 dri2_wl_swap_interval(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf,
                      EGLint interval);

-static void
-sync_callback(void *data, struct wl_callback *callback, uint32_t serial)
-{
-   int *done = data;
-
-   *done = 1;
-   wl_callback_destroy(callback);
-}
-
-static const struct wl_callback_listener sync_listener = {
-   .done = sync_callback
-};
-
 static int
 roundtrip(struct dri2_egl_display *dri2_dpy)
 {
-   struct wl_callback *callback;
-   int done = 0, ret = 0;
-
-   callback = wl_display_sync(dri2_dpy->wl_dpy_wrapper);
-   wl_callback_add_listener(callback, &sync_listener, &done);
-   while (ret != -1 && !done)
-      ret = wl_display_dispatch_queue(dri2_dpy->wl_dpy, dri2_dpy->wl_queue);
-
-   if (!done)
-      wl_callback_destroy(callback);
-
-   return ret;
+   return wl_display_roundtrip_queue(dri2_dpy->wl_dpy, dri2_dpy->wl_queue);
 }

 static void
@@ -148,7 +124,7 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
   if (!_eglInitSurface(&dri2_surf->base, disp, EGL_WINDOW_BIT, conf, attrib_list))
      goto cleanup_surf;

-   if (dri2_dpy->dri2) {
+   if (dri2_dpy->wl_drm) {
      if (conf->RedSize == 5)
         dri2_surf->format = WL_DRM_FORMAT_RGB565;
      else if (conf->AlphaSize == 0)
@@ -156,6 +132,7 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
      else
         dri2_surf->format = WL_DRM_FORMAT_ARGB8888;
   } else {
+      assert(dri2_dpy->wl_shm);
      if (conf->RedSize == 5)
         dri2_surf->format = WL_SHM_FORMAT_RGB565;
      else if (conf->AlphaSize == 0)
@@ -170,6 +147,37 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
   }

   dri2_surf->wl_win = window;
+   dri2_surf->wl_queue = wl_display_create_queue(dri2_dpy->wl_dpy);
+   if (!dri2_surf->wl_queue) {
+      _eglError(EGL_BAD_ALLOC, "dri2_create_surface");
+      goto cleanup_surf;
+   }
+
+   if (dri2_dpy->wl_drm) {
+      dri2_surf->wl_drm_wrapper = wl_proxy_create_wrapper(dri2_dpy->wl_drm);
+      if (!dri2_surf->wl_drm_wrapper) {
+         _eglError(EGL_BAD_ALLOC, "dri2_create_surface");
+         goto cleanup_queue;
+      }
+      wl_proxy_set_queue((struct wl_proxy *)dri2_surf->wl_drm_wrapper,
+                         dri2_surf->wl_queue);
+   }
+
+   dri2_surf->wl_dpy_wrapper = wl_proxy_create_wrapper(dri2_dpy->wl_dpy);
+   if (!dri2_surf->wl_dpy_wrapper) {
+      _eglError(EGL_BAD_ALLOC, "dri2_create_surface");
+      goto cleanup_drm;
+   }
+   wl_proxy_set_queue((struct wl_proxy *)dri2_surf->wl_dpy_wrapper,
+                      dri2_surf->wl_queue);
+
+   dri2_surf->wl_surface_wrapper = wl_proxy_create_wrapper(window->surface);
+   if (!dri2_surf->wl_surface_wrapper) {
+      _eglError(EGL_BAD_ALLOC, "dri2_create_surface");
+      goto cleanup_drm;
+   }
+   wl_proxy_set_queue((struct wl_proxy *)dri2_surf->wl_surface_wrapper,
+                      dri2_surf->wl_queue);

   dri2_surf->wl_win->private = dri2_surf;
   dri2_surf->wl_win->destroy_window_callback = destroy_window_callback;
@@ -200,6 +208,11 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,

   return &dri2_surf->base;

+ cleanup_drm:
+   if (dri2_surf->wl_drm_wrapper)
+      wl_proxy_wrapper_destroy(dri2_surf->wl_drm_wrapper);
+ cleanup_queue:
+   wl_event_queue_destroy(dri2_surf->wl_queue);
 cleanup_surf:
   free(dri2_surf);

@@ -265,6 +278,12 @@ dri2_wl_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
      dri2_surf->wl_win->destroy_window_callback = NULL;
   }

+   if (dri2_surf->wl_drm_wrapper)
+      wl_proxy_wrapper_destroy(dri2_surf->wl_drm_wrapper);
+   wl_proxy_wrapper_destroy(dri2_surf->wl_surface_wrapper);
+   wl_proxy_wrapper_destroy(dri2_surf->wl_dpy_wrapper);
+   wl_event_queue_destroy(dri2_surf->wl_queue);
+
   free(surf);

   return EGL_TRUE;
@@ -333,9 +352,9 @@ get_back_bo(struct dri2_egl_surface *dri2_surf)
   }

   /* There might be a buffer release already queued that wasn't processed */
-   wl_display_dispatch_queue_pending(dri2_dpy->wl_dpy, dri2_dpy->wl_queue);
+   wl_display_dispatch_queue_pending(dri2_dpy->wl_dpy, dri2_surf->wl_queue);

-   if (dri2_surf->back == NULL) {
+   while (dri2_surf->back == NULL) {
      for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) {
         /* Get an unlocked buffer, preferrably one with a dri_buffer
          * already allocated. */
@@ -346,6 +365,14 @@ get_back_bo(struct dri2_egl_surface *dri2_surf)
         else if (dri2_surf->back->dri_image == NULL)
            dri2_surf->back = &dri2_surf->color_buffers[i];
      }
+
+      if (dri2_surf->back)
+         break;
+
+      /* If we don't have a buffer, then block on the server to release one for
+       * us, and try again. */
+      if (wl_display_dispatch_queue(dri2_dpy->wl_dpy, dri2_surf->wl_queue) < 0)
+          return -1;
   }

   if (dri2_surf->back == NULL)
@@ -634,7 +661,7 @@ create_wl_buffer(struct dri2_egl_surface *dri2_surf)
      dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_STRIDE, &stride);

      dri2_surf->current->wl_buffer =
-         wl_drm_create_prime_buffer(dri2_dpy->wl_drm,
+         wl_drm_create_prime_buffer(dri2_surf->wl_drm_wrapper,
                                    fd,
                                    dri2_surf->base.Width,
                                    dri2_surf->base.Height,
@@ -648,7 +675,7 @@ create_wl_buffer(struct dri2_egl_surface *dri2_surf)
      dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_STRIDE, &stride);

      dri2_surf->current->wl_buffer =
-         wl_drm_create_buffer(dri2_dpy->wl_drm,
+         wl_drm_create_buffer(dri2_surf->wl_drm_wrapper,
                              name,
                              dri2_surf->base.Width,
                              dri2_surf->base.Height,
@@ -656,8 +683,6 @@ create_wl_buffer(struct dri2_egl_surface *dri2_surf)
                              dri2_surf->format);
   }

-   wl_proxy_set_queue((struct wl_proxy *) dri2_surf->current->wl_buffer,
-                      dri2_dpy->wl_queue);
   wl_buffer_add_listener(dri2_surf->current->wl_buffer,
                          &wl_buffer_listener, dri2_surf);
 }
@@ -669,14 +694,14 @@ try_damage_buffer(struct dri2_egl_surface *dri2_surf,
 {
   int i;

-   if (wl_proxy_get_version((struct wl_proxy *) dri2_surf->wl_win->surface)
+   if (wl_proxy_get_version((struct wl_proxy *) dri2_surf->wl_surface_wrapper)
       < WL_SURFACE_DAMAGE_BUFFER_SINCE_VERSION)
      return EGL_FALSE;

   for (i = 0; i < n_rects; i++) {
      const int *rect = &rects[i * 4];

-      wl_surface_damage_buffer(dri2_surf->wl_win->surface,
+      wl_surface_damage_buffer(dri2_surf->wl_surface_wrapper,
                               rect[0],
                               dri2_surf->base.Height - rect[1] - rect[3],
                               rect[2], rect[3]);
@@ -699,7 +724,7 @@ dri2_wl_swap_buffers_with_damage(_EGLDriver *drv,

   while (dri2_surf->throttle_callback != NULL)
      if (wl_display_dispatch_queue(dri2_dpy->wl_dpy,
-                                    dri2_dpy->wl_queue) == -1)
+                                    dri2_surf->wl_queue) == -1)
         return -1;

   for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++)
@@ -715,11 +740,9 @@ dri2_wl_swap_buffers_with_damage(_EGLDriver *drv,

   if (draw->SwapInterval > 0) {
      dri2_surf->throttle_callback =
-         wl_surface_frame(dri2_surf->wl_win->surface);
+         wl_surface_frame(dri2_surf->wl_surface_wrapper);
      wl_callback_add_listener(dri2_surf->throttle_callback,
                               &throttle_listener, dri2_surf);
-      wl_proxy_set_queue((struct wl_proxy *) dri2_surf->throttle_callback,
-                         dri2_dpy->wl_queue);
   }

   dri2_surf->back->age = 1;
@@ -728,7 +751,7 @@ dri2_wl_swap_buffers_with_damage(_EGLDriver *drv,

   create_wl_buffer(dri2_surf);

-   wl_surface_attach(dri2_surf->wl_win->surface,
+   wl_surface_attach(dri2_surf->wl_surface_wrapper,
                     dri2_surf->current->wl_buffer,
                     dri2_surf->dx, dri2_surf->dy);

@@ -742,7 +765,7 @@ dri2_wl_swap_buffers_with_damage(_EGLDriver *drv,
    * ignore the damage region and post maximum damage, due to
    * https://bugs.freedesktop.org/78190 */
   if (!n_rects || !try_damage_buffer(dri2_surf, rects, n_rects))
-      wl_surface_damage(dri2_surf->wl_win->surface,
+      wl_surface_damage(dri2_surf->wl_surface_wrapper,
                        0, 0, INT32_MAX, INT32_MAX);

   if (dri2_dpy->is_different_gpu) {
@@ -760,14 +783,14 @@ dri2_wl_swap_buffers_with_damage(_EGLDriver *drv,
   dri2_flush_drawable_for_swapbuffers(disp, draw);
   dri2_dpy->flush->invalidate(dri2_surf->dri_drawable);

-   wl_surface_commit(dri2_surf->wl_win->surface);
+   wl_surface_commit(dri2_surf->wl_surface_wrapper);

   /* If we're not waiting for a frame callback then we'll at least throttle
    * to a sync callback so that we always give a chance for the compositor to
    * handle the commit and send a release event before checking for a free
    * buffer */
   if (dri2_surf->throttle_callback == NULL) {
-      dri2_surf->throttle_callback = wl_display_sync(dri2_dpy->wl_dpy_wrapper);
+      dri2_surf->throttle_callback = wl_display_sync(dri2_surf->wl_dpy_wrapper);
      wl_callback_add_listener(dri2_surf->throttle_callback,
                               &throttle_listener, dri2_surf);
   }
@@ -1410,11 +1433,13 @@ os_create_anonymous_file(off_t size)


 static EGLBoolean
-dri2_wl_swrast_allocate_buffer(struct dri2_egl_display *dri2_dpy,
+dri2_wl_swrast_allocate_buffer(struct dri2_egl_surface *dri2_surf,
                               int format, int w, int h,
                               void **data, int *size,
                               struct wl_buffer **buffer)
 {
+   struct dri2_egl_display *dri2_dpy =
+      dri2_egl_display(dri2_surf->base.Resource.Display);
   struct wl_shm_pool *pool;
   int fd, stride, size_map;
   void *data_map;
@@ -1435,6 +1460,7 @@ dri2_wl_swrast_allocate_buffer(struct dri2_egl_display *dri2_dpy,

   /* Share it in a wl_buffer */
   pool = wl_shm_create_pool(dri2_dpy->wl_shm, fd, size_map);
+   wl_proxy_set_queue((struct wl_proxy *)pool, dri2_surf->wl_queue);
   *buffer = wl_shm_pool_create_buffer(pool, 0, w, h, stride, format);
   wl_shm_pool_destroy(pool);
   close(fd);
@@ -1470,7 +1496,7 @@ swrast_update_buffers(struct dri2_egl_surface *dri2_surf)
   /* find back buffer */

   /* There might be a buffer release already queued that wasn't processed */
-   wl_display_dispatch_queue_pending(dri2_dpy->wl_dpy, dri2_dpy->wl_queue);
+   wl_display_dispatch_queue_pending(dri2_dpy->wl_dpy, dri2_surf->wl_queue);

   /* try get free buffer already created */
   for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) {
@@ -1486,7 +1512,7 @@ swrast_update_buffers(struct dri2_egl_surface *dri2_surf)
      for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) {
         if (!dri2_surf->color_buffers[i].locked) {
             dri2_surf->back = &dri2_surf->color_buffers[i];
-             if (!dri2_wl_swrast_allocate_buffer(dri2_dpy,
+             if (!dri2_wl_swrast_allocate_buffer(dri2_surf,
                                                 dri2_surf->format,
                                                 dri2_surf->base.Width,
                                                 dri2_surf->base.Height,
@@ -1496,8 +1522,6 @@ swrast_update_buffers(struct dri2_egl_surface *dri2_surf)
                _eglError(EGL_BAD_ALLOC, "failed to allocate color buffer");
                 return -1;
             }
-             wl_proxy_set_queue((struct wl_proxy *) dri2_surf->back->wl_buffer,
-                                dri2_dpy->wl_queue);
             wl_buffer_add_listener(dri2_surf->back->wl_buffer,
                                    &wl_buffer_listener, dri2_surf);
             break;
@@ -1553,22 +1577,20 @@ dri2_wl_swrast_commit_backbuffer(struct dri2_egl_surface *dri2_surf)

   while (dri2_surf->throttle_callback != NULL)
      if (wl_display_dispatch_queue(dri2_dpy->wl_dpy,
-                                    dri2_dpy->wl_queue) == -1)
+                                    dri2_surf->wl_queue) == -1)
         return;

   if (dri2_surf->base.SwapInterval > 0) {
      dri2_surf->throttle_callback =
-         wl_surface_frame(dri2_surf->wl_win->surface);
+         wl_surface_frame(dri2_surf->wl_surface_wrapper);
      wl_callback_add_listener(dri2_surf->throttle_callback,
                               &throttle_listener, dri2_surf);
-      wl_proxy_set_queue((struct wl_proxy *) dri2_surf->throttle_callback,
-                         dri2_dpy->wl_queue);
   }

   dri2_surf->current = dri2_surf->back;
   dri2_surf->back = NULL;

-   wl_surface_attach(dri2_surf->wl_win->surface,
+   wl_surface_attach(dri2_surf->wl_surface_wrapper,
                     dri2_surf->current->wl_buffer,
                     dri2_surf->dx, dri2_surf->dy);

@@ -1578,9 +1600,9 @@ dri2_wl_swrast_commit_backbuffer(struct dri2_egl_surface *dri2_surf)
   dri2_surf->dx = 0;
   dri2_surf->dy = 0;

-   wl_surface_damage(dri2_surf->wl_win->surface,
+   wl_surface_damage(dri2_surf->wl_surface_wrapper,
                     0, 0, INT32_MAX, INT32_MAX);
-   wl_surface_commit(dri2_surf->wl_win->surface);
+   wl_surface_commit(dri2_surf->wl_surface_wrapper);

   /* If we're not waiting for a frame callback then we'll at least throttle
    * to a sync callback so that we always give a chance for the compositor to
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -818,25 +818,6 @@ eglMakeCurrent(EGLDisplay dpy, EGLSurface draw, EGLSurface read,
         RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_FALSE);
   }

-   _EGLThreadInfo *t =_eglGetCurrentThread();
-   _EGLContext *old_ctx = t->CurrentContext;
-   _EGLSurface *old_draw_surf = old_ctx ? old_ctx->DrawSurface : NULL;
-   _EGLSurface *old_read_surf = old_ctx ? old_ctx->ReadSurface : NULL;
-
-   /* From the EGL 1.5 spec, Section 3.7.3 Binding Context and Drawables:
-    *
-    *    If the previous context of the calling thread has unflushed commands,
-    *    and the previous surface is no longer valid, an
-    *    EGL_BAD_CURRENT_SURFACE error is generated.
-    *
-    * It's difficult to check if the context has unflushed commands, but it's
-    * easy to check if the surface is no longer valid.
-    */
-   if (old_draw_surf && old_draw_surf->Lost)
-      RETURN_EGL_ERROR(disp, EGL_BAD_CURRENT_SURFACE, EGL_FALSE);
-   if (old_read_surf && old_read_surf->Lost)
-      RETURN_EGL_ERROR(disp, EGL_BAD_CURRENT_SURFACE, EGL_FALSE);
-
   /*    If a native window underlying either draw or read is no longer valid,
    *    an EGL_BAD_NATIVE_WINDOW error is generated.
    */
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -125,19 +125,6 @@ create_pass_manager(struct gallivm_state *gallivm)
   LLVMAddTargetData(gallivm->target, gallivm->passmgr);
 #endif

-   /* Setting the module's DataLayout to an empty string will cause the
-    * ExecutionEngine to copy to the DataLayout string from its target
-    * machine to the module.  As of LLVM 3.8 the module and the execution
-    * engine are required to have the same DataLayout.
-    *
-    * TODO: This is just a temporary work-around.  The correct solution is
-    * for gallivm_init_state() to create a TargetMachine and pull the
-    * DataLayout from there.  Currently, the TargetMachine used by llvmpipe
-    * is being implicitly created by the EngineBuilder in
-    * lp_build_create_jit_compiler_for_module()
-    */
-
-#if HAVE_LLVM < 0x0308
   {
      char *td_str;
      // New ones from the Module.
@@ -145,9 +132,6 @@ create_pass_manager(struct gallivm_state *gallivm)
      LLVMSetDataLayout(gallivm->module, td_str);
      free(td_str);
   }
-#else
-   LLVMSetDataLayout(gallivm->module, "");
-#endif

   if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
      /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
@@ -628,6 +612,24 @@ gallivm_compile_module(struct gallivm_state *gallivm)
   }

   if (use_mcjit) {
+      /* Setting the module's DataLayout to an empty string will cause the
+       * ExecutionEngine to copy to the DataLayout string from its target
+       * machine to the module.  As of LLVM 3.8 the module and the execution
+       * engine are required to have the same DataLayout.
+       *
+       * We must make sure we do this after running the optimization passes,
+       * because those passes need a correct datalayout string.  For example,
+       * if those optimization passes see an empty datalayout, they will assume
+       * this is a little endian target and will do optimizations that break big
+       * endian machines.
+       *
+       * TODO: This is just a temporary work-around.  The correct solution is
+       * for gallivm_init_state() to create a TargetMachine and pull the
+       * DataLayout from there.  Currently, the TargetMachine used by llvmpipe
+       * is being implicitly created by the EngineBuilder in
+       * lp_build_create_jit_compiler_for_module()
+       */
+      LLVMSetDataLayout(gallivm->module, "");
      assert(!gallivm->engine);
      if (!init_gallivm_engine(gallivm)) {
         assert(0);
--- a/src/gallium/auxiliary/renderonly/renderonly.c
+++ b/src/gallium/auxiliary/renderonly/renderonly.c
@@ -117,6 +117,7 @@ renderonly_create_kms_dumb_buffer_for_resource(struct pipe_resource *rsc,
   }

   /* import dumb buffer */
+   memset(&handle, 0, sizeof(handle));
   handle.type = DRM_API_HANDLE_TYPE_FD;
   handle.handle = prime_fd;
   handle.stride = create_dumb.pitch;
--- a/src/gallium/drivers/etnaviv/etnaviv_resource.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_resource.c
@@ -179,7 +179,7 @@ etna_resource_alloc(struct pipe_screen *pscreen, unsigned layout,
                        &paddingY, &halign);
   assert(paddingX && paddingY);

-   if (templat->bind != PIPE_BUFFER) {
+   if (templat->target != PIPE_BUFFER) {
      unsigned min_paddingY = 4 * screen->specs.pixel_pipes;
      if (paddingY < min_paddingY)
         paddingY = min_paddingY;
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -521,6 +521,8 @@ NVC0LegalizePostRA::replaceZero(Instruction *i)
   for (int s = 0; i->srcExists(s); ++s) {
      if (s == 2 && i->op == OP_SUCLAMP)
         continue;
+      if (s == 1 && i->op == OP_SHLADD)
+         continue;
      ImmediateValue *imm = i->getSrc(s)->asImm();
      if (imm) {
         if (i->op == OP_SELP && s == 2) {
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -165,7 +165,8 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
            prsc->width0 == box->width &&
            prsc->height0 == box->height &&
            prsc->depth0 == box->depth &&
-            prsc->array_size == 1) {
+            prsc->array_size == 1 &&
+            rsc->bo->private) {
                usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
        }

--- a/src/gallium/state_trackers/va/context.c
+++ b/src/gallium/state_trackers/va/context.c
@@ -139,8 +139,8 @@ VA_DRIVER_INIT_FUNC(VADriverContextP ctx)
      drv->vscreen = vl_drm_screen_create(drm_info->fd);
      if (!drv->vscreen)
         goto error_screen;
-      }
      break;
+   }
   default:
      FREE(drv);
      return VA_STATUS_ERROR_INVALID_DISPLAY;
--- a/src/glx/g_glxglvnddispatchfuncs.c
+++ b/src/glx/g_glxglvnddispatchfuncs.c
@@ -4,6 +4,7 @@
 */
 #include <stdlib.h>

+#include "glxclient.h"
 #include "glxglvnd.h"
 #include "glxglvnddispatchfuncs.h"
 #include "g_glxglvnddispatchindices.h"
@@ -50,6 +51,7 @@ const char * const __glXDispatchTableStrings[DI_LAST_INDEX] = {
    __ATTRIB(GetCurrentDisplayEXT),
    // glXGetCurrentDrawable implemented by libglvnd
    // glXGetCurrentReadDrawable implemented by libglvnd
+    __ATTRIB(GetDriverConfig),
    // glXGetFBConfigAttrib implemented by libglvnd
    __ATTRIB(GetFBConfigAttribSGIX),
    __ATTRIB(GetFBConfigFromVisualSGIX),
@@ -334,6 +336,17 @@ static Display *dispatch_GetCurrentDisplayEXT(void)



+static const char *dispatch_GetDriverConfig(const char *driverName)
+{
+    /*
+     * The options are constant for a given driverName, so we do not need
+     * a context (and apps expect to be able to call this without one).
+     */
+    return glXGetDriverConfig(driverName);
+}
+
+
+
 static int dispatch_GetFBConfigAttribSGIX(Display *dpy, GLXFBConfigSGIX config,
                                          int attribute, int *value_return)
 {
@@ -939,6 +952,7 @@ const void * const __glXDispatchFunctions[DI_LAST_INDEX + 1] = {
    __ATTRIB(DestroyGLXPbufferSGIX),
    __ATTRIB(GetContextIDEXT),
    __ATTRIB(GetCurrentDisplayEXT),
+    __ATTRIB(GetDriverConfig),
    __ATTRIB(GetFBConfigAttribSGIX),
    __ATTRIB(GetFBConfigFromVisualSGIX),
    __ATTRIB(GetMscRateOML),
--- a/src/glx/g_glxglvnddispatchindices.h
+++ b/src/glx/g_glxglvnddispatchindices.h
@@ -39,6 +39,7 @@ typedef enum __GLXdispatchIndex {
    DI_GetCurrentDisplayEXT,
    // GetCurrentDrawable implemented by libglvnd
    // GetCurrentReadDrawable implemented by libglvnd
+    DI_GetDriverConfig,
    // GetFBConfigAttrib implemented by libglvnd
    DI_GetFBConfigAttribSGIX,
    DI_GetFBConfigFromVisualSGIX,
--- a/src/intel/isl/isl_gen7.c
+++ b/src/intel/isl/isl_gen7.c
@@ -352,30 +352,12 @@ gen7_choose_valign_el(const struct isl_device *dev,
   if (isl_surf_usage_is_stencil(info->usage)) {
      /* The Ivybridge PRM states that the stencil buffer's vertical alignment
       * is 8 [Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.4 Alignment
-       * Unit Size]. However, valign=8 is outside the set of valid values of
-       * RENDER_SURFACE_STATE.SurfaceVerticalAlignment, which is VALIGN_2
-       * (0x0) and VALIGN_4 (0x1).
-       *
-       * The PRM is generally confused about the width, height, and alignment
-       * of the stencil buffer; and this confusion appears elsewhere. For
-       * example, the following PRM text effectively converts the stencil
-       * buffer's 8-pixel alignment to a 4-pixel alignment [Ivybridge PRM,
-       * Volume 1, Part 1, Section
-       * 6.18.4.2 Base Address and LOD Calculation]:
-       *
-       *    For separate stencil buffer, the width must be mutiplied by 2 and
-       *    height divided by 2 as follows:
-       *
-       *       w_L = 2*i*ceil(W_L/i)
-       *       h_L = 1/2*j*ceil(H_L/j)
-       *
-       * The root of the confusion is that, in W tiling, each pair of rows is
-       * interleaved into one.
-       *
-       * FINISHME(chadv): Decide to set valign=4 or valign=8 after isl's API
-       * is more polished.
+       * Unit Size]. valign=8 is outside the set of valid values of
+       * RENDER_SURFACE_STATE.SurfaceVerticalAlignment, but that's ok because
+       * a stencil buffer will never be used directly for texturing or
+       * rendering on gen7.
       */
-      require_valign4 = true;
+      return 8;
   }

   assert(!require_valign2 || !require_valign4);
--- a/src/intel/vulkan/Makefile.am
+++ b/src/intel/vulkan/Makefile.am
@@ -93,7 +93,20 @@ VULKAN_SOURCES = \
 	$(VULKAN_GENERATED_FILES) \
 	$(VULKAN_FILES)

-VULKAN_LIB_DEPS =
+VULKAN_LIB_DEPS = \
+	libvulkan_common.la \
+	$(top_builddir)/src/vulkan/wsi/libvulkan_wsi.la \
+	$(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \
+	$(top_builddir)/src/compiler/nir/libnir.la \
+	$(top_builddir)/src/util/libmesautil.la \
+	$(top_builddir)/src/intel/common/libintel_common.la \
+	$(top_builddir)/src/intel/isl/libisl.la \
+	$(top_builddir)/src/intel/blorp/libblorp.la \
+	$(PER_GEN_LIBS) \
+	$(PTHREAD_LIBS) \
+	$(DLOPEN_LIBS) \
+	-lm
+

 if HAVE_PLATFORM_X11
 AM_CPPFLAGS += \
@@ -123,20 +136,6 @@ endif
 noinst_LTLIBRARIES += libvulkan_common.la
 libvulkan_common_la_SOURCES = $(VULKAN_SOURCES)

-VULKAN_LIB_DEPS += \
-	libvulkan_common.la \
-	$(top_builddir)/src/vulkan/wsi/libvulkan_wsi.la \
-	$(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \
-	$(top_builddir)/src/compiler/nir/libnir.la \
-	$(top_builddir)/src/util/libmesautil.la \
-	$(top_builddir)/src/intel/common/libintel_common.la \
-	$(top_builddir)/src/intel/isl/libisl.la \
-	$(top_builddir)/src/intel/blorp/libblorp.la \
-	$(PER_GEN_LIBS) \
-	$(PTHREAD_LIBS) \
-	$(DLOPEN_LIBS) \
-	-lm
-
 nodist_EXTRA_libvulkan_intel_la_SOURCES = dummy.cpp
 libvulkan_intel_la_SOURCES = $(VULKAN_GEM_FILES)

--- a/src/intel/vulkan/anv_formats.c
+++ b/src/intel/vulkan/anv_formats.c
@@ -177,8 +177,8 @@ static const struct anv_format anv_formats[] = {
   fmt(VK_FORMAT_D24_UNORM_S8_UINT,       ISL_FORMAT_R24_UNORM_X8_TYPELESS),
   fmt(VK_FORMAT_D32_SFLOAT_S8_UINT,      ISL_FORMAT_R32_FLOAT),

-   fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK,     ISL_FORMAT_DXT1_RGB),
-   fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK,      ISL_FORMAT_DXT1_RGB_SRGB),
+   swiz_fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK,     ISL_FORMAT_BC1_UNORM, RGB1),
+   swiz_fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK,      ISL_FORMAT_BC1_UNORM_SRGB, RGB1),
   fmt(VK_FORMAT_BC1_RGBA_UNORM_BLOCK,    ISL_FORMAT_BC1_UNORM),
   fmt(VK_FORMAT_BC1_RGBA_SRGB_BLOCK,     ISL_FORMAT_BC1_UNORM_SRGB),
   fmt(VK_FORMAT_BC2_UNORM_BLOCK,         ISL_FORMAT_BC2_UNORM),
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -877,6 +877,22 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
      DBG("%s (fast) to mt %p level %d layers %d+%d\n", __FUNCTION__,
          irb->mt, irb->mt_level, irb->mt_layer, num_layers);

+      /* Ivybrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
+       *
+       *    "Any transition from any value in {Clear, Render, Resolve} to a
+       *    different value in {Clear, Render, Resolve} requires end of pipe
+       *    synchronization."
+       *
+       * In other words, fast clear ops are not properly synchronized with
+       * other drawing.  We need to use a PIPE_CONTROL to ensure that the
+       * contents of the previous draw hit the render target before we resolve
+       * and again afterwards to ensure that the resolve is complete before we
+       * do any more regular drawing.
+       */
+      brw_emit_pipe_control_flush(brw,
+                                  PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                                  PIPE_CONTROL_CS_STALL);
+
      struct blorp_batch batch;
      blorp_batch_init(&brw->blorp, &batch, brw, 0);
      blorp_fast_clear(&batch, &surf,
@@ -885,6 +901,10 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
                       x0, y0, x1, y1);
      blorp_batch_finish(&batch);

+      brw_emit_pipe_control_flush(brw,
+                                  PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                                  PIPE_CONTROL_CS_STALL);
+
      /* Now that the fast clear has occurred, put the buffer in
       * INTEL_FAST_CLEAR_STATE_CLEAR so that we won't waste time doing
       * redundant clears.
@@ -910,17 +930,6 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
      blorp_batch_finish(&batch);
   }

-   /*
-    * Ivybrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
-    *
-    *  Any transition from any value in {Clear, Render, Resolve} to a
-    *  different value in {Clear, Render, Resolve} requires end of pipe
-    *  synchronization.
-    */
-   brw_emit_pipe_control_flush(brw,
-                               PIPE_CONTROL_RENDER_TARGET_FLUSH |
-                               PIPE_CONTROL_CS_STALL);
-
   return true;
 }

@@ -982,6 +991,23 @@ brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt,
      resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
   }

+   /* Ivybrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
+    *
+    *    "Any transition from any value in {Clear, Render, Resolve} to a
+    *    different value in {Clear, Render, Resolve} requires end of pipe
+    *    synchronization."
+    *
+    * In other words, fast clear ops are not properly synchronized with
+    * other drawing.  We need to use a PIPE_CONTROL to ensure that the
+    * contents of the previous draw hit the render target before we resolve
+    * and again afterwards to ensure that the resolve is complete before we
+    * do any more regular drawing.
+    */
+   brw_emit_pipe_control_flush(brw,
+                               PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                               PIPE_CONTROL_CS_STALL);
+
+
   struct blorp_batch batch;
   blorp_batch_init(&brw->blorp, &batch, brw, 0);
   blorp_ccs_resolve(&batch, &surf, level, layer,
@@ -989,13 +1015,7 @@ brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt,
                     resolve_op);
   blorp_batch_finish(&batch);

-   /*
-    * Ivybrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
-    *
-    *  Any transition from any value in {Clear, Render, Resolve} to a
-    *  different value in {Clear, Render, Resolve} requires end of pipe
-    *  synchronization.
-    */
+   /* See comment above */
   brw_emit_pipe_control_flush(brw,
                               PIPE_CONTROL_RENDER_TARGET_FLUSH |
                               PIPE_CONTROL_CS_STALL);
--- a/src/mesa/drivers/dri/i965/brw_surface_formats.c
+++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c
@@ -94,7 +94,7 @@ brw_format_for_mesa_format(mesa_format mesa_format)
      [MESA_FORMAT_L_SRGB8] = BRW_SURFACEFORMAT_L8_UNORM_SRGB,
      [MESA_FORMAT_L8A8_SRGB] = BRW_SURFACEFORMAT_L8A8_UNORM_SRGB,
      [MESA_FORMAT_A8L8_SRGB] = 0,
-      [MESA_FORMAT_SRGB_DXT1] = BRW_SURFACEFORMAT_DXT1_RGB_SRGB,
+      [MESA_FORMAT_SRGB_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM_SRGB,
      [MESA_FORMAT_SRGBA_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM_SRGB,
      [MESA_FORMAT_SRGBA_DXT3] = BRW_SURFACEFORMAT_BC2_UNORM_SRGB,
      [MESA_FORMAT_SRGBA_DXT5] = BRW_SURFACEFORMAT_BC3_UNORM_SRGB,
@@ -541,17 +541,6 @@ translate_tex_format(struct brw_context *brw,
       */
      return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;

-   case MESA_FORMAT_SRGB_DXT1:
-      if (brw->gen == 4 && !brw->is_g4x) {
-         /* Work around missing SRGB DXT1 support on original gen4 by just
-          * skipping SRGB decode.  It's not worth not supporting sRGB in
-          * general to prevent this.
-          */
-         WARN_ONCE(true, "Demoting sRGB DXT1 texture to non-sRGB\n");
-         mesa_format = MESA_FORMAT_RGB_DXT1;
-      }
-      return brw_format_for_mesa_format(mesa_format);
-
   case MESA_FORMAT_RGBA_ASTC_4x4:
   case MESA_FORMAT_RGBA_ASTC_5x4:
   case MESA_FORMAT_RGBA_ASTC_5x5:
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -578,16 +578,46 @@ vec4_visitor::split_uniform_registers()
   }
 }

+/* This function returns the register number where we placed the uniform */
+static int
+set_push_constant_loc(const int nr_uniforms, int *new_uniform_count,
+                      const int src, const int size, const int channel_size,
+                      int *new_loc, int *new_chan,
+                      int *new_chans_used)
+{
+   int dst;
+   /* Find the lowest place we can slot this uniform in. */
+   for (dst = 0; dst < nr_uniforms; dst++) {
+      if (ALIGN(new_chans_used[dst], channel_size) + size <= 4)
+         break;
+   }
+
+   assert(dst < nr_uniforms);
+
+   new_loc[src] = dst;
+   new_chan[src] = ALIGN(new_chans_used[dst], channel_size);
+   new_chans_used[dst] = ALIGN(new_chans_used[dst], channel_size) + size;
+
+   *new_uniform_count = MAX2(*new_uniform_count, dst + 1);
+   return dst;
+}
+
 void
 vec4_visitor::pack_uniform_registers()
 {
   uint8_t chans_used[this->uniforms];
   int new_loc[this->uniforms];
   int new_chan[this->uniforms];
+   bool is_aligned_to_dvec4[this->uniforms];
+   int new_chans_used[this->uniforms];
+   int channel_sizes[this->uniforms];

   memset(chans_used, 0, sizeof(chans_used));
   memset(new_loc, 0, sizeof(new_loc));
   memset(new_chan, 0, sizeof(new_chan));
+   memset(new_chans_used, 0, sizeof(new_chans_used));
+   memset(is_aligned_to_dvec4, 0, sizeof(is_aligned_to_dvec4));
+   memset(channel_sizes, 0, sizeof(channel_sizes));

   /* Find which uniform vectors are actually used by the program.  We
    * expect unused vector elements when we've moved array access out
@@ -617,7 +647,7 @@ vec4_visitor::pack_uniform_registers()
            continue;

         assert(type_sz(inst->src[i].type) % 4 == 0);
-         unsigned channel_size = type_sz(inst->src[i].type) / 4;
+         int channel_size = type_sz(inst->src[i].type) / 4;

         int reg = inst->src[i].nr;
         for (int c = 0; c < 4; c++) {
@@ -626,10 +656,15 @@ vec4_visitor::pack_uniform_registers()

            unsigned channel = BRW_GET_SWZ(inst->src[i].swizzle, c) + 1;
            unsigned used = MAX2(chans_used[reg], channel * channel_size);
-            if (used <= 4)
+            if (used <= 4) {
               chans_used[reg] = used;
-            else
+               channel_sizes[reg] = MAX2(channel_sizes[reg], channel_size);
+            } else {
+               is_aligned_to_dvec4[reg] = true;
+               is_aligned_to_dvec4[reg + 1] = true;
               chans_used[reg + 1] = used - 4;
+               channel_sizes[reg + 1] = MAX2(channel_sizes[reg + 1], channel_size);
+            }
         }
      }

@@ -654,42 +689,60 @@ vec4_visitor::pack_uniform_registers()

   int new_uniform_count = 0;

+   /* As the uniforms are going to be reordered, take the data from a temporary
+    * copy of the original param[].
+    */
+   gl_constant_value **param = ralloc_array(NULL, gl_constant_value*,
+                                            stage_prog_data->nr_params);
+   memcpy(param, stage_prog_data->param,
+          sizeof(gl_constant_value*) * stage_prog_data->nr_params);
+
   /* Now, figure out a packing of the live uniform vectors into our
-    * push constants.
+    * push constants. Start with dvec{3,4} because they are aligned to
+    * dvec4 size (2 vec4).
    */
   for (int src = 0; src < uniforms; src++) {
      int size = chans_used[src];

-      if (size == 0)
+      if (size == 0 || !is_aligned_to_dvec4[src])
         continue;

-      int dst;
-      /* Find the lowest place we can slot this uniform in. */
-      for (dst = 0; dst < src; dst++) {
-         if (chans_used[dst] + size <= 4)
-            break;
+      /* dvec3 are aligned to dvec4 size, apply the alignment of the size
+       * to 4 to avoid moving last component of a dvec3 to the available
+       * location at the end of a previous dvec3. These available locations
+       * could be filled by smaller variables in next loop.
+       */
+      size = ALIGN(size, 4);
+      int dst = set_push_constant_loc(uniforms, &new_uniform_count,
+                                      src, size, channel_sizes[src],
+                                      new_loc, new_chan,
+                                      new_chans_used);
+      /* Move the references to the data */
+      for (int j = 0; j < size; j++) {
+         stage_prog_data->param[dst * 4 + new_chan[src] + j] =
+            param[src * 4 + j];
      }
-
-      if (src == dst) {
-         new_loc[src] = dst;
-         new_chan[src] = 0;
-      } else {
-         new_loc[src] = dst;
-         new_chan[src] = chans_used[dst];
-
-         /* Move the references to the data */
-         for (int j = 0; j < size; j++) {
-            stage_prog_data->param[dst * 4 + new_chan[src] + j] =
-               stage_prog_data->param[src * 4 + j];
-         }
-
-         chans_used[dst] += size;
-         chans_used[src] = 0;
-      }
-
-      new_uniform_count = MAX2(new_uniform_count, dst + 1);
   }

+   /* Continue with the rest of data, which is aligned to vec4. */
+   for (int src = 0; src < uniforms; src++) {
+      int size = chans_used[src];
+
+      if (size == 0 || is_aligned_to_dvec4[src])
+         continue;
+
+      int dst = set_push_constant_loc(uniforms, &new_uniform_count,
+                                      src, size, channel_sizes[src],
+                                      new_loc, new_chan,
+                                      new_chans_used);
+      /* Move the references to the data */
+      for (int j = 0; j < size; j++) {
+         stage_prog_data->param[dst * 4 + new_chan[src] + j] =
+            param[src * 4 + j];
+      }
+   }
+
+   ralloc_free(param);
   this->uniforms = new_uniform_count;

   /* Now, update the instructions for our repacked uniforms. */
@@ -700,9 +753,9 @@ vec4_visitor::pack_uniform_registers()
         if (inst->src[i].file != UNIFORM)
            continue;

+         int chan = new_chan[src] / channel_sizes[src];
         inst->src[i].nr = new_loc[src];
-         inst->src[i].swizzle += BRW_SWIZZLE4(new_chan[src], new_chan[src],
-                                              new_chan[src], new_chan[src]);
+         inst->src[i].swizzle += BRW_SWIZZLE4(chan, chan, chan, chan);
      }
   }
 }
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -849,10 +849,36 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,

         vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader,
                           mem_ctx, true /* no_spills */, shader_time_index);
+
+         /* Backup 'nr_params' and 'param' as they can be modified by the
+          * the DUAL_OBJECT visitor. If it fails, we will run the fallback
+          * (DUAL_INSTANCED or SINGLE mode) and we need to restore original
+          * values.
+          */
+         const unsigned param_count = prog_data->base.base.nr_params;
+         gl_constant_value **param = ralloc_array(NULL, gl_constant_value*,
+                                                  param_count);
+         memcpy(param, prog_data->base.base.param,
+                sizeof(gl_constant_value*) * param_count);
+
         if (v.run()) {
+            /* Success! Backup is not needed */
+            ralloc_free(param);
            return brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
                                              shader, &prog_data->base, v.cfg,
                                              final_assembly_size);
+         } else {
+            /* These variables could be modified by the execution of the GS
+             * visitor if it packed the uniforms in the push constant buffer.
+             * As it failed, we need restore them so we can start again with
+             * DUAL_INSTANCED or SINGLE mode.
+             *
+             * FIXME: Could more variables be modified by this execution?
+             */
+            memcpy(prog_data->base.base.param, param,
+                   sizeof(gl_constant_value*) * param_count);
+            prog_data->base.base.nr_params = param_count;
+            ralloc_free(param);
         }
      }
   }
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -804,7 +804,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       * The swizzle also works in the indirect case as the generator adds
       * the swizzle to the offset for us.
       */
-      unsigned shift = (nir_intrinsic_base(instr) % 16) / 4;
+      const int type_size = type_sz(src.type);
+      unsigned shift = (nir_intrinsic_base(instr) % 16) / type_size;
      assert(shift + instr->num_components <= 4);

      nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
@@ -812,14 +813,20 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
         /* Offsets are in bytes but they should always be multiples of 4 */
         assert(const_offset->u32[0] % 4 == 0);

-         unsigned offset = const_offset->u32[0] + shift * 4;
+         src.swizzle = brw_swizzle_for_size(instr->num_components);
+         dest.writemask = brw_writemask_for_size(instr->num_components);
+         unsigned offset = const_offset->u32[0] + shift * type_size;
         src.offset = ROUND_DOWN_TO(offset, 16);
-         shift = (offset % 16) / 4;
+         shift = (offset % 16) / type_size;
+         assert(shift + instr->num_components <= 4);
         src.swizzle += BRW_SWIZZLE4(shift, shift, shift, shift);

         emit(MOV(dest, src));
      } else {
-         src.swizzle += BRW_SWIZZLE4(shift, shift, shift, shift);
+         /* Uniform arrays are vec4 aligned, because of std140 alignment
+          * rules.
+          */
+         assert(shift == 0);

         src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);

--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -391,7 +391,9 @@ brw_get_texture_swizzle(const struct gl_context *ctx,
   case GL_RED:
   case GL_RG:
   case GL_RGB:
-      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
+      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
+          img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
+          img->TexFormat == MESA_FORMAT_SRGB_DXT1)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -430,8 +430,8 @@ intel_miptree_copy(struct brw_context *brw,

      src_x /= (int)bw;
      src_y /= (int)bh;
-      src_width /= (int)bw;
-      src_height /= (int)bh;
+      src_width = DIV_ROUND_UP(src_width, (int)bw);
+      src_height = DIV_ROUND_UP(src_height, (int)bh);
   }
   src_x += src_image_x;
   src_y += src_image_y;
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -1646,17 +1646,23 @@ _mesa_make_current( struct gl_context *newCtx,
      _mesa_flush(curCtx);

   /* We used to call _glapi_check_multithread() here.  Now do it in drivers */
-   _glapi_set_context((void *) newCtx);
-   assert(_mesa_get_current_context() == newCtx);

   if (!newCtx) {
      _glapi_set_dispatch(NULL);  /* none current */
+      /* We need old ctx to correctly release Draw/ReadBuffer
+       * and avoid a surface leak in st_renderbuffer_delete.
+       * Therefore, first drop buffers then set new ctx to NULL.
+       */
      if (curCtx) {
         _mesa_reference_framebuffer(&curCtx->WinSysDrawBuffer, NULL);
         _mesa_reference_framebuffer(&curCtx->WinSysReadBuffer, NULL);
      }
+      _glapi_set_context(NULL);
+      assert(_mesa_get_current_context() == NULL);
   }
   else {
+      _glapi_set_context((void *) newCtx);
+      assert(_mesa_get_current_context() == newCtx);
      _glapi_set_dispatch(newCtx->CurrentDispatch);

      if (drawBuffer && readBuffer) {
--- a/src/vulkan/wsi/wsi_common_wayland.c
+++ b/src/vulkan/wsi/wsi_common_wayland.c
@@ -45,7 +45,11 @@
 struct wsi_wayland;

 struct wsi_wl_display {
-   struct wl_display *                          display;
+   /* The real wl_display */
+   struct wl_display *                          wl_display;
+   /* Actually a proxy wrapper around the event queue */
+   struct wl_display *                          wl_display_wrapper;
+   struct wl_event_queue *                      queue;
   struct wl_drm *                              drm;

   struct wsi_wayland *wsi_wl;
@@ -250,6 +254,10 @@ wsi_wl_display_destroy(struct wsi_wayland *wsi, struct wsi_wl_display *display)
   u_vector_finish(&display->formats);
   if (display->drm)
      wl_drm_destroy(display->drm);
+   if (display->wl_display_wrapper)
+      wl_proxy_wrapper_destroy(display->wl_display_wrapper);
+   if (display->queue)
+      wl_event_queue_destroy(display->queue);
   vk_free(wsi->alloc, display);
 }

@@ -264,40 +272,53 @@ wsi_wl_display_create(struct wsi_wayland *wsi, struct wl_display *wl_display)

   memset(display, 0, sizeof(*display));

-   display->display = wl_display;
   display->wsi_wl = wsi;
+   display->wl_display = wl_display;

   if (!u_vector_init(&display->formats, sizeof(VkFormat), 8))
      goto fail;

-   struct wl_registry *registry = wl_display_get_registry(wl_display);
+   display->queue = wl_display_create_queue(wl_display);
+   if (!display->queue)
+      goto fail;
+
+   display->wl_display_wrapper = wl_proxy_create_wrapper(wl_display);
+   if (!display->wl_display_wrapper)
+      goto fail;
+
+   wl_proxy_set_queue((struct wl_proxy *) display->wl_display_wrapper,
+                      display->queue);
+
+   struct wl_registry *registry =
+      wl_display_get_registry(display->wl_display_wrapper);
   if (!registry)
-      return NULL;
+      goto fail;

   wl_registry_add_listener(registry, &registry_listener, display);

-   /* Round-rip to get the wl_drm global */
-   wl_display_roundtrip(wl_display);
+   /* Round-trip to get the wl_drm global */
+   wl_display_roundtrip_queue(display->wl_display, display->queue);

   if (!display->drm)
-      goto fail;
+      goto fail_registry;

-   /* Round-rip to get wl_drm formats and capabilities */
-   wl_display_roundtrip(wl_display);
+   /* Round-trip to get wl_drm formats and capabilities */
+   wl_display_roundtrip_queue(display->wl_display, display->queue);

   /* We need prime support */
   if (!(display->capabilities & WL_DRM_CAPABILITY_PRIME))
-      goto fail;
+      goto fail_registry;

   /* We don't need this anymore */
   wl_registry_destroy(registry);

   return display;

-fail:
+fail_registry:
   if (registry)
      wl_registry_destroy(registry);

+fail:
   wsi_wl_display_destroy(wsi, display);
   return NULL;
 }
@@ -486,6 +507,8 @@ struct wsi_wl_swapchain {
   struct wsi_wl_display *                      display;
   struct wl_event_queue *                      queue;
   struct wl_surface *                          surface;
+   struct wl_drm *                              drm_wrapper;
+   struct wl_callback *                         frame;

   VkExtent2D                                   extent;
   VkFormat                                     vk_format;
@@ -532,7 +555,7 @@ wsi_wl_swapchain_acquire_next_image(struct wsi_swapchain *wsi_chain,
 {
   struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)wsi_chain;

-   int ret = wl_display_dispatch_queue_pending(chain->display->display,
+   int ret = wl_display_dispatch_queue_pending(chain->display->wl_display,
                                               chain->queue);
   /* XXX: I'm not sure if out-of-date is the right error here.  If
    * wl_display_dispatch_queue_pending fails it most likely means we got
@@ -554,7 +577,7 @@ wsi_wl_swapchain_acquire_next_image(struct wsi_swapchain *wsi_chain,
      /* This time we do a blocking dispatch because we can't go
       * anywhere until we get an event.
       */
-      int ret = wl_display_roundtrip_queue(chain->display->display,
+      int ret = wl_display_roundtrip_queue(chain->display->wl_display,
                                           chain->queue);
      if (ret < 0)
         return VK_ERROR_OUT_OF_DATE_KHR;
@@ -566,6 +589,7 @@ frame_handle_done(void *data, struct wl_callback *callback, uint32_t serial)
 {
   struct wsi_wl_swapchain *chain = data;

+   chain->frame = NULL;
   chain->fifo_ready = true;

   wl_callback_destroy(callback);
@@ -583,7 +607,7 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain,

   if (chain->base.present_mode == VK_PRESENT_MODE_FIFO_KHR) {
      while (!chain->fifo_ready) {
-         int ret = wl_display_dispatch_queue(chain->display->display,
+         int ret = wl_display_dispatch_queue(chain->display->wl_display,
                                             chain->queue);
         if (ret < 0)
            return VK_ERROR_OUT_OF_DATE_KHR;
@@ -595,15 +619,14 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain,
   wl_surface_damage(chain->surface, 0, 0, INT32_MAX, INT32_MAX);

   if (chain->base.present_mode == VK_PRESENT_MODE_FIFO_KHR) {
-      struct wl_callback *frame = wl_surface_frame(chain->surface);
-      wl_proxy_set_queue((struct wl_proxy *)frame, chain->queue);
-      wl_callback_add_listener(frame, &frame_listener, chain);
+      chain->frame = wl_surface_frame(chain->surface);
+      wl_callback_add_listener(chain->frame, &frame_listener, chain);
      chain->fifo_ready = false;
   }

   chain->images[image_index].busy = true;
   wl_surface_commit(chain->surface);
-   wl_display_flush(chain->display->display);
+   wl_display_flush(chain->display->wl_display);

   return VK_SUCCESS;
 }
@@ -646,7 +669,7 @@ wsi_wl_image_init(struct wsi_wl_swapchain *chain,
   if (result != VK_SUCCESS)
      return result;

-   image->buffer = wl_drm_create_prime_buffer(chain->display->drm,
+   image->buffer = wl_drm_create_prime_buffer(chain->drm_wrapper,
                                              fd, /* name */
                                              chain->extent.width,
                                              chain->extent.height,
@@ -654,13 +677,11 @@ wsi_wl_image_init(struct wsi_wl_swapchain *chain,
                                              offset,
                                              row_pitch,
                                              0, 0, 0, 0 /* unused */);
-   wl_display_roundtrip(chain->display->display);
   close(fd);

   if (!image->buffer)
      goto fail_image;

-   wl_proxy_set_queue((struct wl_proxy *)image->buffer, chain->queue);
   wl_buffer_add_listener(image->buffer, &buffer_listener, image);

   return VK_SUCCESS;
@@ -679,12 +700,23 @@ wsi_wl_swapchain_destroy(struct wsi_swapchain *wsi_chain,
   struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)wsi_chain;

   for (uint32_t i = 0; i < chain->image_count; i++) {
-      if (chain->images[i].buffer)
+      if (chain->images[i].buffer) {
+         wl_buffer_destroy(chain->images[i].buffer);
         chain->base.image_fns->free_wsi_image(chain->base.device, pAllocator,
                                               chain->images[i].image,
                                               chain->images[i].memory);
+      }
   }

+   if (chain->frame)
+      wl_callback_destroy(chain->frame);
+   if (chain->surface)
+      wl_proxy_wrapper_destroy(chain->surface);
+   if (chain->drm_wrapper)
+      wl_proxy_wrapper_destroy(chain->drm_wrapper);
+   if (chain->queue)
+      wl_event_queue_destroy(chain->queue);
+
   vk_free(pAllocator, chain);

   return VK_SUCCESS;
@@ -713,6 +745,16 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
   if (chain == NULL)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

+   /* Mark a bunch of stuff as NULL.  This way we can just call
+    * destroy_swapchain for cleanup.
+    */
+   for (uint32_t i = 0; i < num_images; i++)
+      chain->images[i].buffer = NULL;
+   chain->queue = NULL;
+   chain->surface = NULL;
+   chain->drm_wrapper = NULL;
+   chain->frame = NULL;
+
   bool alpha = pCreateInfo->compositeAlpha ==
                      VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR;

@@ -723,35 +765,39 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
   chain->base.queue_present = wsi_wl_swapchain_queue_present;
   chain->base.image_fns = image_fns;
   chain->base.present_mode = pCreateInfo->presentMode;
-   chain->surface = surface->surface;
   chain->extent = pCreateInfo->imageExtent;
   chain->vk_format = pCreateInfo->imageFormat;
   chain->drm_format = wl_drm_format_for_vk_format(chain->vk_format, alpha);

-   chain->fifo_ready = true;
-
   chain->image_count = num_images;

-   /* Mark a bunch of stuff as NULL.  This way we can just call
-    * destroy_swapchain for cleanup.
-    */
-   for (uint32_t i = 0; i < chain->image_count; i++)
-      chain->images[i].buffer = NULL;
-   chain->queue = NULL;
-
-   chain->display = wsi_wl_get_display(wsi_device,
-                                       surface->display);
+   chain->display = wsi_wl_get_display(wsi_device, surface->display);
   if (!chain->display) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto fail;
   }

-   chain->queue = wl_display_create_queue(chain->display->display);
+   chain->queue = wl_display_create_queue(chain->display->wl_display);
   if (!chain->queue) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto fail;
   }

+   chain->surface = wl_proxy_create_wrapper(surface->surface);
+   if (!chain->surface) {
+      result = VK_ERROR_INITIALIZATION_FAILED;
+      goto fail;
+   }
+   wl_proxy_set_queue((struct wl_proxy *) chain->surface, chain->queue);
+   chain->drm_wrapper = wl_proxy_create_wrapper(chain->display->drm);
+   if (!chain->drm_wrapper) {
+      result = VK_ERROR_INITIALIZATION_FAILED;
+      goto fail;
+   }
+   wl_proxy_set_queue((struct wl_proxy *) chain->drm_wrapper, chain->queue);
+
+   chain->fifo_ready = true;
+
   for (uint32_t i = 0; i < chain->image_count; i++) {
      result = wsi_wl_image_init(chain, &chain->images[i],
                                 pCreateInfo, pAllocator);
@@ -1 +1 @@
 .0.6
 .0.7