VERSION: bump to 20.0-rc2

gallium/swr: Fix gcc 4.8.5 compile error
Stop using a C++14 feature so it can be compiled with the default CentOS 7 gcc compiler. Reviewed-by: Jan Zielinski <jan.zielinski@intel.com> Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3679> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3679>
2020-02-07 08:42:33 -08:00 · 2020-02-05 17:58:23 +00:00 · 2020-02-05 08:58:42 -08:00 · 2020-02-05 08:58:41 -08:00 · 2020-02-05 08:58:41 -08:00 · 2020-02-05 08:58:40 -08:00
56 changed files with 2859 additions and 224 deletions
--- a/.pick_status.json
+++ b/.pick_status.json
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-20.0.0-devel
+20.0.0-rc2
--- a/bin/pick-ui.py
+++ b/bin/pick-ui.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+# Copyright © 2019-2020 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import asyncio
+
+import urwid
+
+from pick.ui import UI, PALETTE
+
+if __name__ == "__main__":
+    u = UI()
+    evl = urwid.AsyncioEventLoop(loop=asyncio.get_event_loop())
+    loop = urwid.MainLoop(u.render(), PALETTE, event_loop=evl)
+    u.mainloop = loop
+    loop.run()
--- a/bin/pick/__init__.py
+++ b/bin/pick/__init__.py
--- a/bin/pick/core.py
+++ b/bin/pick/core.py
@@ -0,0 +1,367 @@
+# Copyright © 2019-2020 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+"""Core data structures and routines for pick."""
+
+import asyncio
+import enum
+import json
+import pathlib
+import re
+import typing
+
+import attr
+
+if typing.TYPE_CHECKING:
+    from .ui import UI
+
+    import typing_extensions
+
+    class CommitDict(typing_extensions.TypedDict):
+
+        sha: str
+        description: str
+        nomintated: bool
+        nomination_type: typing.Optional[int]
+        resolution: typing.Optional[int]
+        master_sha: typing.Optional[str]
+
+# Matches a "Fixes:" tag at the start of a line (case-insensitive) and
+# captures the 6 to 40 character hexadecimal sha that follows it.
+IS_FIX = re.compile(r'^\s*fixes:\s*([a-f0-9]{6,40})', flags=re.MULTILINE | re.IGNORECASE)
+# Matches a "Cc:" tag addressed to mesa-stable. Up to two optional version
+# numbers of the form NN.N, each optionally wrapped in single or double
+# quotes, are captured as groups 1 and 2 (None when absent).
+# FIXME: I dislike the duplication in this regex, but I couldn't get it to work otherwise
+IS_CC = re.compile(r'^\s*cc:\s*["\']?([0-9]{2}\.[0-9])?["\']?\s*["\']?([0-9]{2}\.[0-9])?["\']?\s*\<?mesa-stable',
+                   flags=re.MULTILINE | re.IGNORECASE)
+# Captures the full 40 character sha from a "This reverts commit <sha>" line.
+IS_REVERT = re.compile(r'This reverts commit ([0-9a-f]{40})')
+
+# Acquired around the git subprocesses spawned by is_commit_in_branch(),
+# full_sha() and resolve_nomination(), so at most 50 of them run at once.
+# XXX: hack
+SEM = asyncio.Semaphore(50)
+
+# Held while running git add/commit and git cherry-pick, so those operations
+# do not overlap with each other.
+COMMIT_LOCK = asyncio.Lock()
+
+
+class PickUIException(Exception):
+    pass
+
+
+@enum.unique
+class NominationType(enum.Enum):
+
+    """Which tag in a commit message made the commit a candidate.
+
+    The integer values are what Commit.to_json() writes out, so they must stay
+    stable.
+    """
+
+    # Set by resolve_nomination() when IS_CC matches for the given version.
+    CC = 0
+    # Set by resolve_nomination() when IS_FIX matches and the sha resolves.
+    FIXES = 1
+    # Set by resolve_nomination() when IS_REVERT matches and the sha resolves.
+    REVERT = 2
+
+
+@enum.unique
+class Resolution(enum.Enum):
+
+    """What has been decided about a commit.
+
+    The integer values are what Commit.to_json() writes out, so they must stay
+    stable.
+    """
+
+    # Default for a freshly created Commit.
+    UNRESOLVED = 0
+    # Set by Commit.apply() and Commit.resolve().
+    MERGED = 1
+    # Set by Commit.denominate() and by resolve_fixes() for reverted pairs.
+    DENOMINATED = 2
+    # Set by Commit.backport().
+    BACKPORTED = 3
+    # Set by gather_commits() for unresolved commits that are not nominated.
+    NOTNEEDED = 4
+
+
+async def commit_state(*, amend: bool = False, message: str = 'Update') -> None:
+    """Commit the .pick_status.json file."""
+    f = pathlib.Path(__file__).parent.parent.parent / '.pick_status.json'
+    async with COMMIT_LOCK:
+        p = await asyncio.create_subprocess_exec(
+            'git', 'add', f.as_posix(),
+            stdout=asyncio.subprocess.DEVNULL,
+            stderr=asyncio.subprocess.DEVNULL,
+        )
+        v = await p.wait()
+        if v != 0:
+            return False
+
+        if amend:
+            cmd = ['--amend', '--no-edit']
+        else:
+            cmd = ['--message', f'.pick_status.json: {message}']
+        p = await asyncio.create_subprocess_exec(
+            'git', 'commit', *cmd,
+            stdout=asyncio.subprocess.DEVNULL,
+            stderr=asyncio.subprocess.DEVNULL,
+        )
+        v = await p.wait()
+        if v != 0:
+            return False
+    return True
+
+
+@attr.s(slots=True)
+class Commit:
+
+    sha: str = attr.ib()
+    description: str = attr.ib()
+    nominated: bool = attr.ib(False)
+    nomination_type: typing.Optional[NominationType] = attr.ib(None)
+    resolution: Resolution = attr.ib(Resolution.UNRESOLVED)
+    master_sha: typing.Optional[str] = attr.ib(None)
+    because_sha: typing.Optional[str] = attr.ib(None)
+
+    def to_json(self) -> 'CommitDict':
+        d: typing.Dict[str, typing.Any] = attr.asdict(self)
+        if self.nomination_type is not None:
+            d['nomination_type'] = self.nomination_type.value
+        if self.resolution is not None:
+            d['resolution'] = self.resolution.value
+        return typing.cast('CommitDict', d)
+
+    @classmethod
+    def from_json(cls, data: 'CommitDict') -> 'Commit':
+        c = cls(data['sha'], data['description'], data['nominated'], master_sha=data['master_sha'], because_sha=data['because_sha'])
+        if data['nomination_type'] is not None:
+            c.nomination_type = NominationType(data['nomination_type'])
+        if data['resolution'] is not None:
+            c.resolution = Resolution(data['resolution'])
+        return c
+
+    async def apply(self, ui: 'UI') -> typing.Tuple[bool, str]:
+        # FIXME: This isn't really enough if we fail to cherry-pick because the
+        # git tree will still be dirty
+        async with COMMIT_LOCK:
+            p = await asyncio.create_subprocess_exec(
+                'git', 'cherry-pick', '-x', self.sha,
+                stdout=asyncio.subprocess.DEVNULL,
+                stderr=asyncio.subprocess.PIPE,
+            )
+            _, err = await p.communicate()
+
+        if p.returncode != 0:
+            return (False, err)
+
+        self.resolution = Resolution.MERGED
+        await ui.feedback(f'{self.sha} ({self.description}) applied successfully')
+
+        # Append the changes to the .pickstatus.json file
+        ui.save()
+        v = await commit_state(amend=True)
+        return (v, '')
+
+    async def abort_cherry(self, ui: 'UI', err: str) -> None:
+        await ui.feedback(f'{self.sha} ({self.description}) failed to apply\n{err}')
+        async with COMMIT_LOCK:
+            p = await asyncio.create_subprocess_exec(
+                'git', 'cherry-pick', '--abort',
+                stdout=asyncio.subprocess.DEVNULL,
+                stderr=asyncio.subprocess.DEVNULL,
+            )
+            r = await p.wait()
+        await ui.feedback(f'{"Successfully" if r == 0 else "Failed to"} abort cherry-pick.')
+
+    async def denominate(self, ui: 'UI') -> bool:
+        self.resolution = Resolution.DENOMINATED
+        ui.save()
+        v = await commit_state(message=f'Mark {self.sha} as denominated')
+        assert v
+        await ui.feedback(f'{self.sha} ({self.description}) denominated successfully')
+        return True
+
+    async def backport(self, ui: 'UI') -> bool:
+        self.resolution = Resolution.BACKPORTED
+        ui.save()
+        v = await commit_state(message=f'Mark {self.sha} as backported')
+        assert v
+        await ui.feedback(f'{self.sha} ({self.description}) backported successfully')
+        return True
+
+    async def resolve(self, ui: 'UI') -> None:
+        self.resolution = Resolution.MERGED
+        ui.save()
+        v = await commit_state(amend=True)
+        assert v
+        await ui.feedback(f'{self.sha} ({self.description}) committed successfully')
+
+
+async def get_new_commits(sha: str) -> typing.List[typing.Tuple[str, str]]:
+    # TODO: config file that points to the upstream branch
+    p = await asyncio.create_subprocess_exec(
+        'git', 'log', '--pretty=oneline', f'{sha}..master',
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.DEVNULL)
+    out, _ = await p.communicate()
+    assert p.returncode == 0, f"git log didn't work: {sha}"
+    return list(split_commit_list(out.decode().strip()))
+
+
+def split_commit_list(commits: str) -> typing.Generator[typing.Tuple[str, str], None, None]:
+    if not commits:
+        return
+    for line in commits.split('\n'):
+        v = tuple(line.split(' ', 1))
+        assert len(v) == 2, 'this is really just for mypy'
+        yield typing.cast(typing.Tuple[str, str], v)
+
+
+async def is_commit_in_branch(sha: str) -> bool:
+    async with SEM:
+        p = await asyncio.create_subprocess_exec(
+            'git', 'merge-base', '--is-ancestor', sha, 'HEAD',
+            stdout=asyncio.subprocess.DEVNULL,
+            stderr=asyncio.subprocess.DEVNULL,
+        )
+        await p.wait()
+    return p.returncode == 0
+
+
+async def full_sha(sha: str) -> str:
+    async with SEM:
+        p = await asyncio.create_subprocess_exec(
+            'git', 'rev-parse', sha,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.DEVNULL,
+        )
+        out, _ = await p.communicate()
+    if p.returncode:
+        raise PickUIException(f'Invalid Sha {sha}')
+    return out.decode().strip()
+
+
+async def resolve_nomination(commit: 'Commit', version: str) -> 'Commit':
+    """Inspect a commit's message and fill in its nomination fields.
+
+    The message is read with ``git log --pretty=medium -1`` and checked for a
+    Fixes: tag, then a Cc: mesa-stable tag, then a "This reverts commit" line.
+    The first check that nominates the commit returns immediately; a Fixes or
+    revert tag whose target is not in the branch still records
+    nomination_type and because_sha, then falls through to the later checks.
+
+    :param commit: the commit to examine; it is modified in place.
+    :param version: version string compared against the versions in a Cc: tag.
+    :return: the same commit object that was passed in.
+    """
+    async with SEM:
+        p = await asyncio.create_subprocess_exec(
+            'git', 'log', '--pretty=medium', '-1', commit.sha,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.DEVNULL,
+        )
+        _out, _ = await p.communicate()
+        assert p.returncode == 0, f'git log for {commit.sha} failed'
+    out = _out.decode()
+
+    # We give precedence to fixes and cc tags over revert tags.
+    # XXX: not having the walrus operator available makes me sad :=
+    m = IS_FIX.search(out)
+    if m:
+        # We set the nomination_type and because_sha here so that we can later
+        # check to see if this fixes another staged commit.
+        try:
+            commit.because_sha = fixed = await full_sha(m.group(1))
+        except PickUIException:
+            # The referenced sha could not be resolved; treat the tag as absent.
+            pass
+        else:
+            commit.nomination_type = NominationType.FIXES
+            if await is_commit_in_branch(fixed):
+                commit.nominated = True
+                return commit
+
+    m = IS_CC.search(out)
+    if m:
+        # A Cc: with no versions applies to every branch; otherwise the
+        # requested version has to be one of the listed ones.
+        if m.groups() == (None, None) or version in m.groups():
+            commit.nominated = True
+            commit.nomination_type = NominationType.CC
+            return commit
+
+    m = IS_REVERT.search(out)
+    if m:
+        # See comment for IS_FIX path
+        try:
+            commit.because_sha = reverted = await full_sha(m.group(1))
+        except PickUIException:
+            pass
+        else:
+            commit.nomination_type = NominationType.REVERT
+            if await is_commit_in_branch(reverted):
+                commit.nominated = True
+                return commit
+
+    return commit
+
+
+async def resolve_fixes(commits: typing.List['Commit'], previous: typing.List['Commit']) -> None:
+    """Determine if any of the undecided commits fix/revert a staged commit.
+
+    They are still needed if they apply to a commit that is staged for
+    inclusion, but not yet included.
+
+    This must be done in order, because a commit 3 might fix commit 2 which
+    fixes commit 1.
+
+    :param commits: the new commits to examine; they are modified in place.
+    :param previous: already known commits; the nominated ones count as staged.
+    """
+    # Start from the shas of the previously nominated commits.
+    shas: typing.Set[str] = set(c.sha for c in previous if c.nominated)
+    assert None not in shas, 'None in shas'
+
+    # NOTE(review): presumably `commits` is newest-first, so reversed() walks
+    # from oldest to newest -- confirm against the caller.
+    for commit in reversed(commits):
+        if not commit.nominated and commit.nomination_type is NominationType.FIXES:
+            commit.nominated = commit.because_sha in shas
+
+        # Every nominated commit becomes a possible target for later fixes.
+        if commit.nominated:
+            shas.add(commit.sha)
+
+    for commit in commits:
+        if (commit.nomination_type is NominationType.REVERT and
+                commit.because_sha in shas):
+            # Only a reverted commit that is itself in `commits` is handled;
+            # if it is not found there, nothing is changed.
+            for oldc in reversed(commits):
+                if oldc.sha == commit.because_sha:
+                    # In this case a commit that hasn't yet been applied is
+                    # reverted, we don't want to apply that commit at all
+                    oldc.nominated = False
+                    oldc.resolution = Resolution.DENOMINATED
+                    commit.nominated = False
+                    commit.resolution = Resolution.DENOMINATED
+                    shas.remove(commit.because_sha)
+                    break
+
+
+async def gather_commits(version: str, previous: typing.List['Commit'],
+                         new: typing.List[typing.Tuple[str, str]], cb) -> typing.List['Commit']:
+    """Build Commit objects for the new commits and decide their nomination.
+
+    :param version: version string passed on to resolve_nomination().
+    :param previous: already known commits, passed on to resolve_fixes().
+    :param new: (sha, description) pairs of the commits to process.
+    :param cb: callable invoked with no arguments each time one commit has
+        been run through resolve_nomination().
+    :return: one Commit per entry of ``new``, in the same order.
+    """
+    # We create an array of the final size up front, then we pass that array
+    # to the "inner" co-routine, which is turned into a list of tasks and
+    # collected by asyncio.gather. We do this to allow the tasks to be
+    # asynchronously gathered, but to also ensure that the commits list remains
+    # in order.
+    commits = [None] * len(new)
+    tasks = []
+
+    async def inner(commit: 'Commit', version: str, commits: typing.List['Commit'],
+                    index: int, cb) -> None:
+        # Each task writes only its own slot, so completion order is irrelevant.
+        commits[index] = await resolve_nomination(commit, version)
+        cb()
+
+    for i, (sha, desc) in enumerate(new):
+        tasks.append(asyncio.ensure_future(
+            inner(Commit(sha, desc), version, commits, i, cb)))
+
+    await asyncio.gather(*tasks)
+    assert None not in commits
+
+    await resolve_fixes(commits, previous)
+
+    # Whatever is still unresolved and not nominated needs no action.
+    for commit in commits:
+        if commit.resolution is Resolution.UNRESOLVED and not commit.nominated:
+            commit.resolution = Resolution.NOTNEEDED
+
+    return commits
+
+
+def load() -> typing.List['Commit']:
+    p = pathlib.Path(__file__).parent.parent.parent / '.pick_status.json'
+    if not p.exists():
+        return []
+    with p.open('r') as f:
+        raw = json.load(f)
+        return [Commit.from_json(c) for c in raw]
+
+
+def save(commits: typing.Iterable['Commit']) -> None:
+    p = pathlib.Path(__file__).parent.parent.parent / '.pick_status.json'
+    commits = list(commits)
+    with p.open('wt') as f:
+        json.dump([c.to_json() for c in commits], f, indent=4)
+
+    asyncio.ensure_future(commit_state(message=f'Update to {commits[0].sha}'))
--- a/bin/pick/core_test.py
+++ b/bin/pick/core_test.py
@@ -0,0 +1,470 @@
+# Copyright © 2019-2020 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+"""Tests for pick's core data structures and routines."""
+
+from unittest import mock
+import textwrap
+import typing
+
+import attr
+import pytest
+
+from . import core
+
+
+class TestCommit:
+
+    """Tests for the JSON conversion of core.Commit."""
+
+    @pytest.fixture
+    def unnominated_commit(self) -> 'core.Commit':
+        """A commit with default nomination fields and a master_sha."""
+        return core.Commit('abc123', 'sub: A commit', master_sha='45678')
+
+    @pytest.fixture
+    def nominated_commit(self) -> 'core.Commit':
+        """A commit nominated through a Cc tag that is still unresolved."""
+        return core.Commit('abc123', 'sub: A commit', True,
+                           core.NominationType.CC, core.Resolution.UNRESOLVED)
+
+    class TestToJson:
+
+        """Commit.to_json() emits enum values and keeps None fields as None."""
+
+        def test_not_nominated(self, unnominated_commit: 'core.Commit'):
+            c = unnominated_commit
+            v = c.to_json()
+            assert v == {'sha': 'abc123', 'description': 'sub: A commit', 'nominated': False,
+                         'nomination_type': None, 'resolution': core.Resolution.UNRESOLVED.value,
+                         'master_sha': '45678', 'because_sha': None}
+
+        def test_nominated(self, nominated_commit: 'core.Commit'):
+            c = nominated_commit
+            v = c.to_json()
+            assert v == {'sha': 'abc123',
+                         'description': 'sub: A commit',
+                         'nominated': True,
+                         'nomination_type': core.NominationType.CC.value,
+                         'resolution': core.Resolution.UNRESOLVED.value,
+                         'master_sha': None,
+                         'because_sha': None}
+
+    class TestFromJson:
+
+        """Commit.from_json() restores what Commit.to_json() produced."""
+
+        def test_not_nominated(self, unnominated_commit: 'core.Commit'):
+            c = unnominated_commit
+            v = c.to_json()
+            c2 = core.Commit.from_json(v)
+            assert c == c2
+
+        def test_nominated(self, nominated_commit: 'core.Commit'):
+            c = nominated_commit
+            v = c.to_json()
+            c2 = core.Commit.from_json(v)
+            assert c == c2
+
+
+class TestRE:
+
+    """Tests for the regular expressions used to identify commits."""
+
+    class TestFixes:
+
+        """Tests for core.IS_FIX."""
+
+        def test_simple(self):
+            """Tests that the sha following a Fixes: tag is captured"""
+            message = textwrap.dedent("""\
+                etnaviv: fix vertex buffer state emission for single stream GPUs
+
+                GPUs with a single supported vertex stream must use the single state
+                address to program the stream.
+
+                Fixes: 3d09bb390a39 (etnaviv: GC7000: State changes for HALTI3..5)
+                Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
+                Reviewed-by: Jonathan Marek <jonathan@marek.ca>
+            """)
+
+            m = core.IS_FIX.search(message)
+            assert m is not None
+            assert m.group(1) == '3d09bb390a39'
+
+    class TestCC:
+
+        """Tests for core.IS_CC."""
+
+        def test_single_branch(self):
+            """Tests commit meant for a single branch, ie, 19.2"""
+            message = textwrap.dedent("""\
+                radv: fix DCC fast clear code for intensity formats
+
+                This fixes a rendering issue with DiRT 4 on GFX10. Only GFX10 was
+                affected because intensity formats are different.
+
+                Cc: 19.2 <mesa-stable@lists.freedesktop.org>
+                Closes: https://gitlab.freedesktop.org/mesa/mesa/issues/1923
+                Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+                Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
+            """)
+
+            m = core.IS_CC.search(message)
+            assert m is not None
+            assert m.group(1) == '19.2'
+
+        def test_multiple_branches(self):
+            """Tests commit with more than one branch specified"""
+            message = textwrap.dedent("""\
+                radeonsi: enable zerovram for Rocket League
+
+                Fixes corruption on game startup.
+                Closes: https://gitlab.freedesktop.org/mesa/mesa/issues/1888
+
+                Cc: 19.1 19.2 <mesa-stable@lists.freedesktop.org>
+                Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
+            """)
+
+            m = core.IS_CC.search(message)
+            assert m is not None
+            assert m.group(1) == '19.1'
+            assert m.group(2) == '19.2'
+
+        def test_no_branch(self):
+            """Tests commit with no branch specification"""
+            message = textwrap.dedent("""\
+                anv/android: fix images created with external format support
+
+                This fixes a case where user first creates image and then later binds it
+                with memory created from AHW buffer.
+
+                Cc: <mesa-stable@lists.freedesktop.org>
+                Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+                Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
+            """)
+
+            m = core.IS_CC.search(message)
+            assert m is not None
+
+        def test_quotes(self):
+            """Tests commit with double quotes around a single version"""
+            message = textwrap.dedent("""\
+                 anv: Always fill out the AUX table even if CCS is disabled
+
+                 Cc: "20.0" mesa-stable@lists.freedesktop.org
+                 Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+                 Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3454>
+                 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3454>
+            """)
+
+            m = core.IS_CC.search(message)
+            assert m is not None
+            assert m.group(1) == '20.0'
+
+        def test_multiple_quotes(self):
+            """Tests commit with double quotes around each of two versions"""
+            message = textwrap.dedent("""\
+                 anv: Always fill out the AUX table even if CCS is disabled
+
+                 Cc: "20.0" "20.1" mesa-stable@lists.freedesktop.org
+                 Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+                 Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3454>
+                 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3454>
+            """)
+
+            m = core.IS_CC.search(message)
+            assert m is not None
+            assert m.group(1) == '20.0'
+            assert m.group(2) == '20.1'
+
+        def test_single_quotes(self):
+            """Tests commit with single quotes around a single version"""
+            message = textwrap.dedent("""\
+                 anv: Always fill out the AUX table even if CCS is disabled
+
+                 Cc: '20.0' mesa-stable@lists.freedesktop.org
+                 Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+                 Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3454>
+                 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3454>
+            """)
+
+            m = core.IS_CC.search(message)
+            assert m is not None
+            assert m.group(1) == '20.0'
+
+        def test_multiple_single_quotes(self):
+            """Tests commit with single quotes around each of two versions"""
+            message = textwrap.dedent("""\
+                 anv: Always fill out the AUX table even if CCS is disabled
+
+                 Cc: '20.0' '20.1' mesa-stable@lists.freedesktop.org
+                 Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+                 Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3454>
+                 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3454>
+            """)
+
+            m = core.IS_CC.search(message)
+            assert m is not None
+            assert m.group(1) == '20.0'
+            assert m.group(2) == '20.1'
+
+    class TestRevert:
+
+        """Tests for core.IS_REVERT."""
+
+        def test_simple(self):
+            """Tests that the reverted commit's full sha is captured"""
+            message = textwrap.dedent("""\
+                Revert "radv: do not emit PKT3_CONTEXT_CONTROL with AMDGPU 3.6.0+"
+
+                This reverts commit 2ca8629fa9b303e24783b76a7b3b0c2513e32fbd.
+
+                This was initially ported from RadeonSI, but in the meantime it has
+                been reverted because it might hang. Be conservative and re-introduce
+                this packet emission.
+
+                Unfortunately this doesn't fix anything known.
+
+                Cc: 19.2 <mesa-stable@lists.freedesktop.org>
+                Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+                Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
+            """)
+
+            m = core.IS_REVERT.search(message)
+            assert m is not None
+            assert m.group(1) == '2ca8629fa9b303e24783b76a7b3b0c2513e32fbd'
+
+
+class TestResolveNomination:
+
+    """Tests for core.resolve_nomination() with the git subprocess mocked out."""
+
+    @attr.s(slots=True)
+    class FakeSubprocess:
+
+        """A fake asyncio.subprocess like class for use with mock."""
+
+        # Bytes handed back as stdout by communicate().
+        out: typing.Optional[bytes] = attr.ib(None)
+        returncode: int = attr.ib(0)
+
+        async def mock(self, *_, **__):
+            """A dirty little helper for mocking."""
+            # Stands in for asyncio.create_subprocess_exec: awaiting it gives
+            # back this fake process regardless of the arguments.
+            return self
+
+        async def communicate(self) -> typing.Tuple[bytes, bytes]:
+            assert self.out is not None
+            return self.out, b''
+
+        async def wait(self) -> int:
+            return self.returncode
+
+    @staticmethod
+    async def return_true(*_, **__) -> bool:
+        """Replacement for is_commit_in_branch that always answers True."""
+        return True
+
+    @staticmethod
+    async def return_false(*_, **__) -> bool:
+        """Replacement for is_commit_in_branch that always answers False."""
+        return False
+
+    @pytest.mark.asyncio
+    async def test_fix_is_nominated(self):
+        s = self.FakeSubprocess(b'Fixes: 3d09bb390a39 (etnaviv: GC7000: State changes for HALTI3..5)')
+        c = core.Commit('abcdef1234567890', 'a commit')
+
+        with mock.patch('bin.pick.core.asyncio.create_subprocess_exec', s.mock):
+            with mock.patch('bin.pick.core.is_commit_in_branch', self.return_true):
+                await core.resolve_nomination(c, '')
+
+        assert c.nominated
+        assert c.nomination_type is core.NominationType.FIXES
+
+    @pytest.mark.asyncio
+    async def test_fix_is_not_nominated(self):
+        s = self.FakeSubprocess(b'Fixes: 3d09bb390a39 (etnaviv: GC7000: State changes for HALTI3..5)')
+        c = core.Commit('abcdef1234567890', 'a commit')
+
+        with mock.patch('bin.pick.core.asyncio.create_subprocess_exec', s.mock):
+            with mock.patch('bin.pick.core.is_commit_in_branch', self.return_false):
+                await core.resolve_nomination(c, '')
+
+        # The type is still recorded even though the commit is not nominated.
+        assert not c.nominated
+        assert c.nomination_type is core.NominationType.FIXES
+
+    @pytest.mark.asyncio
+    async def test_cc_is_nominated(self):
+        s = self.FakeSubprocess(b'Cc: 16.2 <mesa-stable@lists.freedesktop.org>')
+        c = core.Commit('abcdef1234567890', 'a commit')
+
+        with mock.patch('bin.pick.core.asyncio.create_subprocess_exec', s.mock):
+            await core.resolve_nomination(c, '16.2')
+
+        assert c.nominated
+        assert c.nomination_type is core.NominationType.CC
+
+    @pytest.mark.asyncio
+    async def test_cc_is_nominated2(self):
+        # A Cc: without any version is nominated for every version.
+        s = self.FakeSubprocess(b'Cc: mesa-stable@lists.freedesktop.org')
+        c = core.Commit('abcdef1234567890', 'a commit')
+
+        with mock.patch('bin.pick.core.asyncio.create_subprocess_exec', s.mock):
+            await core.resolve_nomination(c, '16.2')
+
+        assert c.nominated
+        assert c.nomination_type is core.NominationType.CC
+
+    @pytest.mark.asyncio
+    async def test_cc_is_not_nominated(self):
+        s = self.FakeSubprocess(b'Cc: 16.2 <mesa-stable@lists.freedesktop.org>')
+        c = core.Commit('abcdef1234567890', 'a commit')
+
+        with mock.patch('bin.pick.core.asyncio.create_subprocess_exec', s.mock):
+            await core.resolve_nomination(c, '16.1')
+
+        assert not c.nominated
+        assert c.nomination_type is None
+
+    @pytest.mark.asyncio
+    async def test_revert_is_nominated(self):
+        s = self.FakeSubprocess(b'This reverts commit 1234567890123456789012345678901234567890.')
+        c = core.Commit('abcdef1234567890', 'a commit')
+
+        with mock.patch('bin.pick.core.asyncio.create_subprocess_exec', s.mock):
+            with mock.patch('bin.pick.core.is_commit_in_branch', self.return_true):
+                await core.resolve_nomination(c, '')
+
+        assert c.nominated
+        assert c.nomination_type is core.NominationType.REVERT
+
+    @pytest.mark.asyncio
+    async def test_revert_is_not_nominated(self):
+        s = self.FakeSubprocess(b'This reverts commit 1234567890123456789012345678901234567890.')
+        c = core.Commit('abcdef1234567890', 'a commit')
+
+        with mock.patch('bin.pick.core.asyncio.create_subprocess_exec', s.mock):
+            with mock.patch('bin.pick.core.is_commit_in_branch', self.return_false):
+                await core.resolve_nomination(c, '')
+
+        assert not c.nominated
+        assert c.nomination_type is core.NominationType.REVERT
+
+    @pytest.mark.asyncio
+    async def test_is_fix_and_cc(self):
+        # The Fixes tag is checked first, so it wins over the Cc tag.
+        s = self.FakeSubprocess(
+            b'Fixes: 3d09bb390a39 (etnaviv: GC7000: State changes for HALTI3..5)\n'
+            b'Cc: 16.1 <mesa-stable@lists.freedesktop.org>'
+        )
+        c = core.Commit('abcdef1234567890', 'a commit')
+
+        with mock.patch('bin.pick.core.asyncio.create_subprocess_exec', s.mock):
+            with mock.patch('bin.pick.core.is_commit_in_branch', self.return_true):
+                await core.resolve_nomination(c, '16.1')
+
+        assert c.nominated
+        assert c.nomination_type is core.NominationType.FIXES
+
+    @pytest.mark.asyncio
+    async def test_is_fix_and_revert(self):
+        # The Fixes tag is checked first, so it wins over the revert line.
+        s = self.FakeSubprocess(
+            b'Fixes: 3d09bb390a39 (etnaviv: GC7000: State changes for HALTI3..5)\n'
+            b'This reverts commit 1234567890123456789012345678901234567890.'
+        )
+        c = core.Commit('abcdef1234567890', 'a commit')
+
+        with mock.patch('bin.pick.core.asyncio.create_subprocess_exec', s.mock):
+            with mock.patch('bin.pick.core.is_commit_in_branch', self.return_true):
+                await core.resolve_nomination(c, '16.1')
+
+        assert c.nominated
+        assert c.nomination_type is core.NominationType.FIXES
+
+    @pytest.mark.asyncio
+    async def test_is_cc_and_revert(self):
+        # The Cc tag is checked before the revert line, so it wins.
+        s = self.FakeSubprocess(
+            b'This reverts commit 1234567890123456789012345678901234567890.\n'
+            b'Cc: 16.1 <mesa-stable@lists.freedesktop.org>'
+        )
+        c = core.Commit('abcdef1234567890', 'a commit')
+
+        with mock.patch('bin.pick.core.asyncio.create_subprocess_exec', s.mock):
+            with mock.patch('bin.pick.core.is_commit_in_branch', self.return_true):
+                await core.resolve_nomination(c, '16.1')
+
+        assert c.nominated
+        assert c.nomination_type is core.NominationType.CC
+
+
+class TestResolveFixes:
+
+    @pytest.mark.asyncio
+    async def test_in_new(self):
+        """Because commit abcd is nominated, so f123 should be as well."""
+        c = [
+            core.Commit('f123', 'desc', nomination_type=core.NominationType.FIXES, because_sha='abcd'),
+            core.Commit('abcd', 'desc', True),
+        ]
+        await core.resolve_fixes(c, [])
+        assert c[1].nominated
+
+    @pytest.mark.asyncio
+    async def test_not_in_new(self):
+        """Because commit abcd is not nominated, commit f123 shouldn't be either."""
+        c = [
+            core.Commit('f123', 'desc', nomination_type=core.NominationType.FIXES, because_sha='abcd'),
+            core.Commit('abcd', 'desc'),
+        ]
+        await core.resolve_fixes(c, [])
+        assert not c[0].nominated
+
+    @pytest.mark.asyncio
+    async def test_in_previous(self):
+        """Because commit abcd is nominated, so f123 should be as well."""
+        p = [
+            core.Commit('abcd', 'desc', True),
+        ]
+        c = [
+            core.Commit('f123', 'desc', nomination_type=core.NominationType.FIXES, because_sha='abcd'),
+        ]
+        await core.resolve_fixes(c, p)
+        assert c[0].nominated
+
+    @pytest.mark.asyncio
+    async def test_not_in_previous(self):
+        """Because commit abcd is not nominated, commit f123 shouldn't be either."""
+        p = [
+            core.Commit('abcd', 'desc'),
+        ]
+        c = [
+            core.Commit('f123', 'desc', nomination_type=core.NominationType.FIXES, because_sha='abcd'),
+        ]
+        await core.resolve_fixes(c, p)
+        assert not c[0].nominated
+
+
+class TestIsCommitInBranch:
+
+    """Tests for core.is_commit_in_branch(); these call git for real."""
+
+    @pytest.mark.asyncio
+    async def test_no(self):
+        # Hopefully this is never true?
+        value = await core.is_commit_in_branch('ffffffffffffffffffffffffffffff')
+        assert not value
+
+    @pytest.mark.asyncio
+    async def test_yes(self):
+        # This commit is from 2000, it better always be in the branch
+        value = await core.is_commit_in_branch('88f3b89a2cb77766d2009b9868c44e03abe2dbb2')
+        assert value
+
+
+class TestFullSha:
+
+    """Tests for core.full_sha(); these call git for real."""
+
+    @pytest.mark.asyncio
+    async def test_basic(self):
+        # This commit is from 2000, it better always be in the branch
+        value = await core.full_sha('88f3b89a2cb777')
+        assert value
+
+    @pytest.mark.asyncio
+    async def test_invalid(self):
+        # Hopefully no commit ever has this sha, so the lookup has to fail
+        with pytest.raises(core.PickUIException):
+            await core.full_sha('fffffffffffffffffffffffffffffffffff')
--- a/bin/pick/ui.py
+++ b/bin/pick/ui.py
@@ -0,0 +1,259 @@
+# Copyright © 2020-2020 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+"""Urwid UI for pick script."""
+
+import asyncio
+import functools
+import itertools
+import textwrap
+import typing
+
+import attr
+import urwid
+
+from . import core
+
+if typing.TYPE_CHECKING:
+    WidgetType = typing.TypeVar('WidgetType', bound=urwid.Widget)
+
+PALETTE = [
+    ('a', 'black', 'light gray'),
+    ('b', 'black', 'dark red'),
+    ('bg', 'black', 'dark blue'),
+    ('reversed', 'standout', ''),
+]
+
+
+class RootWidget(urwid.Frame):
+    """Top-level frame that owns the global quit/update/add key bindings."""
+
+    def __init__(self, *args, ui: 'UI' = None, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert ui is not None
+        self.ui = ui
+
+    def keypress(self, size: int, key: str) -> typing.Optional[str]:
+        """Handle 'q', 'u' and 'a' here and pass any other key on to urwid.Frame."""
+        if key == 'q':
+            raise urwid.ExitMainLoop()
+        if key == 'u':
+            asyncio.ensure_future(self.ui.update())
+        elif key == 'a':
+            self.ui.add()
+        return None if key in ('u', 'a') else super().keypress(size, key)
+
+
+class CommitWidget(urwid.Text):
+    """Selectable text row for one commit; 'c', 'd' and 'b' act on that commit."""
+
+    # urwid.Text is not interactable by default; this flag is what makes urwid
+    # call our keypress method.
+    _selectable = True
+
+    def __init__(self, ui: 'UI', commit: 'core.Commit'):
+        super().__init__(commit.description)
+        self.ui, self.commit = ui, commit
+
+    async def apply(self) -> None:
+        """Apply the commit; drop the row on success, otherwise report the failure."""
+        result, err = await self.commit.apply(self.ui)
+        if result:
+            self.ui.remove_commit(self)
+        else:
+            self.ui.chp_failed(self, err)
+
+    async def denominate(self) -> None:
+        """Denominate the commit, then drop its row."""
+        await self.commit.denominate(self.ui)
+        self.ui.remove_commit(self)
+
+    async def backport(self) -> None:
+        """Run the commit's backport action, then drop its row."""
+        await self.commit.backport(self.ui)
+        self.ui.remove_commit(self)
+
+    def keypress(self, size: int, key: str) -> typing.Optional[str]:
+        """Schedule the coroutine bound to key, or hand back a key that is not ours."""
+        action = {'c': self.apply, 'd': self.denominate, 'b': self.backport}.get(key)
+        if action is None:
+            return key
+        asyncio.ensure_future(action())
+        return None
+
+
+@attr.s(slots=True)
+class UI:
+
+    """Main management object.
+
+    :previous_commits: A list of commits to master since this branch was created
+    :new_commits: Commits added to master since the last time this script was run
+    """
+
+    commit_list: typing.List['urwid.Button'] = attr.ib(factory=lambda: urwid.SimpleFocusListWalker([]), init=False)
+    feedback_box: typing.List['urwid.Text'] = attr.ib(factory=lambda: urwid.SimpleFocusListWalker([]), init=False)
+    header: 'urwid.Text' = attr.ib(factory=lambda: urwid.Text('Mesa Stable Picker', align='center'), init=False)
+    body: 'urwid.Columns' = attr.ib(attr.Factory(lambda s: s._make_body(), True), init=False)
+    footer: 'urwid.Columns' = attr.ib(attr.Factory(lambda s: s._make_footer(), True), init=False)
+    root: RootWidget = attr.ib(attr.Factory(lambda s: s._make_root(), True), init=False)
+    mainloop: urwid.MainLoop = attr.ib(None, init=False)
+
+    previous_commits: typing.List['core.Commit'] = attr.ib(factory=list, init=False)
+    new_commits: typing.List['core.Commit'] = attr.ib(factory=list, init=False)
+
+    def _make_body(self) -> 'urwid.Columns':
+        """Build the body: the commit list and the feedback list, side by side."""
+        return urwid.Columns([urwid.ListBox(self.commit_list), urwid.ListBox(self.feedback_box)])
+
+    def _make_footer(self) -> 'urwid.Columns':
+        """Build the footer: one text widget per key-binding hint."""
+        hints = ('[U]pdate', '[Q]uit', '[C]herry Pick', '[D]enominate', '[B]ackport',
+                 '[A]pply additional patch')
+        return urwid.Columns([urwid.Text(hint) for hint in hints])
+
+    def _make_root(self) -> 'RootWidget':
+        return RootWidget(self.body, self.header, self.footer, 'body', ui=self)
+
+    def render(self) -> 'WidgetType':
+        """Schedule an update() and return the root widget."""
+        asyncio.ensure_future(self.update())
+        return self.root
+
+    def load(self) -> None:
+        """Replace previous_commits with the result of core.load()."""
+        self.previous_commits = core.load()
+
+    async def update(self) -> None:
+        """Reload the saved state, gather commits new on master and rebuild the list."""
+        self.load()
+        # Whatever an earlier call gathered was saved by it, so load() has just
+        # returned it in previous_commits; keeping it here would list and save it twice.
+        self.new_commits = []
+        with open('VERSION', 'r') as f:
+            # 'major.minor'; a fixed four-character slice breaks on e.g. '20.10.0'
+            version = '.'.join(f.read().strip().split('.')[:2])
+        sha = self.previous_commits[0].sha if self.previous_commits else f'{version}-branchpoint'
+
+        new_commits = await core.get_new_commits(sha)
+
+        if new_commits:
+            pb = urwid.ProgressBar('a', 'b', done=len(new_commits))
+            o = self.mainloop.widget
+            self.mainloop.widget = urwid.Overlay(
+                urwid.Filler(urwid.LineBox(pb)), o, 'center', ('relative', 50), 'middle', ('relative', 50))
+            self.new_commits = await core.gather_commits(
+                version, self.previous_commits, new_commits,
+                lambda: pb.set_completion(pb.current + 1))
+            self.mainloop.widget = o
+
+        del self.commit_list[:]  # start over, or every update would append the rows again
+        for commit in reversed(list(itertools.chain(self.new_commits, self.previous_commits))):
+            if commit.nominated and commit.resolution is core.Resolution.UNRESOLVED:
+                b = urwid.AttrMap(CommitWidget(self, commit), None, focus_map='reversed')
+                self.commit_list.append(b)
+        self.save()
+
+    async def feedback(self, text: str) -> None:
+        self.feedback_box.append(urwid.AttrMap(urwid.Text(text), None))
+
+    def remove_commit(self, commit: CommitWidget) -> None:
+        """Delete the row whose base widget is commit; do nothing if there is none."""
+        for i, c in enumerate(self.commit_list):
+            if c.base_widget is commit:
+                del self.commit_list[i]
+                break
+
+    def save(self) -> None:
+        core.save(itertools.chain(self.new_commits, self.previous_commits))
+
+    def add(self) -> None:
+        """Add an additional commit which isn't nominated."""
+        o = self.mainloop.widget
+
+        def reset_cb(_) -> None:
+            self.mainloop.widget = o
+
+        async def apply_cb(edit: urwid.Edit) -> None:
+            text: str = edit.get_edit_text()
+            if not text:  # nothing was entered
+                return
+            # This coroutine runs as a fire-and-forget future, so an exception raised
+            # in it would never be shown; failures go to the feedback column instead.
+            try:
+                sha = await core.full_sha(text)
+            except core.PickUIException as e:
+                await self.feedback(f"Couldn't resolve {text}: {e}")
+                return
+            for commit in reversed(list(itertools.chain(self.new_commits, self.previous_commits))):
+                if commit.sha == sha:
+                    break
+            else:
+                await self.feedback(f"Couldn't find {sha}")
+                return
+            result, err = await commit.apply(self)
+            if not result:
+                await self.feedback(f'Failed to apply {sha}: {err}')
+
+        q = urwid.Edit("Commit sha\n")
+        ok_btn = urwid.Button('Ok')
+        urwid.connect_signal(ok_btn, 'click', lambda _: asyncio.ensure_future(apply_cb(q)))
+        urwid.connect_signal(ok_btn, 'click', reset_cb)
+
+        can_btn = urwid.Button('Cancel')
+        urwid.connect_signal(can_btn, 'click', reset_cb)
+
+        box = urwid.LineBox(urwid.Pile([q, urwid.Columns([ok_btn, can_btn])]))
+        self.mainloop.widget = urwid.Overlay(
+            urwid.Filler(box), o, 'center', ('relative', 50), 'middle', ('relative', 50))
+
+    def chp_failed(self, commit: 'CommitWidget', err: str) -> None:
+        """Show a dialog offering to abort or hand-resolve a failed cherry-pick."""
+        o = self.mainloop.widget
+
+        def reset_cb(_) -> None:
+            self.mainloop.widget = o
+
+        # Dedent the template before filling it in: err may be multi-line text with
+        # unindented lines, and dedenting afterwards would then strip nothing.
+        template = textwrap.dedent("""
+            Failed to apply {sha} {description} with the following error:
+
+            {err}
+
+            You can either cancel, or resolve the conflicts, commit the
+            changes and select ok.""")
+        t = urwid.Text(template.format(
+            sha=commit.commit.sha, description=commit.commit.description, err=err))
+
+        can_btn = urwid.Button('Cancel')
+        urwid.connect_signal(can_btn, 'click', reset_cb)
+        urwid.connect_signal(
+            can_btn, 'click', lambda _: asyncio.ensure_future(commit.commit.abort_cherry(self, err)))
+
+        ok_btn = urwid.Button('Ok')
+        urwid.connect_signal(ok_btn, 'click', reset_cb)
+        urwid.connect_signal(
+            ok_btn, 'click', lambda _: asyncio.ensure_future(commit.commit.resolve(self)))
+        urwid.connect_signal(ok_btn, 'click', lambda _: self.remove_commit(commit))
+
+        box = urwid.LineBox(urwid.Pile([t, urwid.Columns([ok_btn, can_btn])]))
+        self.mainloop.widget = urwid.Overlay(
+            urwid.Filler(box), o, 'center', ('relative', 50), 'middle', ('relative', 50))
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -3230,6 +3230,7 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr)
      while (channel_start < num_channels) {
         unsigned fetch_size = num_channels - channel_start;
         unsigned fetch_offset = attrib_offset + channel_start * vtx_info->chan_byte_size;
+         bool expanded = false;

         /* use MUBUF when possible to avoid possible alignment issues */
         /* TODO: we could use SDWA to unpack 8/16-bit attributes without extra instructions */
@@ -3244,6 +3245,7 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr)
            if (fetch_size == 3 && ctx->options->chip_class == GFX6) {
               /* GFX6 only supports loading vec3 with MTBUF, expand to vec4. */
               fetch_size = 4;
+               expanded = true;
            }
         }

@@ -3268,6 +3270,8 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr)
            opcode = use_mubuf ? aco_opcode::buffer_load_dwordx2 : aco_opcode::tbuffer_load_format_xy;
            break;
         case 3:
+            assert(ctx->options->chip_class >= GFX7 ||
+                   (!use_mubuf && ctx->options->chip_class == GFX6));
            opcode = use_mubuf ? aco_opcode::buffer_load_dwordx3 : aco_opcode::tbuffer_load_format_xyz;
            break;
         case 4:
@@ -3279,7 +3283,8 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr)

         Temp fetch_dst;
         if (channel_start == 0 && fetch_size == dst.size() && !post_shuffle &&
-             (alpha_adjust == RADV_ALPHA_ADJUST_NONE || num_channels <= 3)) {
+             !expanded && (alpha_adjust == RADV_ALPHA_ADJUST_NONE ||
+                           num_channels <= 3)) {
            direct_fetch = true;
            fetch_dst = dst;
         } else {
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -1547,7 +1547,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
            instr->definitions[0].setFixed(instr->operands[3].physReg());
         } else if (instr->format == Format::MIMG &&
                    instr->definitions.size() == 1 &&
-                    instr->operands[1].regClass() == instr->definitions[0].regClass()) {
+                    instr->operands[1].regClass().type() == RegType::vgpr) {
            instr->definitions[0].setFixed(instr->operands[1].physReg());
         }

--- a/src/amd/compiler/aco_validate.cpp
+++ b/src/amd/compiler/aco_validate.cpp
@@ -244,7 +244,8 @@ void validate(Program* program, FILE * output)
            if (instr->operands[1].hasRegClass() && instr->operands[1].regClass().type() == RegType::sgpr)
               check(instr->operands[1].regClass() == s4, "MIMG operands[1] (sampler constant) must be 4 SGPRs", instr.get());
            else if (instr->operands[1].hasRegClass() && instr->operands[1].regClass().type() == RegType::vgpr)
-               check(instr->definitions.empty() || instr->definitions[0].regClass() == instr->operands[1].regClass(),
+               check((instr->definitions.empty() || instr->definitions[0].regClass() == instr->operands[1].regClass() ||
+                     instr->opcode == aco_opcode::image_atomic_cmpswap || instr->opcode == aco_opcode::image_atomic_fcmpswap),
                     "MIMG operands[1] (VDATA) must be the same as definitions[0] for atomics", instr.get());
            check(instr->operands[2].hasRegClass() && instr->operands[2].regClass().type() == RegType::vgpr,
                  "MIMG operands[2] (VADDR) must be VGPR", instr.get());
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -996,8 +996,9 @@ radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)

 	for (unsigned i = 0; i < subpass->color_count; ++i) {
 		if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) {
-			sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
-			sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
+			/* We don't set the DISABLE bits, because the HW can't have holes,
+			 * so the SPI color format is set to 32-bit 1-component. */
+			sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
 			continue;
 		}

@@ -1113,10 +1114,10 @@ radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
 		}
 	}

-	for (unsigned i = subpass->color_count; i < 8; ++i) {
-		sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
-		sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
-	}
+	/* Do not set the DISABLE bits for the unused attachments, as that
+	 * breaks dual source blending in SkQP and does not seem to improve
+	 * performance. */
+
 	/* TODO: avoid redundantly setting context registers */
 	radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3);
 	radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
--- a/src/freedreno/perfcntrs/fdperf.c
+++ b/src/freedreno/perfcntrs/fdperf.c
@@ -123,10 +123,12 @@ readfile(const char *path, int *sz)
 		if (ret < 0) {
 			free(buf);
 			*sz = 0;
+			close(fd);
 			return NULL;
 		} else if (ret < CHUNKSIZE) {
 			n += ret;
 			*sz = n;
+			close(fd);
 			return buf;
 		} else {
 			n += CHUNKSIZE;
@@ -393,8 +395,10 @@ find_device(void)
 		err(1, "could not open /dev/mem");

 	dev.io = mmap(0, dev.size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, dev.base);
-	if (!dev.io)
+	if (!dev.io) {
+		close(fd);
 		err(1, "could not map device");
+	}
 }

 /*
--- a/src/gallium/drivers/etnaviv/etnaviv_resource.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_resource.c
@@ -464,6 +464,7 @@ etna_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *prsc)
   struct etna_resource *rsc = etna_resource(prsc);

   assert(!_mesa_set_next_entry(rsc->pending_ctx, NULL));
+   _mesa_set_destroy(rsc->pending_ctx, NULL);

   if (rsc->bo)
      etna_bo_del(rsc->bo);
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -321,7 +321,6 @@ void
 fd_batch_flush(struct fd_batch *batch)
 {
 	struct fd_batch *tmp = NULL;
-	bool newbatch = false;

 	/* NOTE: we need to hold an extra ref across the body of flush,
 	 * since the last ref to this batch could be dropped when cleaning
@@ -329,30 +328,10 @@ fd_batch_flush(struct fd_batch *batch)
 	 */
 	fd_batch_reference(&tmp, batch);

-	if (batch == batch->ctx->batch) {
-		batch->ctx->batch = NULL;
-		newbatch = true;
-	}
-
 	batch_flush(tmp);

-	if (newbatch) {
-		struct fd_context *ctx = batch->ctx;
-		struct fd_batch *new_batch;
-
-		if (ctx->screen->reorder) {
-			/* defer allocating new batch until one is needed for rendering
-			 * to avoid unused batches for apps that create many contexts
-			 */
-			new_batch = NULL;
-		} else {
-			new_batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx, false);
-			util_copy_framebuffer_state(&new_batch->framebuffer, &batch->framebuffer);
-		}
-
-		fd_batch_reference(&batch, NULL);
-		ctx->batch = new_batch;
-		fd_context_all_dirty(ctx);
+	if (batch == batch->ctx->batch) {
+		fd_batch_reference(&batch->ctx->batch, NULL);
 	}

 	fd_batch_reference(&tmp, NULL);
--- a/src/gallium/drivers/freedreno/freedreno_batch_cache.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch_cache.c
@@ -295,9 +295,6 @@ fd_bc_alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx, bool non
 		 */
 		struct fd_batch *flush_batch = NULL;
 		for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
-			if ((cache->batches[i] == ctx->batch) ||
-					!cache->batches[i]->needs_flush)
-				continue;
 			if (!flush_batch || (cache->batches[i]->seqno < flush_batch->seqno))
 				fd_batch_reference_locked(&flush_batch, cache->batches[i]);
 		}
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -385,9 +385,6 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
 		goto fail;
 	pctx->const_uploader = pctx->stream_uploader;

-	if (!ctx->screen->reorder)
-		ctx->batch = fd_bc_alloc_batch(&screen->batch_cache, ctx, false);
-
 	slab_create_child(&ctx->transfer_pool, &screen->transfer_pool);

 	fd_draw_init(pctx);
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -256,7 +256,6 @@ fd_set_framebuffer_state(struct pipe_context *pctx,
 		DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush,
 				framebuffer->cbufs[0], framebuffer->zsbuf);
 		fd_batch_flush(ctx->batch);
-		util_copy_framebuffer_state(&ctx->batch->framebuffer, cso);
 	}

 	ctx->dirty |= FD_DIRTY_FRAMEBUFFER;
--- a/src/gallium/drivers/iris/iris_blorp.c
+++ b/src/gallium/drivers/iris/iris_blorp.c
@@ -247,24 +247,11 @@ blorp_flush_range(UNUSED struct blorp_batch *blorp_batch,
    */
 }

-static void
-blorp_emit_urb_config(struct blorp_batch *blorp_batch,
-                      unsigned vs_entry_size,
-                      UNUSED unsigned sf_entry_size)
+static const struct gen_l3_config *
+blorp_get_l3_config(struct blorp_batch *blorp_batch)
 {
-   struct iris_context *ice = blorp_batch->blorp->driver_ctx;
   struct iris_batch *batch = blorp_batch->driver_batch;
-
-   unsigned size[4] = { vs_entry_size, 1, 1, 1 };
-
-   /* If last VS URB size is good enough for what the BLORP operation needed,
-    * then we can skip reconfiguration
-    */
-   if (ice->shaders.last_vs_entry_size >= vs_entry_size)
-      return;
-
-   genX(emit_urb_setup)(ice, batch, size, false, false);
-   ice->state.dirty |= IRIS_DIRTY_URB;
+   return batch->screen->l3_config_3d;
 }

 static void
@@ -346,7 +333,6 @@ iris_blorp_exec(struct blorp_batch *blorp_batch,
                         IRIS_DIRTY_UNCOMPILED_GS |
                         IRIS_DIRTY_UNCOMPILED_FS |
                         IRIS_DIRTY_VF |
-                         IRIS_DIRTY_URB |
                         IRIS_DIRTY_SF_CL_VIEWPORT |
                         IRIS_DIRTY_SAMPLER_STATES_VS |
                         IRIS_DIRTY_SAMPLER_STATES_TCS |
--- a/src/gallium/drivers/iris/iris_context.c
+++ b/src/gallium/drivers/iris/iris_context.c
@@ -264,8 +264,6 @@ iris_create_context(struct pipe_screen *pscreen, void *priv, unsigned flags)
   ctx->get_device_reset_status = iris_get_device_reset_status;
   ctx->get_sample_position = iris_get_sample_position;

-   ice->shaders.urb_size = devinfo->urb.size;
-
   iris_init_context_fence_functions(ctx);
   iris_init_blit_functions(ctx);
   iris_init_clear_functions(ctx);
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -28,6 +28,7 @@
 #include "util/u_debug.h"
 #include "intel/blorp/blorp.h"
 #include "intel/dev/gen_debug.h"
+#include "intel/common/gen_l3_config.h"
 #include "intel/compiler/brw_compiler.h"
 #include "iris_batch.h"
 #include "iris_binder.h"
@@ -667,14 +668,9 @@ struct iris_context {
      struct u_upload_mgr *uploader;
      struct hash_table *cache;

-      unsigned urb_size;
-
      /** Is a GS or TES outputting points or lines? */
      bool output_topology_is_points_or_lines;

-      /* Track last VS URB entry size */
-      unsigned last_vs_entry_size;
-
      /**
       * Scratch buffers for various sizes and stages.
       *
@@ -736,6 +732,8 @@ struct iris_context {
       */
      enum isl_aux_usage draw_aux_usage[BRW_MAX_DRAW_BUFFERS];

+      enum gen_urb_deref_block_size urb_deref_block_size;
+
      /** Bitfield of whether color blending is enabled for RT[i] */
      uint8_t blend_enables;

--- a/src/gallium/drivers/iris/iris_genx_protos.h
+++ b/src/gallium/drivers/iris/iris_genx_protos.h
@@ -29,10 +29,6 @@

 /* iris_state.c */
 void genX(init_state)(struct iris_context *ice);
-void genX(emit_urb_setup)(struct iris_context *ice,
-                          struct iris_batch *batch,
-                          const unsigned size[4],
-                          bool tess_present, bool gs_present);
 void genX(emit_hashing_mode)(struct iris_context *ice,
                             struct iris_batch *batch,
                             unsigned width, unsigned height,
--- a/src/gallium/drivers/iris/iris_screen.c
+++ b/src/gallium/drivers/iris/iris_screen.c
@@ -53,6 +53,7 @@
 #include "iris_screen.h"
 #include "intel/compiler/brw_compiler.h"
 #include "intel/common/gen_gem.h"
+#include "intel/common/gen_l3_config.h"
 #include "iris_monitor.h"

 static void
@@ -575,6 +576,17 @@ iris_getparam_integer(struct iris_screen *screen, int param)
   return -1;
 }

+/* Default L3 configuration: SLM is only requested for compute. */
+static const struct gen_l3_config *
+iris_get_default_l3_config(const struct gen_device_info *devinfo,
+                           bool compute)
+{
+   /* Arguments are (devinfo, wants_dc_cache, has_slm). */
+   const struct gen_l3_weights weights =
+      gen_get_default_l3_weights(devinfo, true, compute);
+   return gen_get_l3_config(devinfo, weights);
+}
+
 static void
 iris_shader_debug_log(void *data, const char *fmt, ...)
 {
@@ -673,6 +685,9 @@ iris_screen_create(int fd, const struct pipe_screen_config *config)
   screen->compiler->supports_shader_constants = true;
   screen->compiler->compact_params = false;

+   screen->l3_config_3d = iris_get_default_l3_config(&screen->devinfo, false);
+   screen->l3_config_cs = iris_get_default_l3_config(&screen->devinfo, true);
+
   iris_disk_cache_init(screen);

   slab_create_parent(&screen->transfer_pool,
--- a/src/gallium/drivers/iris/iris_screen.h
+++ b/src/gallium/drivers/iris/iris_screen.h
@@ -34,6 +34,7 @@

 struct iris_bo;
 struct iris_monitor_config;
+struct gen_l3_config;

 #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
 #define WRITE_ONCE(x, v) *(volatile __typeof__(x) *)&(x) = (v)
@@ -80,6 +81,9 @@ struct iris_screen {
   struct brw_compiler *compiler;
   struct iris_monitor_config *monitor_cfg;

+   const struct gen_l3_config *l3_config_3d;
+   const struct gen_l3_config *l3_config_cs;
+
   /**
    * A buffer containing nothing useful, for hardware workarounds that
    * require scratch writes or reads from some unimportant memory.
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -733,8 +733,8 @@ init_state_base_address(struct iris_batch *batch)
 }

 static void
-iris_emit_l3_config(struct iris_batch *batch, const struct gen_l3_config *cfg,
-                    bool has_slm, bool wants_dc_cache)
+iris_emit_l3_config(struct iris_batch *batch,
+                    const struct gen_l3_config *cfg)
 {
   uint32_t reg_val;

@@ -747,8 +747,8 @@ iris_emit_l3_config(struct iris_batch *batch, const struct gen_l3_config *cfg,
 #endif

   iris_pack_state(L3_ALLOCATION_REG, &reg_val, reg) {
-#if GEN_GEN < 12
-      reg.SLMEnable = has_slm;
+#if GEN_GEN < 11
+      reg.SLMEnable = cfg->n[GEN_L3P_SLM] > 0;
 #endif
 #if GEN_GEN == 11
      /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be set
@@ -766,18 +766,6 @@ iris_emit_l3_config(struct iris_batch *batch, const struct gen_l3_config *cfg,
   _iris_emit_lri(batch, L3_ALLOCATION_REG_num, reg_val);
 }

-static void
-iris_emit_default_l3_config(struct iris_batch *batch, bool compute)
-{
-   const struct gen_device_info *devinfo = &batch->screen->devinfo;
-   bool wants_dc_cache = true;
-   bool has_slm = compute;
-   const struct gen_l3_weights w =
-      gen_get_default_l3_weights(devinfo, wants_dc_cache, has_slm);
-   const struct gen_l3_config *cfg = gen_get_l3_config(devinfo, w);
-   iris_emit_l3_config(batch, cfg, has_slm, wants_dc_cache);
-}
-
 #if GEN_GEN == 9
 static void
 iris_enable_obj_preemption(struct iris_batch *batch, bool enable)
@@ -912,7 +900,7 @@ iris_init_render_context(struct iris_batch *batch)

   emit_pipeline_select(batch, _3D);

-   iris_emit_default_l3_config(batch, false);
+   iris_emit_l3_config(batch, batch->screen->l3_config_3d);

   init_state_base_address(batch);

@@ -1031,7 +1019,7 @@ iris_init_compute_context(struct iris_batch *batch)
   emit_pipeline_select(batch, GPGPU);
 #endif

-   iris_emit_default_l3_config(batch, true);
+   iris_emit_l3_config(batch, batch->screen->l3_config_cs);

   init_state_base_address(batch);

@@ -5389,9 +5377,22 @@ iris_upload_dirty_render_state(struct iris_context *ice,
         assert(size[i] != 0);
      }

-      genX(emit_urb_setup)(ice, batch, size,
-                           ice->shaders.prog[MESA_SHADER_TESS_EVAL] != NULL,
-                           ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL);
+      unsigned entries[4], start[4];
+      gen_get_urb_config(&batch->screen->devinfo,
+                         batch->screen->l3_config_3d,
+                         ice->shaders.prog[MESA_SHADER_TESS_EVAL] != NULL,
+                         ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL,
+                         size, entries, start,
+                         &ice->state.urb_deref_block_size);
+
+      for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
+         iris_emit_cmd(batch, GENX(3DSTATE_URB_VS), urb) {
+            urb._3DCommandSubOpcode += i;
+            urb.VSURBStartingAddress     = start[i];
+            urb.VSURBEntryAllocationSize = size[i] - 1;
+            urb.VSNumberofURBEntries     = entries[i];
+         }
+      }
   }

   if (dirty & IRIS_DIRTY_BLEND_STATE) {
@@ -5764,13 +5765,17 @@ iris_upload_dirty_render_state(struct iris_context *ice,
                      ARRAY_SIZE(cso_rast->clip));
   }

-   if (dirty & IRIS_DIRTY_RASTER) {
+   if (dirty & (IRIS_DIRTY_RASTER | IRIS_DIRTY_URB)) {
      struct iris_rasterizer_state *cso = ice->state.cso_rast;
      iris_batch_emit(batch, cso->raster, sizeof(cso->raster));

      uint32_t dynamic_sf[GENX(3DSTATE_SF_length)];
      iris_pack_command(GENX(3DSTATE_SF), &dynamic_sf, sf) {
         sf.ViewportTransformEnable = !ice->state.window_space_position;
+
+#if GEN_GEN >= 12
+         sf.DerefBlockSize = ice->state.urb_deref_block_size;
+#endif
      }
      iris_emit_merge(batch, cso->sf, dynamic_sf,
                      ARRAY_SIZE(dynamic_sf));
@@ -7226,34 +7231,6 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
   }
 }

-void
-genX(emit_urb_setup)(struct iris_context *ice,
-                     struct iris_batch *batch,
-                     const unsigned size[4],
-                     bool tess_present, bool gs_present)
-{
-   const struct gen_device_info *devinfo = &batch->screen->devinfo;
-   const unsigned push_size_kB = 32;
-   unsigned entries[4];
-   unsigned start[4];
-
-   ice->shaders.last_vs_entry_size = size[MESA_SHADER_VERTEX];
-
-   gen_get_urb_config(devinfo, 1024 * push_size_kB,
-                      1024 * ice->shaders.urb_size,
-                      tess_present, gs_present,
-                      size, entries, start);
-
-   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
-      iris_emit_cmd(batch, GENX(3DSTATE_URB_VS), urb) {
-         urb._3DCommandSubOpcode += i;
-         urb.VSURBStartingAddress     = start[i];
-         urb.VSURBEntryAllocationSize = size[i] - 1;
-         urb.VSNumberofURBEntries     = entries[i];
-      }
-   }
-}
-
 #if GEN_GEN == 9
 /**
 * Preemption on Gen9 has to be enabled or disabled in various cases.
--- a/src/gallium/drivers/lima/lima_util.h
+++ b/src/gallium/drivers/lima/lima_util.h
@@ -29,7 +29,7 @@

 #define LIMA_PAGE_SIZE 4096

-FILE *lima_dump_command_stream;
+extern FILE *lima_dump_command_stream;

 bool lima_get_absolute_timeout(uint64_t *timeout);
 void lima_dump_file_open(void);
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -777,9 +777,11 @@ panfrost_batch_draw_wallpaper(struct panfrost_batch *batch)
        damage.maxx = MIN2(batch->maxx,
                           rsrc->damage.biggest_rect.x +
                           rsrc->damage.biggest_rect.width);
+        damage.maxx = MAX2(damage.maxx, damage.minx);
        damage.maxy = MIN2(batch->maxy,
                           rsrc->damage.biggest_rect.y +
                           rsrc->damage.biggest_rect.height);
+        damage.maxy = MAX2(damage.maxy, damage.miny);

        /* One damage rectangle means we can end up with at most 4 reload
         * regions:
--- a/src/gallium/drivers/swr/swr_shader.cpp
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -79,7 +79,6 @@ constexpr bool verbose_shader = false;
 #endif

 using namespace SwrJit;
-using namespace llvm;

 static unsigned
 locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info);
@@ -2191,7 +2190,7 @@ swr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key)
      "GS");
   PFN_GS_FUNC func = builder.CompileGS(ctx, key);

-   ctx->gs->map.insert(std::make_pair(key, std::make_unique<VariantGS>(builder.gallivm, func)));
+   ctx->gs->map.insert(std::make_pair(key, std::unique_ptr<VariantGS>(new VariantGS(builder.gallivm, func))));
   return func;
 }

@@ -2204,7 +2203,7 @@ swr_compile_tcs(struct swr_context *ctx, swr_jit_tcs_key &key)
   PFN_TCS_FUNC func = builder.CompileTCS(ctx, key);

   ctx->tcs->map.insert(
-      std::make_pair(key, std::make_unique<VariantTCS>(builder.gallivm, func)));
+      std::make_pair(key, std::unique_ptr<VariantTCS>(new VariantTCS(builder.gallivm, func))));

   return func;
 }
@@ -2218,7 +2217,7 @@ swr_compile_tes(struct swr_context *ctx, swr_jit_tes_key &key)
   PFN_TES_FUNC func = builder.CompileTES(ctx, key);

   ctx->tes->map.insert(
-      std::make_pair(key, std::make_unique<VariantTES>(builder.gallivm, func)));
+      std::make_pair(key, std::unique_ptr<VariantTES>(new VariantTES(builder.gallivm, func))));

   return func;
 }
@@ -2492,7 +2491,7 @@ swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
      "VS");
   PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key);

-   ctx->vs->map.insert(std::make_pair(key, std::make_unique<VariantVS>(builder.gallivm, func)));
+   ctx->vs->map.insert(std::make_pair(key, std::unique_ptr<VariantVS>(new VariantVS(builder.gallivm, func))));
   return func;
 }

@@ -2961,6 +2960,6 @@ swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key)
      "FS");
   PFN_PIXEL_KERNEL func = builder.CompileFS(ctx, key);

-   ctx->fs->map.insert(std::make_pair(key, std::make_unique<VariantFS>(builder.gallivm, func)));
+   ctx->fs->map.insert(std::make_pair(key, std::unique_ptr<VariantFS>(new VariantFS(builder.gallivm, func))));
   return func;
 }
--- a/src/gallium/state_trackers/clover/llvm/codegen/common.cpp
+++ b/src/gallium/state_trackers/clover/llvm/codegen/common.cpp
@@ -196,8 +196,9 @@ clover::llvm::build_module_common(const Module &mod,
                                  const clang::CompilerInstance &c) {
   module m;

-   for (const auto &name : map(std::mem_fn(&Function::getName),
+   for (const auto &llvm_name : map(std::mem_fn(&Function::getName),
                               get_kernels(mod))) {
+      const ::std::string name(llvm_name);
      if (offsets.count(name))
         m.syms.emplace_back(name, 0, offsets.at(name),
                             make_kernel_args(mod, name, c));
--- a/src/gallium/state_trackers/clover/llvm/metadata.hpp
+++ b/src/gallium/state_trackers/clover/llvm/metadata.hpp
@@ -62,7 +62,7 @@ namespace clover {
                            const std::string &name) {
         return ::llvm::cast< ::llvm::MDString>(
               detail::get_kernel_metadata_operands(f, name)[arg.getArgNo()])
-            ->getString();
+            ->getString().str();
      }

      ///
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -27,6 +27,7 @@
 #include "blorp_priv.h"
 #include "dev/gen_device_info.h"
 #include "common/gen_sample_positions.h"
+#include "common/gen_l3_config.h"
 #include "genxml/gen_macros.h"

 /**
@@ -65,10 +66,8 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
                                           uint32_t *sizes,
                                           unsigned num_vbs);

-#if GEN_GEN >= 8
-static struct blorp_address
+UNUSED static struct blorp_address
 blorp_get_workaround_page(struct blorp_batch *batch);
-#endif

 static void
 blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,
@@ -92,9 +91,14 @@ static struct blorp_address
 blorp_get_surface_base_address(struct blorp_batch *batch);
 #endif

+#if GEN_GEN >= 7
+static const struct gen_l3_config *
+blorp_get_l3_config(struct blorp_batch *batch);
+# else
 static void
 blorp_emit_urb_config(struct blorp_batch *batch,
                      unsigned vs_entry_size, unsigned sf_entry_size);
+#endif

 static void
 blorp_emit_pipeline(struct blorp_batch *batch,
@@ -185,7 +189,8 @@ _blorp_combine_address(struct blorp_batch *batch, void *location,
 */
 static void
 emit_urb_config(struct blorp_batch *batch,
-                const struct blorp_params *params)
+                const struct blorp_params *params,
+                enum gen_urb_deref_block_size *deref_block_size)
 {
   /* Once vertex fetcher has written full VUE entries with complete
    * header the space requirement is as follows per vertex (in bytes):
@@ -207,7 +212,43 @@ emit_urb_config(struct blorp_batch *batch,
   const unsigned sf_entry_size =
      params->sf_prog_data ? params->sf_prog_data->urb_entry_size : 0;

+#if GEN_GEN >= 7
+   assert(sf_entry_size == 0);
+   const unsigned entry_size[4] = { vs_entry_size, 1, 1, 1 };
+
+   unsigned entries[4], start[4];
+   gen_get_urb_config(batch->blorp->compiler->devinfo,
+                      blorp_get_l3_config(batch),
+                      false, false, entry_size,
+                      entries, start, deref_block_size);
+
+#if GEN_GEN == 7 && !GEN_IS_HASWELL
+   /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
+    *
+    *    "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
+    *    needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
+    *    3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
+    *    3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL
+    *    needs to be sent before any combination of VS associated 3DSTATE."
+    */
+   blorp_emit(batch, GENX(PIPE_CONTROL), pc) {
+      pc.DepthStallEnable  = true;
+      pc.PostSyncOperation = WriteImmediateData;
+      pc.Address           = blorp_get_workaround_page(batch);
+   }
+#endif
+
+   for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
+      blorp_emit(batch, GENX(3DSTATE_URB_VS), urb) {
+         urb._3DCommandSubOpcode      += i;
+         urb.VSURBStartingAddress      = start[i];
+         urb.VSURBEntryAllocationSize  = entry_size[i] - 1;
+         urb.VSNumberofURBEntries      = entries[i];
+      }
+   }
+#else /* GEN_GEN < 7 */
   blorp_emit_urb_config(batch, vs_entry_size, sf_entry_size);
+#endif
 }

 #if GEN_GEN >= 7
@@ -646,7 +687,8 @@ blorp_emit_vs_config(struct blorp_batch *batch,

 static void
 blorp_emit_sf_config(struct blorp_batch *batch,
-                     const struct blorp_params *params)
+                     const struct blorp_params *params,
+                     enum gen_urb_deref_block_size urb_deref_block_size)
 {
   const struct brw_wm_prog_data *prog_data = params->wm_prog_data;

@@ -671,7 +713,11 @@ blorp_emit_sf_config(struct blorp_batch *batch,

 #if GEN_GEN >= 8

-   blorp_emit(batch, GENX(3DSTATE_SF), sf);
+   blorp_emit(batch, GENX(3DSTATE_SF), sf) {
+#if GEN_GEN >= 12
+      sf.DerefBlockSize = urb_deref_block_size;
+#endif
+   }

   blorp_emit(batch, GENX(3DSTATE_RASTER), raster) {
      raster.CullMode = CULLMODE_NONE;
@@ -1212,7 +1258,8 @@ blorp_emit_pipeline(struct blorp_batch *batch,
   uint32_t color_calc_state_offset;
   uint32_t depth_stencil_state_offset;

-   emit_urb_config(batch, params);
+   enum gen_urb_deref_block_size urb_deref_block_size;
+   emit_urb_config(batch, params, &urb_deref_block_size);

   if (params->wm_prog_data) {
      blend_state_offset = blorp_emit_blend_state(batch, params);
@@ -1293,7 +1340,7 @@ blorp_emit_pipeline(struct blorp_batch *batch,
      clip.PerspectiveDivideDisable = true;
   }

-   blorp_emit_sf_config(batch, params);
+   blorp_emit_sf_config(batch, params, urb_deref_block_size);
   blorp_emit_ps_config(batch, params);

   blorp_emit_cc_viewport(batch);
--- a/src/intel/common/gen_aux_map.c
+++ b/src/intel/common/gen_aux_map.c
@@ -370,7 +370,6 @@ get_bpp_encoding(uint16_t bpp)
 }

 #define GEN_AUX_MAP_ENTRY_Y_TILED_BIT  (0x1ull << 52)
-#define GEN_AUX_MAP_ENTRY_VALID_BIT    0x1ull

 uint64_t
 gen_aux_map_format_bits_for_isl_surf(const struct isl_surf *isl_surf)
--- a/src/intel/common/gen_aux_map.h
+++ b/src/intel/common/gen_aux_map.h
@@ -45,6 +45,7 @@ struct gen_device_info;

 #define GEN_AUX_MAP_ADDRESS_MASK       0x0000ffffffffff00ull
 #define GEN_AUX_MAP_FORMAT_BITS_MASK   0xfff0000000000000ull
+#define GEN_AUX_MAP_ENTRY_VALID_BIT    0x1ull
 #define GEN_AUX_MAP_GEN12_CCS_SCALE    256
 #define GEN_AUX_MAP_MAIN_PAGE_SIZE     (64 * 1024)
 #define GEN_AUX_MAP_AUX_PAGE_SIZE \
--- a/src/intel/common/gen_l3_config.h
+++ b/src/intel/common/gen_l3_config.h
@@ -92,10 +92,17 @@ gen_get_l3_config_urb_size(const struct gen_device_info *devinfo,

 void gen_dump_l3_config(const struct gen_l3_config *cfg, FILE *fp);

+enum gen_urb_deref_block_size { /* written as-is to Gen12+ 3DSTATE_SF */
+   GEN_URB_DEREF_BLOCK_SIZE_32         = 0, /* "Block Deref Size 32" */
+   GEN_URB_DEREF_BLOCK_SIZE_PER_POLY   = 1, /* "Per Poly Deref Mode" */
+   GEN_URB_DEREF_BLOCK_SIZE_8          = 2, /* "Block Deref Size 8" */
+};
+
 void gen_get_urb_config(const struct gen_device_info *devinfo,
-                        unsigned push_constant_bytes, unsigned urb_size_bytes,
+                        const struct gen_l3_config *l3_cfg,
                        bool tess_present, bool gs_present,
                        const unsigned entry_size[4],
-                        unsigned entries[4], unsigned start[4]);
+                        unsigned entries[4], unsigned start[4],
+                        enum gen_urb_deref_block_size *deref_block_size);

 #endif /* GEN_L3_CONFIG_H */
--- a/src/intel/common/gen_urb_config.c
+++ b/src/intel/common/gen_urb_config.c
@@ -59,19 +59,24 @@
 */
 void
 gen_get_urb_config(const struct gen_device_info *devinfo,
-                   unsigned push_constant_bytes, unsigned urb_size_bytes,
+                   const struct gen_l3_config *l3_cfg,
                   bool tess_present, bool gs_present,
                   const unsigned entry_size[4],
-                   unsigned entries[4], unsigned start[4])
+                   unsigned entries[4], unsigned start[4],
+                   enum gen_urb_deref_block_size *deref_block_size)
 {
+   const unsigned urb_size_kB = gen_get_l3_config_urb_size(devinfo, l3_cfg);
+   const unsigned push_constant_kB =
+      (devinfo->gen >= 8 || (devinfo->is_haswell && devinfo->gt == 3)) ? 32 : 16;
+
   const bool active[4] = { true, tess_present, tess_present, gs_present };

   /* URB allocations must be done in 8k chunks. */
-   const unsigned chunk_size_bytes = 8192;
+   const unsigned chunk_size_kB = 8;
+   const unsigned chunk_size_bytes = chunk_size_kB * 1024;

-   const unsigned push_constant_chunks =
-      push_constant_bytes / chunk_size_bytes;
-   const unsigned urb_chunks = urb_size_bytes / chunk_size_bytes;
+   const unsigned push_constant_chunks = push_constant_kB / chunk_size_kB;
+   const unsigned urb_chunks = urb_size_kB / chunk_size_kB;

   /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
    *
@@ -205,4 +210,43 @@ gen_get_urb_config(const struct gen_device_info *devinfo,
         start[i] = 0;
      }
   }
+
+   if (deref_block_size) {
+      if (devinfo->gen >= 12) {
+         /* From the Gen12 BSpec:
+          *
+          *    "Deref Block size depends on the last enabled shader and number
+          *    of handles programmed for that shader
+          *
+          *       1) For GS last shader enabled cases, the deref block is
+          *          always set to a per poly(within hardware)
+          *
+          *    If the last enabled shader is VS or DS.
+          *
+          *       1) If DS is last enabled shader then if the number of DS
+          *          handles is less than 324, need to set per poly deref.
+          *
+          *       2) If VS is last enabled shader then if the number of VS
+          *          handles is less than 192, need to set per poly deref"
+          *
+          * The default is 32 so we assume that's the right choice if we're
+          * not in one of the explicit cases listed above.
+          */
+         if (gs_present) {
+            *deref_block_size = GEN_URB_DEREF_BLOCK_SIZE_PER_POLY;
+         } else if (tess_present) {
+            if (entries[MESA_SHADER_TESS_EVAL] < 324)
+               *deref_block_size = GEN_URB_DEREF_BLOCK_SIZE_PER_POLY;
+            else
+               *deref_block_size = GEN_URB_DEREF_BLOCK_SIZE_32;
+         } else {
+            if (entries[MESA_SHADER_VERTEX] < 192)
+               *deref_block_size = GEN_URB_DEREF_BLOCK_SIZE_PER_POLY;
+            else
+               *deref_block_size = GEN_URB_DEREF_BLOCK_SIZE_32;
+         }
+      } else {
+         *deref_block_size = 0;
+      }
+   }
 }
--- a/src/intel/compiler/brw_fs_combine_constants.cpp
+++ b/src/intel/compiler/brw_fs_combine_constants.cpp
@@ -56,7 +56,14 @@ could_coissue(const struct gen_device_info *devinfo, const fs_inst *inst)
   case BRW_OPCODE_CMP:
   case BRW_OPCODE_ADD:
   case BRW_OPCODE_MUL:
-      return true;
+      /* Only float instructions can coissue.  We don't have a great
+       * understanding of whether or not something like float(int(a) + int(b))
+       * would be considered float (based on the destination type) or integer
+       * (based on the source types), so we take the conservative choice of
+       * only promoting when both destination and source are float.
+       */
+      return inst->dst.type == BRW_REGISTER_TYPE_F &&
+             inst->src[0].type == BRW_REGISTER_TYPE_F;
   default:
      return false;
   }
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -452,8 +452,17 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
       * In the end, while base_offset is nice to look at in the generated
       * code, using it saves us 0 instructions and would require quite a bit
       * of case-by-case work.  It's just not worth it.
+       *
+       * There's some sort of HW bug on Gen12 which causes issues if we write
+       * to the address register in control-flow.  Since we only ever touch
+       * the address register from the generator, we can easily enough work
+       * around it by setting NoMask on the add.
       */
+      brw_push_insn_state(p);
+      if (devinfo->gen == 12)
+         brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset));
+      brw_pop_insn_state(p);
      brw_set_default_swsb(p, tgl_swsb_regdist(1));

      if (type_sz(reg.type) > 4 &&
--- a/src/intel/genxml/gen11.xml
+++ b/src/intel/genxml/gen11.xml
@@ -6997,7 +6997,6 @@
  </register>

  <register name="L3CNTLREG" length="1" num="0x7034">
-    <field name="SLM Enable" start="0" end="0" type="uint"/>
    <field name="URB Allocation" start="1" end="7" type="uint"/>
    <field name="Error Detection Behavior Control" start="9" end="9" type="bool"/>
    <field name="Use Full Ways" start="10" end="10" type="bool"/>
--- a/src/intel/genxml/gen12.xml
+++ b/src/intel/genxml/gen12.xml
@@ -2528,6 +2528,11 @@
      <value name="2.0 pixels" value="2"/>
      <value name="4.0 pixels" value="3"/>
    </field>
+    <field name="Deref Block Size" start="93" end="94" type="uint">
+      <value name="Block Deref Size 32" value="0"/>
+      <value name="Per Poly Deref Mode" value="1"/>
+      <value name="Block Deref Size 8" value="2"/>
+    </field>
    <field name="Point Width" start="96" end="106" type="u8.3"/>
    <field name="Point Width Source" start="107" end="107" type="uint">
      <value name="Vertex" value="0"/>
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -535,6 +535,14 @@ copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
      bool dst_has_shadow = false;
      struct blorp_surf dst_shadow_surf;
      if (&image == dst) {
+         /* In this case, the source is the buffer and, since blorp takes its
+          * copy dimensions in terms of the source format, we have to use the
+          * scaled down version for compressed textures because the source
+          * format is an RGB format.
+          */
+         extent.width = buffer_extent.width;
+         extent.height = buffer_extent.height;
+
         anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image,
                                           aspect, dst->surf.aux_usage,
                                           dst->level,
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -84,7 +84,8 @@ void
 genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
                     const struct gen_l3_config *l3_config,
                     VkShaderStageFlags active_stages,
-                     const unsigned entry_size[4]);
+                     const unsigned entry_size[4],
+                     enum gen_urb_deref_block_size *deref_block_size);

 void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
                                struct anv_address dst, struct anv_address src,
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -46,6 +46,7 @@
 #include "common/gen_clflush.h"
 #include "common/gen_decoder.h"
 #include "common/gen_gem.h"
+#include "common/gen_l3_config.h"
 #include "dev/gen_device_info.h"
 #include "blorp/blorp.h"
 #include "compiler/brw_compiler.h"
@@ -76,7 +77,6 @@ struct anv_image_view;
 struct anv_instance;

 struct gen_aux_map_context;
-struct gen_l3_config;
 struct gen_perf_config;

 #include <vulkan/vulkan.h>
@@ -2272,6 +2272,12 @@ enum anv_pipe_bits {
    * done by writing the AUX-TT register.
    */
   ANV_PIPE_AUX_TABLE_INVALIDATE_BIT         = (1 << 23),
+
+   /* This bit does not exist directly in PIPE_CONTROL. It means that a
+    * PIPE_CONTROL with a post-sync operation will follow. This is used to
+    * implement a workaround for Gen9.
+    */
+   ANV_PIPE_POST_SYNC_BIT                    = (1 << 24),
 };

 #define ANV_PIPE_FLUSH_BITS ( \
--- a/src/intel/vulkan/genX_blorp_exec.c
+++ b/src/intel/vulkan/genX_blorp_exec.c
@@ -178,8 +178,7 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
                                                       (1 << num_vbs) - 1);
 }

-#if GEN_GEN >= 8
-static struct blorp_address
+UNUSED static struct blorp_address
 blorp_get_workaround_page(struct blorp_batch *batch)
 {
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
@@ -188,7 +187,6 @@ blorp_get_workaround_page(struct blorp_batch *batch)
      .buffer = cmd_buffer->device->workaround_bo,
   };
 }
-#endif

 static void
 blorp_flush_range(struct blorp_batch *batch, void *start, size_t size)
@@ -197,22 +195,11 @@ blorp_flush_range(struct blorp_batch *batch, void *start, size_t size)
    */
 }

-static void
-blorp_emit_urb_config(struct blorp_batch *batch,
-                      unsigned vs_entry_size, unsigned sf_entry_size)
+static const struct gen_l3_config *
+blorp_get_l3_config(struct blorp_batch *batch)
 {
-   struct anv_device *device = batch->blorp->driver_ctx;
   struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
-
-   assert(sf_entry_size == 0);
-
-   const unsigned entry_size[4] = { vs_entry_size, 1, 1, 1 };
-
-   genX(emit_urb_setup)(device, &cmd_buffer->batch,
-                        cmd_buffer->state.current_l3_config,
-                        VK_SHADER_STAGE_VERTEX_BIT |
-                        VK_SHADER_STAGE_FRAGMENT_BIT,
-                        entry_size);
+   return cmd_buffer->state.current_l3_config;
 }

 void
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -1009,7 +1009,6 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
                      uint32_t base_layer, uint32_t layer_count)
 {
   uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
-   assert(isl_aux_usage_has_ccs(image->planes[plane].aux_usage));

   uint64_t base_address =
      anv_address_physical(image->planes[plane].address);
@@ -1025,6 +1024,9 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

+   struct gen_mi_builder b;
+   gen_mi_builder_init(&b, &cmd_buffer->batch);
+
   for (uint32_t a = 0; a < layer_count; a++) {
      const uint32_t layer = base_layer + a;

@@ -1069,24 +1071,25 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
           offset < end_offset_B; offset += 64 * 1024) {
         uint64_t address = base_address + offset;

-         uint64_t aux_entry_address, *aux_entry_map;
+         uint64_t aux_entry_addr64, *aux_entry_map;
         aux_entry_map = gen_aux_map_get_entry(cmd_buffer->device->aux_map_ctx,
-                                               address, &aux_entry_address);
+                                               address, &aux_entry_addr64);
+
+         assert(cmd_buffer->device->physical->use_softpin);
+         struct anv_address aux_entry_address = {
+            .bo = NULL,
+            .offset = aux_entry_addr64,
+         };

         const uint64_t old_aux_entry = READ_ONCE(*aux_entry_map);
         uint64_t new_aux_entry =
-            (old_aux_entry & ~GEN_AUX_MAP_FORMAT_BITS_MASK) | format_bits;
+            (old_aux_entry & GEN_AUX_MAP_ADDRESS_MASK) | format_bits;

-         /* We're only going to update the top 32 bits */
-         assert((uint32_t)old_aux_entry == (uint32_t)new_aux_entry);
+         if (isl_aux_usage_has_ccs(image->planes[plane].aux_usage))
+            new_aux_entry |= GEN_AUX_MAP_ENTRY_VALID_BIT;

-         anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
-            sdi.Address = (struct anv_address) {
-               .bo = NULL,
-               .offset = aux_entry_address + 4,
-            };
-            sdi.ImmediateData = new_aux_entry >> 32;
-         }
+         gen_mi_store(&b, gen_mi_mem64(aux_entry_address),
+                          gen_mi_imm(new_aux_entry));
      }
   }

@@ -1165,8 +1168,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
   if (initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
       initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) {
 #if GEN_GEN == 12
-      if (isl_aux_usage_has_ccs(image->planes[plane].aux_usage) &&
-          device->physical->has_implicit_ccs && devinfo->has_aux_map) {
+      if (device->physical->has_implicit_ccs && devinfo->has_aux_map) {
         anv_image_init_aux_tt(cmd_buffer, image, aspect,
                               base_level, level_count,
                               base_layer, layer_count);
@@ -1889,7 +1891,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,

   uint32_t l3cr;
   anv_pack_struct(&l3cr, L3_ALLOCATION_REG,
-#if GEN_GEN < 12
+#if GEN_GEN < 11
                   .SLMEnable = has_slm,
 #endif
 #if GEN_GEN == 11
@@ -2048,6 +2050,21 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
             sizeof(cmd_buffer->state.gfx.ib_dirty_range));
   }

+   /* Project: SKL / Argument: LRI Post Sync Operation [23]
+    *
+    * "PIPECONTROL command with “Command Streamer Stall Enable” must be
+    *  programmed prior to programming a PIPECONTROL command with "LRI
+    *  Post Sync Operation" in GPGPU mode of operation (i.e when
+    *  PIPELINE_SELECT command is set to GPGPU mode of operation)."
+    *
+    * The same text exists a few rows below for Post Sync Op.
+    */
+   if (bits & ANV_PIPE_POST_SYNC_BIT) {
+      if (GEN_GEN == 9 && cmd_buffer->state.current_pipeline == GPGPU)
+         bits |= ANV_PIPE_CS_STALL_BIT;
+      bits &= ~ANV_PIPE_POST_SYNC_BIT;
+   }
+
   if (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
 #if GEN_GEN >= 12
@@ -4617,6 +4634,9 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
   isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info);

   if (GEN_GEN >= 12) {
+      cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
+      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
      /* GEN:BUG:1408224581
       *
       * Workaround: Gen12LP Astep only An additional pipe control with
@@ -5568,6 +5588,9 @@ void genX(CmdSetEvent)(
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_event, event, _event);

+   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
+   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
         pc.StallAtPixelScoreboard = true;
@@ -5592,6 +5615,9 @@ void genX(CmdResetEvent)(
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_event, event, _event);

+   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
+   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
         pc.StallAtPixelScoreboard = true;
--- a/src/intel/vulkan/genX_gpu_memcpy.c
+++ b/src/intel/vulkan/genX_gpu_memcpy.c
@@ -147,7 +147,7 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,

   genX(emit_urb_setup)(cmd_buffer->device, &cmd_buffer->batch,
                        cmd_buffer->state.current_l3_config,
-                        VK_SHADER_STAGE_VERTEX_BIT, entry_size);
+                        VK_SHADER_STAGE_VERTEX_BIT, entry_size, NULL);

   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) {
 #if GEN_GEN < 12
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -259,25 +259,18 @@ void
 genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
                     const struct gen_l3_config *l3_config,
                     VkShaderStageFlags active_stages,
-                     const unsigned entry_size[4])
+                     const unsigned entry_size[4],
+                     enum gen_urb_deref_block_size *deref_block_size)
 {
   const struct gen_device_info *devinfo = &device->info;
-#if GEN_IS_HASWELL
-   const unsigned push_constant_kb = devinfo->gt == 3 ? 32 : 16;
-#else
-   const unsigned push_constant_kb = GEN_GEN >= 8 ? 32 : 16;
-#endif
-
-   const unsigned urb_size_kb = gen_get_l3_config_urb_size(devinfo, l3_config);

   unsigned entries[4];
   unsigned start[4];
-   gen_get_urb_config(devinfo,
-                      1024 * push_constant_kb, 1024 * urb_size_kb,
+   gen_get_urb_config(devinfo, l3_config,
                      active_stages &
                         VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
                      active_stages & VK_SHADER_STAGE_GEOMETRY_BIT,
-                      entry_size, entries, start);
+                      entry_size, entries, start, deref_block_size);

 #if GEN_GEN == 7 && !GEN_IS_HASWELL
   /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
@@ -306,7 +299,8 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
 }

 static void
-emit_urb_setup(struct anv_pipeline *pipeline)
+emit_urb_setup(struct anv_pipeline *pipeline,
+               enum gen_urb_deref_block_size *deref_block_size)
 {
   unsigned entry_size[4];
   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
@@ -319,7 +313,8 @@ emit_urb_setup(struct anv_pipeline *pipeline)

   genX(emit_urb_setup)(pipeline->device, &pipeline->batch,
                        pipeline->urb.l3_config,
-                        pipeline->active_stages, entry_size);
+                        pipeline->active_stages, entry_size,
+                        deref_block_size);
 }

 static void
@@ -573,7 +568,8 @@ emit_rs_state(struct anv_pipeline *pipeline,
              const VkPipelineMultisampleStateCreateInfo *ms_info,
              const VkPipelineRasterizationLineStateCreateInfoEXT *line_info,
              const struct anv_render_pass *pass,
-              const struct anv_subpass *subpass)
+              const struct anv_subpass *subpass,
+              enum gen_urb_deref_block_size urb_deref_block_size)
 {
   struct GENX(3DSTATE_SF) sf = {
      GENX(3DSTATE_SF_header),
@@ -591,6 +587,10 @@ emit_rs_state(struct anv_pipeline *pipeline,
   sf.LineStippleEnable = line_info && line_info->stippledLineEnable;
 #endif

+#if GEN_GEN >= 12
+   sf.DerefBlockSize = urb_deref_block_size;
+#endif
+
   const struct brw_vue_prog_data *last_vue_prog_data =
      anv_pipeline_get_last_vue_prog_data(pipeline);

@@ -2148,19 +2148,21 @@ genX(graphics_pipeline_create)(
      vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
                           PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);

+   enum gen_urb_deref_block_size urb_deref_block_size;
+   emit_urb_setup(pipeline, &urb_deref_block_size);
+
   assert(pCreateInfo->pVertexInputState);
   emit_vertex_input(pipeline, pCreateInfo->pVertexInputState);
   assert(pCreateInfo->pRasterizationState);
   emit_rs_state(pipeline, pCreateInfo->pInputAssemblyState,
                           pCreateInfo->pRasterizationState,
-                           ms_info, line_info, pass, subpass);
+                           ms_info, line_info, pass, subpass,
+                           urb_deref_block_size);
   emit_ms_state(pipeline, ms_info);
   emit_ds_state(pipeline, ds_info, pass, subpass);
   emit_cb_state(pipeline, cb_info, ms_info);
   compute_kill_pixel(pipeline, ms_info, subpass);

-   emit_urb_setup(pipeline);
-
   emit_3dstate_clip(pipeline,
                     pCreateInfo->pInputAssemblyState,
                     vp_info,
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -424,6 +424,9 @@ static void
 emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
                    struct anv_address addr)
 {
+   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
+   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      pc.DestinationAddressType  = DAT_PPGTT;
      pc.PostSyncOperation       = WritePSDepthCount;
@@ -448,6 +451,9 @@ emit_query_pc_availability(struct anv_cmd_buffer *cmd_buffer,
                           struct anv_address addr,
                           bool available)
 {
+   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
+   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      pc.DestinationAddressType  = DAT_PPGTT;
      pc.PostSyncOperation       = WriteImmediateData;
@@ -832,6 +838,9 @@ void genX(CmdWriteTimestamp)(

   default:
      /* Everything else is bottom-of-pipe */
+      cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
+      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
         pc.DestinationAddressType  = DAT_PPGTT;
         pc.PostSyncOperation       = WriteTimestamp;
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1523,8 +1523,10 @@ intel_prepare_render(struct brw_context *brw)
    * that will happen next will probably dirty the front buffer.  So
    * mark it as dirty here.
    */
-   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
+   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer) &&
+       ctx->DrawBuffer != _mesa_get_incomplete_framebuffer()) {
      brw->front_buffer_dirty = true;
+   }

   if (brw->is_shared_buffer_bound) {
      /* Subsequent rendering will probably dirty the shared buffer. */
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1504,6 +1504,9 @@ gen6_set_sample_maps(struct gl_context *ctx);
 /* gen8_multisample_state.c */
 void gen8_emit_3dstate_sample_pattern(struct brw_context *brw);

+/* gen7_l3_state.c */
+void brw_emit_l3_state(struct brw_context *brw);
+
 /* gen7_urb.c */
 void
 gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
--- a/src/mesa/drivers/dri/i965/gen4_blorp_exec.h
+++ b/src/mesa/drivers/dri/i965/gen4_blorp_exec.h
@@ -178,7 +178,7 @@ blorp_emit_pipeline(struct blorp_batch *batch,
   assert(batch->blorp->driver_ctx == batch->driver_batch);
   struct brw_context *brw = batch->driver_batch;

-   emit_urb_config(batch, params);
+   emit_urb_config(batch, params, NULL);

   blorp_emit(batch, GENX(3DSTATE_PIPELINED_POINTERS), pp) {
      pp.PointertoVSState = blorp_emit_vs_state(batch);
--- a/src/mesa/drivers/dri/i965/gen7_l3_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c
@@ -118,7 +118,8 @@ setup_l3_config(struct brw_context *brw, const struct gen_l3_config *cfg)
   if (devinfo->gen >= 8) {
      assert(!cfg->n[GEN_L3P_IS] && !cfg->n[GEN_L3P_C] && !cfg->n[GEN_L3P_T]);

-      const unsigned imm_data = ((has_slm ? GEN8_L3CNTLREG_SLM_ENABLE : 0) |
+      const unsigned imm_data = (
+         (devinfo->gen < 11 && has_slm ? GEN8_L3CNTLREG_SLM_ENABLE : 0) |
         (devinfo->gen == 11 ? GEN11_L3CNTLREG_USE_FULL_WAYS : 0) |
         SET_FIELD(cfg->n[GEN_L3P_URB], GEN8_L3CNTLREG_URB_ALLOC) |
         SET_FIELD(cfg->n[GEN_L3P_RO], GEN8_L3CNTLREG_RO_ALLOC) |
@@ -211,8 +212,8 @@ update_urb_size(struct brw_context *brw, const struct gen_l3_config *cfg)
   }
 }

-static void
-emit_l3_state(struct brw_context *brw)
+void
+brw_emit_l3_state(struct brw_context *brw)
 {
   const struct gen_l3_weights w = get_pipeline_state_l3_weights(brw);
   const float dw = gen_diff_l3_weights(w, gen_get_l3_config_weights(brw->l3.config));
@@ -260,7 +261,7 @@ const struct brw_tracked_state gen7_l3_state = {
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_VS_PROG_DATA,
   },
-   .emit = emit_l3_state
+   .emit = brw_emit_l3_state
 };

 /**
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -208,8 +208,6 @@ gen7_upload_urb(struct brw_context *brw, unsigned vs_size,
                bool gs_present, bool tess_present)
 {
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
-   const int push_size_kB =
-      (devinfo->gen >= 8 || (devinfo->is_haswell && devinfo->gt == 3)) ? 32 : 16;

   /* BRW_NEW_{VS,TCS,TES,GS}_PROG_DATA */
   struct brw_vue_prog_data *prog_data[4] = {
@@ -249,8 +247,9 @@ gen7_upload_urb(struct brw_context *brw, unsigned vs_size,

   unsigned entries[4];
   unsigned start[4];
-   gen_get_urb_config(devinfo, 1024 * push_size_kB, 1024 * brw->urb.size,
-                      tess_present, gs_present, entry_size, entries, start);
+   gen_get_urb_config(devinfo, brw->l3.config,
+                      tess_present, gs_present, entry_size,
+                      entries, start, NULL);

   if (devinfo->gen == 7 && !devinfo->is_haswell && !devinfo->is_baytrail)
      gen7_emit_vs_workaround_flush(brw);
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -231,8 +231,7 @@ blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *batch,
 #endif
 }

-#if GEN_GEN >= 8
-static struct blorp_address
+UNUSED static struct blorp_address
 blorp_get_workaround_page(struct blorp_batch *batch)
 {
   assert(batch->blorp->driver_ctx == batch->driver_batch);
@@ -242,7 +241,6 @@ blorp_get_workaround_page(struct blorp_batch *batch)
      .buffer = brw->workaround_bo,
   };
 }
-#endif

 static void
 blorp_flush_range(UNUSED struct blorp_batch *batch, UNUSED void *start,
@@ -253,6 +251,16 @@ blorp_flush_range(UNUSED struct blorp_batch *batch, UNUSED void *start,
    */
 }

+#if GEN_GEN >= 7
+static const struct gen_l3_config *
+blorp_get_l3_config(struct blorp_batch *batch)
+{
+   assert(batch->blorp->driver_ctx == batch->driver_batch);
+   struct brw_context *brw = batch->driver_batch;
+
+   return brw->l3.config;
+}
+#else /* GEN_GEN < 7 */
 static void
 blorp_emit_urb_config(struct blorp_batch *batch,
                      unsigned vs_entry_size,
@@ -261,18 +269,14 @@ blorp_emit_urb_config(struct blorp_batch *batch,
   assert(batch->blorp->driver_ctx == batch->driver_batch);
   struct brw_context *brw = batch->driver_batch;

-#if GEN_GEN >= 7
-   if (brw->urb.vsize >= vs_entry_size)
-      return;
-
-   gen7_upload_urb(brw, vs_entry_size, false, false);
-#elif GEN_GEN == 6
+#if GEN_GEN == 6
   gen6_upload_urb(brw, vs_entry_size, false, 0);
 #else
   /* We calculate it now and emit later. */
   brw_calculate_urb_fence(brw, 0, vs_entry_size, sf_entry_size);
 #endif
 }
+#endif

 void
 genX(blorp_exec)(struct blorp_batch *batch,
@@ -317,6 +321,7 @@ genX(blorp_exec)(struct blorp_batch *batch,
      brw_cache_flush_for_depth(brw, params->stencil.addr.buffer);

   brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
+   brw_emit_l3_state(brw);

 retry:
   intel_batchbuffer_require_space(brw, 1400);
@@ -386,6 +391,12 @@ retry:
   brw->no_depth_or_stencil = !params->depth.enabled &&
                              !params->stencil.enabled;
   brw->ib.index_size = -1;
+   brw->urb.vsize = 0;
+   brw->urb.gs_present = false;
+   brw->urb.gsize = 0;
+   brw->urb.tess_present = false;
+   brw->urb.hsize = 0;
+   brw->urb.dsize = 0;

   if (params->dst.enabled) {
      brw_render_cache_add_bo(brw, params->dst.addr.buffer,
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -111,7 +111,8 @@

 static nir_ssa_def *
 sample_via_nir(nir_builder *b, nir_variable *texcoord,
-               const char *name, int sampler, enum glsl_base_type base_type)
+               const char *name, int sampler, enum glsl_base_type base_type,
+               nir_alu_type alu_type)
 {
   const struct glsl_type *sampler2D =
      glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, base_type);
@@ -127,7 +128,7 @@ sample_via_nir(nir_builder *b, nir_variable *texcoord,
   tex->op = nir_texop_tex;
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   tex->coord_components = 2;
-   tex->dest_type = nir_type_float;
+   tex->dest_type = alu_type;
   tex->src[0].src_type = nir_tex_src_texture_deref;
   tex->src[0].src = nir_src_for_ssa(&deref->dest.ssa);
   tex->src[1].src_type = nir_tex_src_sampler_deref;
@@ -164,7 +165,7 @@ make_drawpix_z_stencil_program_nir(struct st_context *st,
                             "gl_FragDepth");
      out->data.location = FRAG_RESULT_DEPTH;
      nir_ssa_def *depth = sample_via_nir(&b, texcoord, "depth", 0,
-                                          GLSL_TYPE_FLOAT);
+                                          GLSL_TYPE_FLOAT, nir_type_float);
      nir_store_var(&b, out, depth, 0x1);

      /* Also copy color */
@@ -186,7 +187,7 @@ make_drawpix_z_stencil_program_nir(struct st_context *st,
                             "gl_FragStencilRefARB");
      out->data.location = FRAG_RESULT_STENCIL;
      nir_ssa_def *stencil = sample_via_nir(&b, texcoord, "stencil", 1,
-                                            GLSL_TYPE_UINT);
+                                            GLSL_TYPE_UINT, nir_type_uint);
      nir_store_var(&b, out, stencil, 0x1);
   }

--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -355,6 +355,7 @@ st_new_renderbuffer_fb(enum pipe_format format, unsigned samples, boolean sw)
   case PIPE_FORMAT_R8G8B8X8_UNORM:
   case PIPE_FORMAT_B8G8R8X8_UNORM:
   case PIPE_FORMAT_X8R8G8B8_UNORM:
+   case PIPE_FORMAT_R8G8B8_UNORM:
      strb->Base.InternalFormat = GL_RGB8;
      break;
   case PIPE_FORMAT_R8G8B8A8_SRGB:
@@ -400,6 +401,9 @@ st_new_renderbuffer_fb(enum pipe_format format, unsigned samples, boolean sw)
   case PIPE_FORMAT_R16G16B16A16_UNORM:
      strb->Base.InternalFormat = GL_RGBA16;
      break;
+   case PIPE_FORMAT_R16G16B16_UNORM:
+      strb->Base.InternalFormat = GL_RGB16;
+      break;
   case PIPE_FORMAT_R8_UNORM:
      strb->Base.InternalFormat = GL_R8;
      break;
@@ -416,6 +420,7 @@ st_new_renderbuffer_fb(enum pipe_format format, unsigned samples, boolean sw)
      strb->Base.InternalFormat = GL_RGBA32F;
      break;
   case PIPE_FORMAT_R32G32B32X32_FLOAT:
+   case PIPE_FORMAT_R32G32B32_FLOAT:
      strb->Base.InternalFormat = GL_RGB32F;
      break;
   case PIPE_FORMAT_R16G16B16A16_FLOAT:
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -758,7 +758,11 @@ deflate_and_write_to_disk(const void *in_data, size_t in_data_size, int dest,
      free(out);
      return 0;
   }
-   write_all(dest, out, ret);
+   ssize_t written = write_all(dest, out, ret);
+   if (written == -1) {
+      free(out);
+      return 0;
+   }
   free(out);
   return ret;
 #else
--- a/src/util/os_socket.h
+++ b/src/util/os_socket.h
@@ -13,6 +13,8 @@
 #ifdef _MSC_VER
 #include <BaseTsd.h>
 typedef SSIZE_T ssize_t;
+#else
+#include <unistd.h>
 #endif

 #ifdef __cplusplus