diff --git a/.travis.yml b/.travis.yml index 18f114b7cc8..6214132289c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,198 +1,40 @@ language: c -dist: xenial +os: osx cache: - apt: true ccache: true env: global: - - XORG_RELEASES=https://xorg.freedesktop.org/releases/individual - - XCB_RELEASES=https://xcb.freedesktop.org/dist - - WAYLAND_RELEASES=https://wayland.freedesktop.org/releases - - XORGMACROS_VERSION=util-macros-1.19.0 - - GLPROTO_VERSION=glproto-1.4.17 - - DRI2PROTO_VERSION=dri2proto-2.8 - - LIBPCIACCESS_VERSION=libpciaccess-0.13.4 - - LIBDRM_VERSION=libdrm-2.4.97 - - XCBPROTO_VERSION=xcb-proto-1.13 - - RANDRPROTO_VERSION=randrproto-1.3.0 - - LIBXRANDR_VERSION=libXrandr-1.3.0 - - LIBXCB_VERSION=libxcb-1.13 - - LIBXSHMFENCE_VERSION=libxshmfence-1.2 - - LIBVDPAU_VERSION=libvdpau-1.1 - - LIBVA_VERSION=libva-1.7.0 - - LIBWAYLAND_VERSION=wayland-1.15.0 - - WAYLAND_PROTOCOLS_VERSION=wayland-protocols-1.8 - - PKG_CONFIG_PATH=$HOME/prefix/lib/pkgconfig:$HOME/prefix/share/pkgconfig - - LD_LIBRARY_PATH="$HOME/prefix/lib:$LD_LIBRARY_PATH" - - PATH="$HOME/prefix/bin:$PATH" - -matrix: - include: - - env: - - LABEL="macOS meson" - - BUILD=meson - - DRI_LOADERS="-Dplatforms=x11" - - GALLIUM_DRIVERS=swrast - os: osx + - PKG_CONFIG_PATH="" before_install: - - | - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then - HOMEBREW_NO_AUTO_UPDATE=1 brew install python3 ninja expat gettext - # Set PATH for homebrew pip3 installs - PATH="$HOME/Library/Python/3.6/bin:${PATH}" - # Set PKG_CONFIG_PATH for keg-only expat - PKG_CONFIG_PATH="/usr/local/opt/expat/lib/pkgconfig:${PKG_CONFIG_PATH}" - # Set PATH for keg-only gettext - PATH="/usr/local/opt/gettext/bin:${PATH}" - - # Install xquartz for prereqs ... - XQUARTZ_VERSION="2.7.11" - wget -nv https://dl.bintray.com/xquartz/downloads/XQuartz-${XQUARTZ_VERSION}.dmg - hdiutil attach XQuartz-${XQUARTZ_VERSION}.dmg - sudo installer -pkg /Volumes/XQuartz-${XQUARTZ_VERSION}/XQuartz.pkg -target / - hdiutil detach /Volumes/XQuartz-${XQUARTZ_VERSION} - # ... and set paths - PATH="/opt/X11/bin:${PATH}" - PKG_CONFIG_PATH="/opt/X11/share/pkgconfig:/opt/X11/lib/pkgconfig:${PKG_CONFIG_PATH}" - ACLOCAL="aclocal -I /opt/X11/share/aclocal -I /usr/local/share/aclocal" - fi + - HOMEBREW_NO_AUTO_UPDATE=1 brew install python3 ninja expat gettext + # Set PATH for homebrew pip3 installs + - PATH="$HOME/Library/Python/3.6/bin:${PATH}" + # Set PKG_CONFIG_PATH for keg-only expat + - PKG_CONFIG_PATH="/usr/local/opt/expat/lib/pkgconfig:${PKG_CONFIG_PATH}" + # Set PATH for keg-only gettext + - PATH="/usr/local/opt/gettext/bin:${PATH}" + + # Install xquartz for prereqs ... + - XQUARTZ_VERSION="2.7.11" + - wget -nv https://dl.bintray.com/xquartz/downloads/XQuartz-${XQUARTZ_VERSION}.dmg + - hdiutil attach XQuartz-${XQUARTZ_VERSION}.dmg + - sudo installer -pkg /Volumes/XQuartz-${XQUARTZ_VERSION}/XQuartz.pkg -target / + - hdiutil detach /Volumes/XQuartz-${XQUARTZ_VERSION} + # ... and set paths + - PKG_CONFIG_PATH="/opt/X11/share/pkgconfig:/opt/X11/lib/pkgconfig:${PKG_CONFIG_PATH}" install: - # Install a more modern meson from pip, since the version in the - # ubuntu repos is often quite old. - - if test "x$BUILD" = xmeson; then - pip3 install --user meson; - pip3 install --user mako; - fi - - # Install dependencies where we require specific versions (or where - # disallowed by Travis CI's package whitelisting). 
- - - | - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then - wget $XORG_RELEASES/util/$XORGMACROS_VERSION.tar.bz2 - tar -jxvf $XORGMACROS_VERSION.tar.bz2 - (cd $XORGMACROS_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget $XORG_RELEASES/proto/$GLPROTO_VERSION.tar.bz2 - tar -jxvf $GLPROTO_VERSION.tar.bz2 - (cd $GLPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget $XORG_RELEASES/proto/$DRI2PROTO_VERSION.tar.bz2 - tar -jxvf $DRI2PROTO_VERSION.tar.bz2 - (cd $DRI2PROTO_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget $XCB_RELEASES/$XCBPROTO_VERSION.tar.bz2 - tar -jxvf $XCBPROTO_VERSION.tar.bz2 - (cd $XCBPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget $XCB_RELEASES/$LIBXCB_VERSION.tar.bz2 - tar -jxvf $LIBXCB_VERSION.tar.bz2 - (cd $LIBXCB_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget $XORG_RELEASES/lib/$LIBPCIACCESS_VERSION.tar.bz2 - tar -jxvf $LIBPCIACCESS_VERSION.tar.bz2 - (cd $LIBPCIACCESS_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget https://dri.freedesktop.org/libdrm/$LIBDRM_VERSION.tar.bz2 - tar -jxvf $LIBDRM_VERSION.tar.bz2 - (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 --enable-freedreno --enable-etnaviv-experimental-api && make install) - - wget $XORG_RELEASES/proto/$RANDRPROTO_VERSION.tar.bz2 - tar -jxvf $RANDRPROTO_VERSION.tar.bz2 - (cd $RANDRPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget $XORG_RELEASES/lib/$LIBXRANDR_VERSION.tar.bz2 - tar -jxvf $LIBXRANDR_VERSION.tar.bz2 - (cd $LIBXRANDR_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget $XORG_RELEASES/lib/$LIBXSHMFENCE_VERSION.tar.bz2 - tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2 - (cd $LIBXSHMFENCE_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget https://people.freedesktop.org/~aplattner/vdpau/$LIBVDPAU_VERSION.tar.bz2 - tar -jxvf $LIBVDPAU_VERSION.tar.bz2 - (cd $LIBVDPAU_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget https://www.freedesktop.org/software/vaapi/releases/libva/$LIBVA_VERSION.tar.bz2 - tar -jxvf $LIBVA_VERSION.tar.bz2 - (cd $LIBVA_VERSION && ./configure --prefix=$HOME/prefix --disable-wayland --disable-dummy-driver && make install) - - wget $WAYLAND_RELEASES/$LIBWAYLAND_VERSION.tar.xz - tar -axvf $LIBWAYLAND_VERSION.tar.xz - (cd $LIBWAYLAND_VERSION && ./configure --prefix=$HOME/prefix --enable-libraries --without-host-scanner --disable-documentation --disable-dtd-validation && make install) - - wget $WAYLAND_RELEASES/$WAYLAND_PROTOCOLS_VERSION.tar.xz - tar -axvf $WAYLAND_PROTOCOLS_VERSION.tar.xz - (cd $WAYLAND_PROTOCOLS_VERSION && ./configure --prefix=$HOME/prefix && make install) - - # Meson requires ninja >= 1.6, but xenial has 1.3.x - wget https://github.com/ninja-build/ninja/releases/download/v1.6.0/ninja-linux.zip - unzip ninja-linux.zip - mv ninja $HOME/prefix/bin/ - - # Generate this header since one is missing on the Travis instance - mkdir -p linux - printf "%s\n" \ - "#ifndef _LINUX_MEMFD_H" \ - "#define _LINUX_MEMFD_H" \ - "" \ - "#define MFD_CLOEXEC 0x0001U" \ - "#define MFD_ALLOW_SEALING 0x0002U" \ - "" \ - "#endif /* _LINUX_MEMFD_H */" > linux/memfd.h - - # Generate this header, including the missing SYS_memfd_create - # macro, which is not provided by the header in the Travis - # instance - mkdir -p sys - printf "%s\n" \ - "#ifndef _SYSCALL_H" \ - "#define _SYSCALL_H 1" \ - "" \ - "#include " \ - "" \ - "#ifndef _LIBC" \ - "# include " \ - "#endif" 
\ - "" \ - "#ifndef __NR_memfd_create" \ - "# define __NR_memfd_create 319 /* Taken from */" \ - "#endif" \ - "" \ - "#ifndef SYS_memfd_create" \ - "# define SYS_memfd_create __NR_memfd_create" \ - "#endif" \ - "" \ - "#endif" > sys/syscall.h - fi + - pip3 install --user meson + - pip3 install --user mako script: - if test "x$BUILD" = xmeson; then - if test -n "$LLVM_CONFIG"; then - # We need to control the version of llvm-config we're using, so we'll - # generate a native file to do so. This requires meson >=0.49 - # - echo -e "[binaries]\nllvm-config = '`which $LLVM_CONFIG`'" > native.file - - $LLVM_CONFIG --version - else - : > native.file - fi - - export CFLAGS="$CFLAGS -isystem`pwd`" - meson _build \ - --native-file=native.file \ - -Dbuild-tests=true \ - ${DRI_LOADERS} \ - -Ddri-drivers=${DRI_DRIVERS:-[]} \ - -Dgallium-drivers=${GALLIUM_DRIVERS:-[]} \ - -Dvulkan-drivers=${VULKAN_DRIVERS:-[]} - meson configure _build - ninja -C _build - ninja -C _build test - fi + - meson _build + -Dbuild-tests=true + -Dplatforms=x11 + -Dgallium-drivers=swrast + - ninja -C _build + - ninja -C _build test diff --git a/Android.common.mk b/Android.common.mk index 36d97c52dd1..ae4a9fc98dd 100644 --- a/Android.common.mk +++ b/Android.common.mk @@ -27,10 +27,13 @@ endif LOCAL_C_INCLUDES += \ $(MESA_TOP)/src \ - $(MESA_TOP)/include + $(MESA_TOP)/include \ + system/core/include/cutils \ + system/core/liblog/include MESA_VERSION := $(shell cat $(MESA_TOP)/VERSION) LOCAL_CFLAGS += \ + -O3 \ -Wno-error \ -Werror=incompatible-pointer-types \ -Wno-unused-parameter \ @@ -78,14 +81,23 @@ LOCAL_CFLAGS += \ -fvisibility=hidden \ -fno-math-errno \ -fno-trapping-math \ - -Wno-sign-compare + -Wno-sign-compare \ + -Wno-self-assign \ + -Wno-constant-logical-operand \ + -Wno-format \ + -Wno-incompatible-pointer-types \ + -Wno-enum-conversion LOCAL_CPPFLAGS += \ -D__STDC_CONSTANT_MACROS \ -D__STDC_FORMAT_MACROS \ -D__STDC_LIMIT_MACROS \ -Wno-error=non-virtual-dtor \ - -Wno-non-virtual-dtor + -Wno-non-virtual-dtor \ + -Wno-delete-non-virtual-dtor \ + -Wno-overloaded-virtual \ + -Wno-missing-braces \ + -Wno-deprecated-register # mesa requires at least c99 compiler LOCAL_CONLYFLAGS += \ @@ -115,6 +127,9 @@ LOCAL_CFLAGS += -DHAVE_LIBDRM LOCAL_SHARED_LIBRARIES += libdrm endif +LOCAL_SHARED_LIBRARIES += libcutils \ + liblog + LOCAL_CFLAGS_32 += -DDEFAULT_DRIVER_DIR=\"/vendor/lib/$(MESA_DRI_MODULE_REL_PATH)\" LOCAL_CFLAGS_64 += -DDEFAULT_DRIVER_DIR=\"/vendor/lib64/$(MESA_DRI_MODULE_REL_PATH)\" LOCAL_PROPRIETARY_MODULE := true diff --git a/Android.mk b/Android.mk index d2b12ea4473..5fe028c6d19 100644 --- a/Android.mk +++ b/Android.mk @@ -110,6 +110,7 @@ endef # add subdirectories SUBDIRS := \ + src/freedreno \ src/gbm \ src/loader \ src/mapi \ diff --git a/Readme.md b/Readme.md new file mode 100644 index 00000000000..5df295abc3a --- /dev/null +++ b/Readme.md @@ -0,0 +1,2 @@ +Any security related issues should be reported by following the instructions here: +https://01.org/security diff --git a/VERSION b/VERSION index f5cbc1e7406..87c0f53ffeb 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -19.1.0-devel +19.1.6 diff --git a/bin/.cherry-ignore b/bin/.cherry-ignore new file mode 100644 index 00000000000..0df54310da5 --- /dev/null +++ b/bin/.cherry-ignore @@ -0,0 +1,15 @@ +# fixes: The following commits do not apply cleanly on 19.1 branch, as they +# depend on other commits not present in the branch. 
+20b00e1ff24f974bc99e7ca9a720518da0ce5b89 panfrost: Make ctx->job useful
+f6c44549ee2dd0f218deea1feba3965523609406 iris: Replace devinfo->gen with GEN_GEN
+1cd13ccee7bc2733e7a56284dc02bdb1b1c40081 iris: Update fast clear colors on Gen9 with direct immediate writes.
+# fixes: The following commit depends on commits 77a1070d366a and df4c2ec5e19b
+# in order to compile, which did not land in the branch.
+2d799250346331a93b21678dc5605cff74dfa3a1 iris: Avoid unnecessary resolves on transfer maps
+# stable: Explicit 19.2 only nominations.
+e73d863a66caac796ed5fb543a77f0b892df8573 radv: allow to enable VK_AMD_shader_ballot only on GFX8+
+f202ac27a99caf9009aa9d60e2e0d7f3b528e99f radv: add a new debug option called RADV_DEBUG=noshaderballot
+a6ad9e8ccf970a0da68508eb2ce26b316045b9f0 radv: force enable VK_AMD_shader_ballot for Wolfenstein Youngblood
+0813c27d8d4a7e9372a8a86d970b598fc4e3bfd1 radv/gfx10: don't initialize VGT_INSTANCE_STEP_RATE_0
+a4e6e59db82e61b47ef905f28dde80ae36a67d35 radv/gfx10: do not use NGG with NAVI14
+fe0ec41c4d36fd5a82e7579d89e34cce7423c4e5 radv: Change memory type order for GPUs without dedicated VRAM
diff --git a/docs/relnotes/19.1.0.html b/docs/relnotes/19.1.0.html
index c27e65ea096..7e65dd6db8d 100644
--- a/docs/relnotes/19.1.0.html
+++ b/docs/relnotes/19.1.0.html
@@ -14,7 +14,7 @@
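
For reference, the new macOS CI job in the .travis.yml change above reduces to a short meson build. A rough local equivalent of what the job runs, assuming Homebrew, XQuartz and the PATH/PKG_CONFIG_PATH adjustments from before_install are already in place, is:

    HOMEBREW_NO_AUTO_UPDATE=1 brew install python3 ninja expat gettext
    pip3 install --user meson
    pip3 install --user mako
    meson _build -Dbuild-tests=true -Dplatforms=x11 -Dgallium-drivers=swrast
    ninja -C _build
    ninja -C _build test
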

The Mesa 3D Graphics Library

-Mesa 19.1.0 Release Notes / TBD
+Mesa 19.1.0 Release Notes / June 11, 2019

Mesa 19.1.0 is a new development release. People who are concerned
@@ -32,7 +32,7 @@

Mesa 19.1.0 Release Notes / TBD

SHA256 checksums

-TBD.
+2a6c3af3a803389183168e449c536304cf03e0f82c4c9333077933543b9d02f3  mesa-19.1.0.tar.xz
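
A downloaded tarball can be checked against this checksum with GNU coreutils (or shasum -a 256 on macOS), for example:

    sha256sum mesa-19.1.0.tar.xz
    # expected: 2a6c3af3a803389183168e449c536304cf03e0f82c4c9333077933543b9d02f3  mesa-19.1.0.tar.xz
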
 
@@ -69,15 +69,4542 @@

New features

Bug fixes

    -
  • TBD
  • + +
  • Bug 81843 - [SNB IVB HSW] ETC2 textures are not returned as compressed images
  • + +
  • Bug 99781 - Some Unity games fail assertion on startup in glXCreateContextAttribsARB
  • + +
  • Bug 100239 - Incorrect rendering in CS:GO
  • + +
  • Bug 100316 - Linking GLSL 1.30 shaders with invariant and deprecated variables triggers an 'mismatching invariant qualifiers' error
  • + +
  • Bug 104272 - [OpenGL CTS] [HSW] KHR-GL46.direct_state_access.textures_compressed_subimage assert fails
  • + +
  • Bug 104355 - Ivy Bridge ignores component mappings in texture views
  • + +
  • Bug 104602 - [apitrace] Graphical artifacts in Civilization VI on RX Vega
  • + +
  • Bug 107052 - [Regression][bisected]. Crookz - The Big Heist Demo can't be launched despite the "true" flag in "drirc"
  • + +
  • Bug 107505 - [lars] dEQP-GLES31.functional.geometry_shading.layered#render_with_default_layer_3d failure
  • + +
  • Bug 107510 - [GEN8+] up to 10% perf drop on several 3D benchmarks
  • + +
  • Bug 107563 - [RADV] Broken rendering in Unity demos
  • + +
  • Bug 107987 - [Debug mesa only]. Crash happens when calling drawArrays
  • + +
  • Bug 108250 - [GLSL] layout-location-struct.shader_test fails to link
  • + +
  • Bug 108457 - [OpenGL CTS] KHR-GL46.tessellation_shader.single.xfb_captures_data_from_correct_stage fails
  • + +
  • Bug 108540 - vkAcquireNextImageKHR blocks when timeout=0 in Wayland
  • + +
  • Bug 108766 - Mesa built with meson has RPATH entries
  • + +
  • Bug 108824 - Invalid handling when GL buffer is bound on one context and invalidated on another
  • + +
  • Bug 108841 - [RADV] SPIRV's control flow attributes do not propagate to LLVM
  • + +
  • Bug 108879 - [CIK] [regression] All opencl apps hangs indefinitely in si_create_context
  • + +
  • Bug 108999 - Calculating the scissors fields when the y is flipped (0 on top) can generate negative numbers that will cause assertion failure later on.
  • + +
  • Bug 109057 - texelFetch from GL_TEXTURE_2D_MULTISAMPLE with integer format fails
  • + +
  • Bug 109107 - gallium/st/va: change va max_profiles when using Radeon VCN Hardware
  • + +
  • Bug 109216 - 4-27% performance drop in Vulkan benchmarks
  • + +
  • Bug 109326 - mesa: Meson configuration summary should be printed
  • + +
  • Bug 109328 - [BSW BXT GLK] dEQP-VK.subgroups.arithmetic.subgroup regressions
  • + +
  • Bug 109391 - LTO Build fails
  • + +
  • Bug 109401 - [DXVK] Project Cars rendering problems
  • + +
  • Bug 109404 - [ANV] The Witcher 3 shadows flickering
  • + +
  • Bug 109443 - Build failure with MSVC when using Scons >= 3.0.2
  • + +
  • Bug 109451 - [IVB,SNB] LINE_STRIPs following a TRIANGLE_FAN fail to use primitive restart
  • + +
  • Bug 109543 - After upgrade mesa to 19.0.0~rc1 all vulkan based application stop working ["vulkan-cube" received SIGSEGV in radv_pipeline_init_blend_state at ../src/amd/vulkan/radv_pipeline.c:699]
  • + +
  • Bug 109561 - [regression, bisected] code re-factor causing games to stutter or lock-up system
  • + +
  • Bug 109573 - dEQP-VK.spirv_assembly.instruction.graphics.module.same_module
  • + +
  • Bug 109575 - Mesa-19.0.0-rc1 : Computer Crashes trying to run anything Vulkan
  • + +
  • Bug 109581 - [BISECTED] Nothing is Rendered on Sascha Willem's "subpasses" demo
  • + +
  • Bug 109594 - totem assert failure: totem: src/intel/genxml/gen9_pack.h:72: __gen_uint: La declaración `v <= max' no se cumple.
  • + +
  • Bug 109597 - wreckfest issues with transparent objects & skybox
  • + +
  • Bug 109601 - [Regression] RuneLite GPU rendering broken on 18.3.x
  • + +
  • Bug 109603 - nir_instr_as_deref: Assertion `parent && parent->type == nir_instr_type_deref' failed.
  • + +
  • Bug 109645 - build error on arm64: tegra_screen.c:33: /usr/include/xf86drm.h:41:10: fatal error: drm.h: No such file or directory
  • + +
  • Bug 109646 - New video compositor compute shader render glitches mpv
  • + +
  • Bug 109647 - /usr/include/xf86drm.h:40:10: fatal error: drm.h: No such file or directory
  • + +
  • Bug 109648 - AMD Raven hang during va-api decoding
  • + +
  • Bug 109659 - Missing OpenGL symbols in OSMesa Gallium when building with meson
  • + +
  • Bug 109698 - dri.pc contents invalid when built with meson
  • + +
  • Bug 109717 - [regression] Cull distance tests asserting
  • + +
  • Bug 109735 - [Regression] broken font with mesa_vulkan_overlay
  • + +
  • Bug 109738 - Child of Light shows only a black screen
  • + +
  • Bug 109739 - Mesa build fails when vulkan-overlay-layer option is enabled
  • + +
  • Bug 109742 - vdpau state tracker on nv92 started to hit assert after vl compute work
  • + +
  • Bug 109743 - Test fails: piglit.spec.arb_sample_shading.arb_sample_shading-builtin-gl-sample-mask-mrt-alpha
  • + +
  • Bug 109747 - Add framerate to vulkan-overlay-layer
  • + +
  • Bug 109759 - [BISECTED][REGRESSION][IVB, HSW] Font rendering problem in OpenGL
  • + +
  • Bug 109788 - vulkan-overlay-layer: Only installs 64bit version
  • + +
  • Bug 109810 - nir_opt_copy_prop_vars.c:454: error: unknown field ‘ssa’ specified in initializer
  • + +
  • Bug 109929 - tgsi_to_nir.c:2111: undefined reference to `gl_nir_lower_samplers_as_deref'
  • + +
  • Bug 109944 - [bisected] Android build test fails with: utils.c: error: use of undeclared identifier 'PACKAGE_VERSION'
  • + +
  • Bug 109945 - pan_assemble.c:51:46: error: passing argument 2 of ‘tgsi_to_nir’ from incompatible pointer type [-Werror=incompatible-pointer-types]
  • + +
  • Bug 109980 - [i915 CI][HSW] spec@arb_fragment_shader_interlock@arb_fragment_shader_interlock-image-load-store - fail
  • + +
  • Bug 109984 - unhandled VkStructureType VK_STRUCTURE_TYPE_RENDER_PASS_INPUT_ATTACHMENT_ASPECT_CREATE_INFO
  • + +
  • Bug 110134 - SIGSEGV while playing large hevc video in mpv
  • + +
  • Bug 110143 - Doom 3: BFG Edition - Steam and GOG.com - white flickering screen
  • + +
  • Bug 110201 - [ivb] mesa 19.0.0 breaks rendering in kitty
  • + +
  • Bug 110211 - If DESTDIR is set to an empty string, the dri drivers are not installed
  • + +
  • Bug 110216 - radv: Segfault when compiling compute shaders from Assassin's Creed Odyssey (regression, bisected)
  • + +
  • Bug 110221 - build error with meson
  • + +
  • Bug 110239 - Mesa SIGABRT: src/intel/genxml/gen9_pack.h:72: __gen_uint: Assertion `v <= max' failed
  • + +
  • Bug 110257 - Major artifacts in mpeg2 vaapi hw decoding
  • + +
  • Bug 110259 - radv: Sampling depth-stencil image in GENERAL layout returns nothing but zero (regression, bisected)
  • + +
  • Bug 110291 - Vega 64 GPU hang running Space Engineers
  • + +
  • Bug 110302 - [bisected][regression] piglit egl-create-pbuffer-surface and egl-gl-colorspace regressions
  • + +
  • Bug 110305 - Iris driver fails ext_packed_depth_stencil-getteximage test
  • + +
  • Bug 110311 - [IVB HSW SNB][regression][bisected] regressions on vec4 deqp/gl{es}cts tests
  • + +
  • Bug 110349 - radv: Dragon Quest XI (DXVK) has a graphical glitch (regression, bisected)
  • + +
  • Bug 110353 - weird colors seen in valley
  • + +
  • Bug 110355 - radeonsi: GTK elements become invisible in some applications (GIMP, LibreOffice)
  • + +
  • Bug 110356 - install_megadrivers.py creates new dangling symlink [bisected]
  • + +
  • Bug 110404 - Iris fails piglit.spec.ext_transform_feedback.immediate-reuse test
  • + +
  • Bug 110422 - AMD_DEBUG=forcedma will crash OpenGL aps with SIGFAULT on VegaM 8706G
  • + +
  • Bug 110441 - [llvmpipe] complex-loop-analysis-bug regression
  • + +
  • Bug 110443 - vaapi/vpp: wrong output for non 64-bytes align width (ex: 1200)
  • + +
  • Bug 110454 - [llvmpipe] piglit arb_color_buffer_float-render GL_RGBA8_SNORM failure with llvm-9
  • + +
  • Bug 110462 - Epic Games Launcher renders nothing with "-opengl" option
  • + +
  • Bug 110474 - [bisected][regression] vk cts fp16 arithmetic failures
  • + +
  • Bug 110497 - [DXVK][Regression][Bisected][SKL] Project Cars 2 crashes with Bug Splat when loading finishes
  • + +
  • Bug 110526 - [CTS] dEQP-VK.ycbcr.{conversion,format}.* fail
  • + +
  • Bug 110530 - [CTS] dEQP-VK.ycbcr.format.g8_b8_r8_3plane_420* reports VM faults on Vega10
  • + +
  • Bug 110535 - [bisected] [icl] GPU hangs on crucible func.miptree.r8g8b8a8-unorm.aspect-color.view-2d.levels01.array01.extent-512x512.upload-copy-with-draw tests
  • + +
  • Bug 110540 - [AMD TAHITI XT] valve artifact broken
  • + +
  • Bug 110573 - Mesa vulkan-radeon 19.0.3 system freeze and visual artifacts (RADV)
  • + +
  • Bug 110590 - [Regression][Bisected] GTAⅣ under wine fails with GLXBadFBConfig
  • + +
  • Bug 110632 - "glx: Fix synthetic error generation in __glXSendError" broke wine games on 32-bit
  • + +
  • Bug 110648 - Dota2 will not open using vulkan since 19.0 series
  • + +
  • Bug 110655 - VK_LAYER_MESA_OVERLAY_CONFIG=draw,fps renders sporadically
  • + +
  • Bug 110698 - tu_device.c:900:4: error: initializer element is not constant
  • + +
  • Bug 110701 - GPU faults in in Unigine Valley 1.0
  • + +
  • Bug 110721 - graphics corruption on steam client with mesa 19.1.0 rc3 on polaris
  • + +
  • Bug 110761 - Huge problems between Mesa and Electron engine apps
  • + +
  • Bug 110784 - [regression][bisected] Reverting 'expose 0 shader binary formats for compat profiles for Qt' causes get_program_binary failures on Iris
  • +
+

Changes

+

Adam Jackson (1):

+
    +
  • drisw: Try harder to probe whether MIT-SHM works
  • +
+ +

Albert Pal (1):

+
    +
  • Fix link release notes for 19.0.0.
  • +
+ +

Alejandro Piñeiro (12):

+
    +
  • blorp: introduce helper method blorp_nir_init_shader
  • +
  • nir, glsl: move pixel_center_integer/origin_upper_left to shader_info.fs
  • +
  • nir/xfb: add component_offset at nir_xfb_info
  • +
  • nir_types: add glsl_varying_count helper
  • +
  • nir/xfb: adding varyings on nir_xfb_info and gather_info
  • +
  • nir/xfb: sort varyings too
  • +
  • nir_types: add glsl_type_is_struct helper
  • +
  • nir/xfb: handle arrays and AoA of basic types
  • +
  • nir/linker: use nir_gather_xfb_info
  • +
  • nir/linker: fix ARRAY_SIZE query with xfb varyings
  • +
  • nir/xfb: move varyings info out of nir_xfb_info
  • +
  • docs: document MESA_GLSL=errors keyword
  • +
+ +

Alexander von Gluck IV (1):

+
    +
  • haiku: Fix hgl dispatch build. Tested under meson/scons.
  • +
+ +

Alexandros Frantzis (1):

+
    +
  • virgl: Fake MSAA when max samples is 1
  • +
+ +

Alok Hota (32):

+
    +
  • swr/rast: update SWR rasterizer shader stats
  • +
  • gallium/swr: Param defaults for unhandled PIPE_CAPs
  • +
  • gallium/aux: add PIPE_CAP_MAX_VARYINGS to u_screen
  • +
  • swr/rast: Convert system memory pointers to gfxptr_t
  • +
  • swr/rast: Disable use of __forceinline by default
  • +
  • swr/rast: Correctly align 64-byte spills/fills
  • +
  • swr/rast: Flip BitScanReverse index calculation
  • +
  • swr/rast: Move knob defaults to generated cpp file
  • +
  • swr/rast: FP consistency between POSH/RENDER pipes
  • +
  • swr/rast: Refactor scratch space variable names
  • +
  • swr/rast: convert DWORD->uint32_t, QWORD->uint64_t
  • +
  • swr/rast: simdlib cleanup, clipper stack space fixes
  • +
  • swr/rast: Add translation support to streamout
  • +
  • swr/rast: bypass size limit for non-sampled textures
  • +
  • swr/rast: Cleanup and generalize gen_archrast
  • +
  • swr/rast: Add initial SWTag proto definitions
  • +
  • swr/rast: Add string handling to AR event framework
  • +
  • swr/rast: Add general SWTag statistics
  • +
  • swr/rast: Fix autotools and scons codegen
  • +
  • swr/rast: Remove deprecated 4x2 backend code
  • +
  • swr/rast: AVX512 support compiled in by default
  • +
  • swr/rast: enforce use of tile offsets
  • +
  • swr/rast: add more llvm intrinsics
  • +
  • swr/rast: update guardband rects at draw setup
  • +
  • swr/rast: add SWR_STATIC_ASSERT() macro
  • +
  • swr/rast: add flat shading
  • +
  • swr/rast: add guards for cpuid on Linux
  • +
  • swr/rast: early exit on empty triangle mask
  • +
  • swr/rast: Cleanup and generalize gen_archrast
  • +
  • swr/rast: Add initial SWTag proto definitions
  • +
  • swr/rast: Add string handling to AR event framework
  • +
  • swr/rast: Add general SWTag statistics
  • +
+ +

Alyssa Rosenzweig (192):

+
    +
  • panfrost: Initial stub for Panfrost driver
  • +
  • panfrost: Implement Midgard shader toolchain
  • +
  • meson: Remove panfrost from default driver list
  • +
  • kmsro: Move DRM entrypoints to shared block
  • +
  • panfrost: Use u_pipe_screen_get_param_defaults
  • +
  • panfrost: Check in sources for command stream
  • +
  • panfrost: Include glue for out-of-tree legacy code
  • +
  • kmsro: Silence warning if missing
  • +
  • panfrost: Clean-up one-argument passing quirk
  • +
  • panfrost: Don't hardcode number of nir_ssa_defs
  • +
  • panfrost: Add kernel-agnostic resource management
  • +
  • panfrost: Remove if 0'd dead code
  • +
  • panfrost: Remove speculative if 0'd format bit code
  • +
  • panfrost: Elucidate texture op scheduling comment
  • +
  • panfrost: Specify supported draw modes per-context
  • +
  • panfrost: Fix build; depend on libdrm
  • +
  • panfrost: Backport driver to Mali T600/T700
  • +
  • panfrost: Identify MALI_OCCLUSION_PRECISE bit
  • +
  • panfrost: Implement PIPE_QUERY_OCCLUSION_COUNTER
  • +
  • panfrost: Don't align framebuffer dims
  • +
  • panfrost: Improve logging and patch memory leaks
  • +
  • panfrost: Fix various leaks unmapping resources
  • +
  • panfrost: Free imported BOs
  • +
  • panfrost: Swap order of tiled texture (de)alloc
  • +
  • panfrost: Cleanup mali_viewport (clipping) code
  • +
  • panfrost: Preserve w sign in perspective division
  • +
  • panfrost: Fix clipping region
  • +
  • panfrost: Stub out separate stencil functions
  • +
  • panfrost: Add pandecode (command stream debugger)
  • +
  • panfrost: Implement pantrace (command stream dump)
  • +
  • panfrost/midgard: Refactor tag lookahead code
  • +
  • panfrost/midgard: Fix nested/chained if-else
  • +
  • panfrost: Rectify doubleplusungood extended branch
  • +
  • panfrost/midgard: Emit extended branches
  • +
  • panfrost: Dynamically set discard branch targets
  • +
  • panfrost: Verify and print brx condition in disasm
  • +
  • panfrost: Use tiler fast path (performance boost)
  • +
  • panfrost/meson: Remove subdir for nondrm
  • +
  • panfrost/nondrm: Flag CPU-invisible regions
  • +
  • panfrost/nondrm: Make COHERENT_LOCAL explicit
  • +
  • panfrost/nondrm: Split out dump_counters
  • +
  • panfrost/midgard: Add fround(_even), ftrunc, ffma
  • +
  • panfrost: Decode render target swizzle/channels
  • +
  • panfrost: Add RGB565, RGB5A1 texture formats
  • +
  • panfrost: Identify 4-bit channel texture formats
  • +
  • panfrost: Expose perf counters in environment
  • +
  • panfrost/midgard: Allow flt to run on most units
  • +
  • panfrost: Import job data structures from v3d
  • +
  • panfrost: Decouple Gallium clear from FBD clear
  • +
  • panfrost: Cleanup cruft related to clears
  • +
  • panfrost/midgard: Don't force constant on VLUT
  • +
  • panfrost: Flush with offscreen rendering
  • +
  • panfrost/midgard: Promote smul to vmul
  • +
  • panfrost/midgard: Preview for data hazards
  • +
  • panfrost: List primitive restart enable bit
  • +
  • panfrost/drm: Cast pointer to u64 to fix warning
  • +
  • panfrost: Cleanup needless if in create_bo
  • +
  • panfrost: Combine has_afbc/tiled in layout enum
  • +
  • panfrost: Delay color buffer setup
  • +
  • panfrost: Determine framebuffer format bits late
  • +
  • panfrost: Allocate dedicated slab for linear BOs
  • +
  • panfrost: Support linear depth textures
  • +
  • panfrost: Document "depth-buffer writeback" bit
  • +
  • panfrost: Identify fragment_extra flags
  • +
  • util: Add a drm_find_modifier helper
  • +
  • v3d: Use shared drm_find_modifier util
  • +
  • vc4: Use shared drm_find_modifier util
  • +
  • freedreno: Use shared drm_find_modifier util
  • +
  • panfrost: Break out fragment to SFBD/MFBD files
  • +
  • panfrost: Remove staging SFBD for pan_context
  • +
  • panfrost: Remove staging MFBD
  • +
  • panfrost: Minor comment cleanup (version detection)
  • +
  • panfrost/mfbd: Implement linear depth buffers
  • +
  • panfrost/mfbd: Respect per-job depth write flag
  • +
  • panfrost: Comment spelling fix
  • +
  • panfrost: Allocate extra data for depth buffer
  • +
  • panfrost; Disable AFBC for depth buffers
  • +
  • panfrost: Compute viewport state on the fly
  • +
  • panfrost/midgard: Implement fpow
  • +
  • panfrost: Workaround buffer overrun with mip level
  • +
  • panfrost: Fix primconvert check
  • +
  • panfrost: Disable PIPE_CAP_TGSI_TEXCOORD
  • +
  • panfrost/decode: Respect primitive size pointers
  • +
  • panfrost: Replay more varying buffers
  • +
  • panfrost: Rewrite varying assembly
  • +
  • panfrost/midgard: Fix b2f32 swizzle for vectors
  • +
  • panfrost: Fix viewports
  • +
  • panfrost: Implement scissor test
  • +
  • panfrost/midgard: Add fcsel_i opcode
  • +
  • panfrost/midgard: Schedule ball/bany to vectors
  • +
  • panfrost/midgard: Add more ball/bany, iabs ops
  • +
  • panfrost/midgard: Map more bany/ball opcodes
  • +
  • panfrost/midgard: Lower bool_to_int32
  • +
  • panfrost/midgard: Lower f2b32 to fne
  • +
  • panfrost/midgard: Lower i2b32
  • +
  • panfrost/midgard: Implement b2i; improve b2f/f2b
  • +
  • panfrost/midgard: Lower source modifiers for ints
  • +
  • panfrost/midgard: Cleanup midgard_nir_algebraic.py
  • +
  • panfrost: Stub out ES3 caps/callbacks
  • +
  • panfrost/midgard: Add ult/ule ops
  • +
  • panfrost/midgard: Expand fge lowering to more types
  • +
  • panfrost/midgard: Handle i2b constant
  • +
  • panfrost/midgard: fpow is a two-part operation
  • +
  • panfrost: Preliminary work for mipmaps
  • +
  • panfrost: Fix vertex buffer corruption
  • +
  • panfrost/midgard: Disassemble `cube` texture op
  • +
  • panfrost/midgard: Add L/S op for writing cubemap coordinates
  • +
  • panfrost: Preliminary work for cubemaps
  • +
  • panfrost/decode: Decode all cubemap faces
  • +
  • panfrost: Include all cubemap faces in bitmap list
  • +
  • panfrost/midgard: Emit cubemap coordinates
  • +
  • panfrost: Implement command stream for linear cubemaps
  • +
  • panfrost: Extend tiling for cubemaps
  • +
  • panfrost: Implement missing texture formats
  • +
  • panfrost/decode: Print negative_start
  • +
  • panfrost: Clean index state between indexed draws
  • +
  • panfrost: Fix index calculation types and asserts
  • +
  • panfrost: Implement FIXED formats
  • +
  • panfrost: Remove support for legacy kernels
  • +
  • nir: Add "viewport vector" system values
  • +
  • panfrost: Implement system values
  • +
  • panfrost: Cleanup some indirection in pan_resource
  • +
  • panfrost: Respect box->width in tiled stores
  • +
  • panfrost: Size tiled temp buffers correctly
  • +
  • panfrost/decode: Add flags for tilebuffer readback
  • +
  • panfrost: Add tilebuffer load? branch
  • +
  • panfrost/midgard: Add umin/umax opcodes
  • +
  • panfrost/midgard: Add ilzcnt op
  • +
  • panfrost/midgard: Add ibitcount8 op
  • +
  • panfrost/midgard: Enable lower_find_lsb
  • +
  • panfrost: Remove "mali_unknown6" nonsense
  • +
  • panfrost/midgard: Drop dependence on mesa/st
  • +
  • panfrost: Cleanup indexed draw handling
  • +
  • nir: Add nir_lower_viewport_transform
  • +
  • panfrost/midgard: Use shared nir_lower_viewport_transform
  • +
  • panfrost: Track BO lifetime with jobs and reference counts
  • +
  • panfrost: Fixup vertex offsets to prevent shadow copy
  • +
  • panfrost/mdg: Use shared fsign lowering
  • +
  • panfrost/mdg/disasm: Print raw varying_parameters
  • +
  • panfrost/midgard: Pipe through varying arrays
  • +
  • panfrost/midgard: Implement indirect loads of varyings/UBOs
  • +
  • panfrost/midgard: Respect component of bcsel condition
  • +
  • panfrost/midgard: Remove useless MIR dump
  • +
  • panfrost: Respect backwards branches in RA
  • +
  • panfrost/midgard: Don't try to inline constants on branches
  • +
  • panfrost/midgard: imul can only run on *mul
  • +
  • panfrost: Disable indirect outputs for now
  • +
  • panfrost: Use actual imov instruction
  • +
  • panfrost/midgard: Dead code eliminate MIR
  • +
  • panfrost/midgard: Track loop depth
  • +
  • panfrost/midgard: Fix off-by-one in successor analysis
  • +
  • panfrost/midgard: Remove unused mir_next_block
  • +
  • panfrost/midgard: Update integer op list
  • +
  • panfrost/midgard: Document sign-extension/zero-extension bits (vector)
  • +
  • panfrost/midgard: Set integer mods
  • +
  • panfrost/midgard: Implement copy propagation
  • +
  • panfrost/midgard: Optimize MIR in progress loop
  • +
  • panfrost/midgard: Refactor opcode tables
  • +
  • panfrost/midgard: Add "op commutes?" property
  • +
  • panfrost/midgard: Remove assembler
  • +
  • panfrost/midgard: Reduce fmax(a, 0.0) to fmov.pos
  • +
  • panfrost/midgard: Extend copy propagation pass
  • +
  • panfrost/midgard: Optimize csel involving 0
  • +
  • panfrost/midgard: Copy prop for texture registers
  • +
  • panfrost/midgard: Identify inand
  • +
  • panfrost/midgard: Add new bitwise ops
  • +
  • Revert "panfrost/midgard: Extend copy propagation pass"
  • +
  • panfrost/midgard: Only copyprop without an outmod
  • +
  • panfrost/midgard: Fix regressions in -bjellyfish
  • +
  • panfrost/midgard: Fix tex propogation
  • +
  • panfrost/midgard: imov workaround
  • +
  • panfrost: Use fp32 (not fp16) varyings
  • +
  • panfrost/midgard: Safety check immediate precision degradations
  • +
  • panfrost: Workaround -bshadow regression
  • +
  • panfrost: Remove shader dump
  • +
  • panfrost/decode: Hit MRT blend shader enable bits
  • +
  • panfrost: Fix blend shader upload
  • +
  • panfrost/midgard: reg_mode_full -> reg_mode_32, etc
  • +
  • panfrost/midgard/disasm: Catch mask errors
  • +
  • panfrost/midgard/disasm: Extend print_reg to 8-bit
  • +
  • panfrost/midgard/disasm: Fill in .int mod
  • +
  • panfrost/midgard: Fix crash on unknown op
  • +
  • panfrost/midgard: Rename ilzcnt8 -> iclz
  • +
  • panfrost/midgard/disasm: Support 8-bit destination
  • +
  • panfrost/midgard/disasm: Print 8-bit sources
  • +
  • panfrost/midgard/disasm: Stub out 64-bit
  • +
  • panfrost/midgard/disasm: Handle dest_override generalized
  • +
  • panfrost: Support RGB565 FBOs
  • +
  • panfrost/midgard: Fix integer selection
  • +
  • panfrost/midgard: Fix RA when temp_count = 0
  • +
  • panfrost/midgard: Lower mixed csel (NIR)
  • +
  • panfrost/midgard: iabs cannot run on mul
  • +
+ +

Alyssa Ross (1):

+
    +
  • get_reviewer.pl: improve portability
  • +
+ +

Amit Pundir (1):

+
    +
  • mesa: android: freedreno: build libfreedreno_{drm,ir3} static libs
  • +
+ +

Andre Heider (5):

    -
  • TBD
  • +
  • iris: fix build with gallium nine
  • +
  • iris: improve PIPE_CAP_VIDEO_MEMORY bogus value
  • +
  • iris: add support for tgsi_to_nir
  • +
  • st/nine: enable csmt per default on iris
  • +
  • st/nine: skip position checks in SetCursorPosition()
+

Andreas Baierl (2):

+
    +
  • nir: add rcp(w) lowering for gl_FragCoord
  • +
  • lima/ppir: Add gl_FragCoord handling
  • +
+ +

Andres Gomez (12):

+
    +
  • mesa: INVALID_VALUE for wrong type or format in Clear*Buffer*Data
  • +
  • gitlab-ci: install distro's ninja
  • +
  • glsl: correctly validate component layout qualifier for dvec{3,4}
  • +
  • glsl/linker: always validate explicit location among inputs
  • +
  • glsl/linker: don't fail non static used inputs without matching outputs
  • +
  • glsl/linker: simplify xfb_offset vs xfb_stride overflow check
  • +
  • Revert "glsl: relax input->output validation for SSO programs"
  • +
  • glsl/linker: location aliasing requires types to have the same width
  • +
  • docs: drop Andres Gomez from the release cycles
  • +
  • glsl/linker: always validate explicit locations for first and last interfaces
  • +
  • docs/relnotes: add support for VK_KHR_shader_float16_int8
  • +
  • glsl/linker: check for xfb_offset aliasing
  • +
+ +

Andrii Simiklit (5):

+
    +
  • i965: consider a 'base level' when calculating width0, height0, depth0
  • +
  • i965: re-emit index buffer state on a reset option change.
  • +
  • util: clean the 24-bit unused field to avoid an issues
  • +
  • iris: make the TFB result visible to others
  • +
  • egl: return correct error code for a case req ver < 3 with forward-compatible
  • +
+ +

Antia Puentes (1):

+
    +
  • nir/linker: Fix TRANSFORM_FEEDBACK_BUFFER_INDEX
  • +
+ +

Anuj Phogat (7):

+
    +
  • i965/icl: Add WA_2204188704 to disable pixel shader panic dispatch
  • +
  • anv/icl: Add WA_2204188704 to disable pixel shader panic dispatch
  • +
  • intel: Add Elkhart Lake device info
  • +
  • intel: Add Elkhart Lake PCI-IDs
  • +
  • iris/icl: Set Enabled Texel Offset Precision Fix bit
  • +
  • iris/icl: Add WA_2204188704 to disable pixel shader panic dispatch
  • +
  • intel: Add support for Comet Lake
  • +
+ +

Axel Davy (49):

+
    +
  • st/nine: Ignore window size if error
  • +
  • st/nine: Ignore multisample quality level if no ms
  • +
  • st/nine: Disable depth write when nothing gets updated
  • +
  • st/nine: Do not advertise support for D15S1 and D24X4S4
  • +
  • st/nine: Do not advertise CANMANAGERESOURCE
  • +
  • st/nine: Change a few advertised caps
  • +
  • Revert "d3dadapter9: Support software renderer on any DRI device"
  • +
  • st/nine: Fix D3DWindowBuffer_release for old wine nine support
  • +
  • st/nine: Use FLT_MAX/2 for RCP clamping
  • +
  • st/nine: Upload managed textures only at draw using them
  • +
  • st/nine: Upload managed buffers only at draw using them
  • +
  • st/nine: Fix buffer/texture unbinding in nine_state_clear
  • +
  • st/nine: Finish if nooverwrite after normal mapping
  • +
  • st/nine: Always return OK on SetSoftwareVertexProcessing
  • +
  • st/nine: Enable modifiers on ps 1.X texcoords
  • +
  • st/nine: Ignore nooverwrite for systemmem
  • +
  • st/nine: Fix SINCOS input
  • +
  • st/nine: Optimize surface upload with conversion
  • +
  • st/nine: Optimize volume upload with conversion
  • +
  • st/nine: rename *_conversion to *_internal
  • +
  • st/nine: Refactor surface GetSystemMemPointer
  • +
  • st/nine: Refactor volume GetSystemMemPointer
  • +
  • st/nine: Support internal compressed format for surfaces
  • +
  • st/nine: Support internal compressed format for volumes
  • +
  • st/nine: Add drirc option to use data_internal for dynamic textures
  • +
  • drirc: Add Gallium nine workaround for Rayman Legends
  • +
  • st/nine: Recompile optimized shaders based on b/i consts
  • +
  • st/nine: Control shader constant inlining with drirc
  • +
  • st/nine: Regroup param->rel tests
  • +
  • st/nine: Refactor param->rel
  • +
  • st/nine: Compact nine_ff_get_projected_key
  • +
  • st/nine: Compact pixel shader key
  • +
  • st/nine: use helper ureg_DECL_sampler everywhere
  • +
  • st/nine: Manually upload vs and ps constants
  • +
  • st/nine: Refactor shader constants ureg_src computation
  • +
  • st/nine: Make swvp_on imply IS_VS
  • +
  • st/nine: Refactor ct_ctor
  • +
  • st/nine: Track constant slots used
  • +
  • st/nine: Refactor counting of constants
  • +
  • st/nine: Prepare constant compaction in nine_shader
  • +
  • st/nine: Propagate const_range to context
  • +
  • st/nine: Cache constant buffer size
  • +
  • st/nine: Handle const_ranges in nine_state
  • +
  • st/nine: Enable computing const_ranges
  • +
  • st/nine: Use TGSI_SEMANTIC_GENERIC for fog
  • +
  • st/nine: Optimize a bit writeonly buffers
  • +
  • st/nine: Throttle rendering similarly for thread_submit
  • +
  • st/nine: Check discard_delayed_release is set before allocating more
  • +
  • d3dadapter9: Revert to old throttling limit value
  • +
+ +

Bart Oldeman (1):

+
    +
  • gallium-xlib: query MIT-SHM before using it.
  • +
+ +

Bas Nieuwenhuizen (105):

+
    +
  • radv: Only look at pImmutableSamples if the descriptor has a sampler.
  • +
  • amd/common: Add gep helper for pointer increment.
  • +
  • amd/common: Implement ptr->int casts in ac_to_integer.
  • +
  • radv: Fix the shader info pass for not having the variable.
  • +
  • amd/common: Use correct writemask for shared memory stores.
  • +
  • amd/common: Fix stores to derefs with unknown variable.
  • +
  • amd/common: Handle nir_deref_type_ptr_as_array for shared memory.
  • +
  • amd/common: handle nir_deref_cast for shared memory from integers.
  • +
  • amd/common: Do not use 32-bit loads for shared memory.
  • +
  • amd/common: Implement global memory accesses.
  • +
  • radv: Do not use the bo list for local buffers.
  • +
  • radv: Implement VK_EXT_buffer_device_address.
  • +
  • radv: Use correct num formats to detect whether we should be use 1.0 or 1.
  • +
  • radv: Sync ETC2 whitelisted devices.
  • +
  • radv: Clean up a bunch of compiler warnings.
  • +
  • radv: Handle clip+cull distances more generally as compact arrays.
  • +
  • radv: Implement VK_EXT_depth_clip_enable.
  • +
  • radv: Disable depth clamping even without EXT_depth_range_unrestricted.
  • +
  • radv: Fix float16 interpolation set up.
  • +
  • radv: Allow interpolation on non-float types.
  • +
  • radv: Interpolate less aggressively.
  • +
  • turnip: Add driver skeleton (v2)
  • +
  • turnip: Fix up detection of device.
  • +
  • turnip: Gather some device info.
  • +
  • turnip: Remove abort.
  • +
  • turnip: Fix newly introduced warning.
  • +
  • turnip: Add buffer allocation & mapping support.
  • +
  • turnip: Report a memory type and heap.
  • +
  • turnip: Cargo cult the Intel heap size functionality.
  • +
  • turnip: Initialize memory type in requirements.
  • +
  • turnip: Disable more features.
  • +
  • turnip: Add 630 to the list.
  • +
  • turnip: Fix bo allocation after we stopped using libdrm_freedreno ...
  • +
  • turnip: Fix memory mapping.
  • +
  • turnip: Add image layout calculations.
  • +
  • turnip: Stop hardcoding the msm version check.
  • +
  • turnip: move tu_gem.c to tu_drm.c
  • +
  • turnip: Implement pipe-less param query.
  • +
  • turnip: Implement some format properties for RGBA8.
  • +
  • turnip: Remove some radv leftovers.
  • +
  • turnip: clean up TODO.
  • +
  • turnip: Implement some UUIDs.
  • +
  • turnip: Implement a slow bo list
  • +
  • turnip: Add a command stream.
  • +
  • turnip: Add msm queue support.
  • +
  • turnip: Make bo_list functions not static
  • +
  • turnip: Implement submission.
  • +
  • turnip: Fill command buffer
  • +
  • turnip: Shorten primary_cmd_stream name.
  • +
  • turnip: Add emit functions in a header.
  • +
  • turnip: Move stream functions to tu_cs.c
  • +
  • turnip: Add buffer memory binding.
  • +
  • turnip: Make tu6_emit_event_write shared.
  • +
  • turnip: Add tu6_rb_fmt_to_ifmt.
  • +
  • turnip: Implement buffer->buffer DMA copies.
  • +
  • turnip: Add image->buffer DMA copies.
  • +
  • turnip: Add buffer->image DMA copies.
  • +
  • turnip: Add todo for copies.
  • +
  • turnip: Fix GCC compiles.
  • +
  • turnip: Deconflict vk_format_table regeneration
  • +
  • gitlab-ci: Build turnip.
  • +
  • radeonsi: Remove implicit const cast.
  • +
  • radv: Allow fast clears with concurrent queue mask for some layouts.
  • +
  • vulkan/util: Handle enums that are in platform-specific headers.
  • +
  • vulkan: Update the XML and headers to 1.1.104
  • +
  • radv: Implement VK_EXT_host_query_reset.
  • +
  • radv: Use correct image view comparison for fast clears.
  • +
  • radv: Implement VK_EXT_pipeline_creation_feedback.
  • +
  • ac/nir: Return frag_coord as integer.
  • +
  • nir: Add access qualifiers on load_ubo intrinsic.
  • +
  • radv: Add non-uniform indexing lowering.
  • +
  • radv: Add bolist RADV_PERFTEST flag.
  • +
  • ac: Move has_local_buffers disable to radeonsi.
  • +
  • radv: Use local buffers for the global bo list.
  • +
  • radv: Support VK_EXT_inline_uniform_block.
  • +
  • radv: Add support for driconf.
  • +
  • vulkan/wsi: Add X11 adaptive sync support based on dri options.
  • +
  • radv: Add adaptive_sync driconfig option and enable it by default.
  • +
  • radv: Add logic for subsampled format descriptions.
  • +
  • radv: Add logic for multisample format descriptions.
  • +
  • radv: Add multiple planes to images.
  • +
  • radv: Add single plane image views & meta operations.
  • +
  • radv: Support different source & dest aspects for planar images in blit2d.
  • +
  • radv: Add ycbcr conversion structs.
  • +
  • radv: Add support for image views with multiple planes.
  • +
  • radv: Allow mixed src/dst aspects in copies.
  • +
  • ac/nir: Add support for planes.
  • +
  • radv: Add ycbcr samplers in descriptor set layouts.
  • +
  • radv: Update descriptor sets for multiple planes.
  • +
  • radv: Add ycbcr lowering pass.
  • +
  • radv: Run the new ycbcr lowering pass.
  • +
  • radv: Add hashing for the ycbcr samplers.
  • +
  • radv: Add ycbcr format features.
  • +
  • radv: Add ycbcr subsampled & multiplane formats to csv.
  • +
  • radv: Enable YCBCR conversion feature.
  • +
  • radv: Expose VK_EXT_ycbcr_image_arrays.
  • +
  • radv: Expose Vulkan 1.1 for Android.
  • +
  • radv: Fix hang width YCBCR array textures.
  • +
  • radv: Set is_array in lowered ycbcr tex instructions.
  • +
  • radv: Restrict YUVY formats to 1 layer.
  • +
  • radv: Disable subsampled formats.
  • +
  • radv: Implement cosited_even sampling.
  • +
  • radv: Do not use extra descriptor space for the 3rd plane.
  • +
  • nir: Actually propagate progress in nir_opt_move_load_ubo.
  • +
  • radv: Prevent out of bound shift on 32-bit builds.
  • +
+ +

Benjamin Gordon (1):

+
    +
  • configure.ac/meson.build: Add options for library suffixes
  • +
+ +

Benjamin Tissoires (1):

+
    +
  • CI: use wayland ci-templates repo to create the base image
  • +
+ +

Boyan Ding (3):

+
    +
  • gk110/ir: Add rcp f64 implementation
  • +
  • gk110/ir: Add rsq f64 implementation
  • +
  • gk110/ir: Use the new rcp/rsq in library
  • +
+ +

Boyuan Zhang (1):

+
    +
  • st/va: reverse qt matrix back to its original order
  • +
+ +

Brian Paul (51):

+
    +
  • st/mesa: whitespace/formatting fixes in st_cb_texture.c
  • +
  • svga: assorted whitespace and formatting fixes
  • +
  • svga: fix dma.pending > 0 test
  • +
  • mesa: fix display list corner case assertion
  • +
  • st/mesa: whitespace fixes in st_sampler_view.c
  • +
  • st/mesa: line wrapping, whitespace fixes in st_cb_texture.c
  • +
  • st/mesa: whitespace fixes in st_texture.h
  • +
  • svga: init fill variable to avoid compiler warning
  • +
  • svga: silence array out of bounds warning
  • +
  • st/wgl: init a variable to silence MinGW warning
  • +
  • gallium/util: whitespace cleanups in u_bitmask.[ch]
  • +
  • gallium/util: add some const qualifiers in u_bitmask.c
  • +
  • pipebuffer: use new pb_usage_flags enum type
  • +
  • pipebuffer: whitespace fixes in pb_buffer.h
  • +
  • winsys/svga: use new pb_usage_flags enum type
  • +
  • st/mesa: move, clean-up shader variant key decls/inits
  • +
  • st/mesa: whitespace, formatting fixes in st_cb_flush.c
  • +
  • svga: refactor draw_vgpu10() function
  • +
  • svga: remove SVGA_RELOC_READ flag in SVGA3D_BindGBSurface()
  • +
  • pipebuffer: s/PB_ALL_USAGE_FLAGS/PB_USAGE_ALL/
  • +
  • st/mesa: init hash keys with memset(), not designated initializers
  • +
  • intel/decoders: silence uninitialized variable warnings in gen_print_batch()
  • +
  • intel/compiler: silence unitialized variable warning in opt_vector_float()
  • +
  • st/mesa: move utility functions, macros into new st_util.h file
  • +
  • st/mesa: move around some code in st_context.c
  • +
  • st/mesa: add/improve sampler view comments
  • +
  • st/mesa: rename st_texture_release_sampler_view()
  • +
  • st/mesa: minor refactoring of texture/sampler delete code
  • +
  • docs: try to improve the Meson documentation (v2)
  • +
  • drisw: fix incomplete type compilation failure
  • +
  • gallium/winsys/kms: fix incomplete type compilation failure
  • +
  • nir: silence a couple new compiler warnings
  • +
  • docs: separate information for compiler selection and compiler options
  • +
  • docs: link to the meson_options.txt file gitlab.freedesktop.org
  • +
  • st/mesa: implement "zombie" sampler views (v2)
  • +
  • st/mesa: implement "zombie" shaders list
  • +
  • st/mesa: stop using pipe_sampler_view_release()
  • +
  • svga: stop using pipe_sampler_view_release()
  • +
  • llvmpipe: stop using pipe_sampler_view_release()
  • +
  • swr: remove call to pipe_sampler_view_release()
  • +
  • i915g: remove calls to pipe_sampler_view_release()
  • +
  • gallium/util: remove pipe_sampler_view_release()
  • +
  • nir: fix a few signed/unsigned comparison warnings
  • +
  • st/mesa: fix texture deletion context mix-up issues (v2)
  • +
  • nir: use {0} initializer instead of {} to fix MSVC build
  • +
  • util: no-op __builtin_types_compatible_p() for non-GCC compilers
  • +
  • docs: s/Aptril/April/
  • +
  • llvmpipe: init some vars to NULL to silence MinGW compiler warnings
  • +
  • glsl: work around MinGW 7.x compiler bug
  • +
  • svga: add SVGA_NO_LOGGING env var (v2)
  • +
  • glsl: fix typo in #warning message
  • +
+ +

Caio Marcelo de Oliveira Filho (61):

+
    +
  • nir: keep the phi order when splitting blocks
  • +
  • i965: skip bit6 swizzle detection in Gen8+
  • +
  • anv: skip bit6 swizzle detection in Gen8+
  • +
  • isl: assert that Gen8+ don't have bit6_swizzling
  • +
  • intel/compiler: use 0 as sampler in emit_mcs_fetch
  • +
  • nir: fix example in opt_peel_loop_initial_if description
  • +
  • iris: Fix uses of gl_TessLevel*
  • +
  • iris: Add support for TCS passthrough
  • +
  • iris: always include an extra constbuf0 if using UBOs
  • +
  • nir/copy_prop_vars: don't get confused by array_deref of vectors
  • +
  • nir/copy_prop_vars: add debug helpers
  • +
  • nir/copy_prop_vars: keep track of components in copy_entry
  • +
  • nir/copy_prop_vars: change test helper to get intrinsics
  • +
  • nir: nir_build_deref_follower accept array derefs of vectors
  • +
  • nir/copy_prop_vars: add tests for load/store elements of vectors
  • +
  • nir: fix MSVC build
  • +
  • st/nir: count num_uniforms for FS bultin shader
  • +
  • nir/copy_prop_vars: rename/refactor store_to_entry helper
  • +
  • nir/copy_prop_vars: use NIR_MAX_VEC_COMPONENTS
  • +
  • nir/copy_prop_vars: handle load/store of vector elements
  • +
  • nir/copy_prop_vars: add tests for indirect array deref
  • +
  • nir/copy_prop_vars: prefer using entries from equal derefs
  • +
  • nir/copy_prop_vars: handle indirect vector elements
  • +
  • anv: Implement VK_EXT_external_memory_host
  • +
  • nir: Add a pass to combine store_derefs to same vector
  • +
  • intel/nir: Combine store_derefs after vectorizing IO
  • +
  • intel/nir: Combine store_derefs to improve code from SPIR-V
  • +
  • nir: Handle array-deref-of-vector case in loop analysis
  • +
  • spirv: Add an execution environment to the options
  • +
  • intel/compiler: handle GLSL_TYPE_INTERFACE as GLSL_TYPE_STRUCT
  • +
  • spirv: Use interface type for block and buffer block
  • +
  • iris: Clean up compiler warnings about unused
  • +
  • nir: Take if_uses into account when repairing SSA
  • +
  • mesa: Extension boilerplate for NV_compute_shader_derivatives
  • +
  • glsl: Remove redundant conditions when asserting in_qualifier
  • +
  • glsl: Enable derivative builtins for NV_compute_shader_derivatives
  • +
  • glsl: Enable texture builtins for NV_compute_shader_derivatives
  • +
  • glsl: Parse and propagate derivative_group to shader_info
  • +
  • nir/algebraic: Lower CS derivatives to zero when no group defined
  • +
  • nir: Don't set LOD=0 for compute shader that has derivative group
  • +
  • intel/fs: Use TEX_LOGICAL whenever implicit lod is supported
  • +
  • intel/fs: Add support for CS to group invocations in quads
  • +
  • intel/fs: Don't loop when lowering CS intrinsics
  • +
  • intel/fs: Use NIR_PASS_V when lowering CS intrinsics
  • +
  • i965: Advertise NV_compute_shader_derivatives
  • +
  • gallium: Add PIPE_CAP_COMPUTE_SHADER_DERIVATIVES
  • +
  • iris: Enable NV_compute_shader_derivatives
  • +
  • spirv: Add support for DerivativeGroup capabilities
  • +
  • anv: Implement VK_NV_compute_shader_derivatives
  • +
  • docs: Add NV_compute_shader_derivatives to 19.1.0 relnotes
  • +
  • spirv: Add more to_string helpers
  • +
  • spirv: Tell which opcode or value is unhandled when failing
  • +
  • spirv: Rename vtn_decoration literals to operands
  • +
  • spirv: Handle SpvOpDecorateId
  • +
  • nir: Add option to lower tex to txl when shader don't support implicit LOD
  • +
  • intel/fs: Don't handle texop_tex for shaders without implicit LOD
  • +
  • spirv: Properly handle SpvOpAtomicCompareExchangeWeak
  • +
  • intel/fs: Assert when brw_fs_nir sees a nir_deref_instr
  • +
  • anv: Fix limits when VK_EXT_descriptor_indexing is used
  • +
  • nir: Fix nir_opt_idiv_const when negatives are involved
  • +
  • nir: Fix clone of nir_variable state slots
  • +
+ +

Carlos Garnacho (1):

+
    +
  • wayland/egl: Ensure EGL surface is resized on DRI update_buffers()
  • +
+ +

Chad Versace (17):

+
    +
  • turnip: Drop Makefile.am and Android.mk
  • +
  • turnip: Fix indentation in function signatures
  • +
  • turnip: Fix result of vkEnumerate*LayerProperties
  • +
  • turnip: Fix result of vkEnumerate*ExtensionProperties
  • +
  • turnip: Use vk_outarray in all relevant public functions
  • +
  • turnip: Fix a real -Wmaybe-uninitialized
  • +
  • turnip: Fix indentation
  • +
  • turnip: Require DRM device version >= 1.3
  • +
  • turnip: Add TODO for Android logging
  • +
  • turnip: Use vk_errorf() for initialization error messages
  • +
  • turnip: Replace fd_bo with tu_bo
  • +
  • turnip: Add TODO file
  • +
  • turnip: Fix 'unused' warnings
  • +
  • turnip: Don't return from tu_stub funcs
  • +
  • turnip: Annotate vkGetImageSubresourceLayout with tu_stub
  • +
  • turnip: Fix error behavior for VkPhysicalDeviceExternalImageFormatInfo
  • +
  • turnip: Use Vulkan 1.1 names instead of KHR
  • +
+ +

Charmaine Lee (5):

+
    +
  • svga: add svga shader type in the shader variant
  • +
  • svga: move host logging to winsys
  • +
  • st/mesa: purge framebuffers with current context after unbinding winsys buffers
  • +
  • mesa: unreference current winsys buffers when unbinding winsys buffers
  • +
  • svga: Remove unnecessary check for the pre flush bit for setting vertex buffers
  • +
+ +

Chenglei Ren (1):

+
    +
  • anv/android: fix missing dependencies issue during parallel build
  • +
+ +

Chia-I Wu (78):

  • egl: fix KHR_partial_update without EXT_buffer_age
  • turnip: add .clang-format
  • turnip: use msm_drm.h from inc_freedreno
  • turnip: remove unnecessary libfreedreno_drm dep
  • turnip: add wrappers around DRM_MSM_GET_PARAM
  • turnip: add wrappers around DRM_MSM_SUBMITQUEUE_*
  • turnip: constify tu_device in tu_gem_*
  • turnip: preliminary support for tu_QueueWaitIdle
  • turnip: run sed and clang-format on tu_cs
  • turnip: document tu_cs
  • turnip: add tu_cs_add_bo
  • turnip: minor cleanup to tu_cs_end
  • turnip: update cs->start in tu_cs_end
  • turnip: inline tu_cs_check_space
  • turnip: add more tu_cs helpers
  • turnip: build drm_msm_gem_submit_bo array directly
  • turnip: add tu_bo_list_merge
  • turnip: add cmdbuf->bo_list to bo_list in queue submit
  • turnip: preliminary support for tu_BindImageMemory2
  • turnip: preliminary support for tu_image_view_init
  • turnip: preliminary support for tu_CmdBeginRenderPass
  • turnip: add tu_cs_reserve_space(_assert)
  • turnip: emit HW init in tu_BeginCommandBuffer
  • turnip: preliminary support for tu_GetRenderAreaGranularity
  • turnip: add tu_tiling_config
  • turnip: add internal helpers for tu_cs
  • turnip: add tu_cs_{reserve,add}_entry
  • turnip: specify initial size in tu_cs_init
  • turnip: never fail tu_cs_begin/tu_cs_end
  • turnip: add tu_cs_sanity_check
  • turnip: provide both emit_ib and emit_call
  • turnip: add tu_cs_mode
  • turnip: add TU_CS_MODE_SUB_STREAM
  • turnip: preliminary support for loadOp and storeOp
  • turnip: add a more complete format table
  • turnip: add functions to import/export prime fd
  • turnip: advertise VK_KHR_external_memory_capabilities
  • turnip: advertise VK_KHR_external_memory
  • turnip: add support for VK_KHR_external_memory_{fd,dma_buf}
  • turnip: fix VkClearValue packing
  • turnip: preliminary support for fences
  • turnip: respect color attachment formats
  • turnip: mark IBs for dumping
  • turnip: use 32-bit offset in tu_cs_entry
  • turnip: more/better asserts for tu_cs
  • turnip: add tu_cs_discard_entries
  • turnip: tu_cs_emit_array
  • turnip: fix tu_cs sub-streams
  • turnip: simplify tu_cs sub-streams usage
  • turnip: create a less dummy pipeline
  • turnip: parse VkPipelineDynamicStateCreateInfo
  • turnip: parse VkPipelineInputAssemblyStateCreateInfo
  • turnip: parse VkPipelineViewportStateCreateInfo
  • turnip: parse VkPipelineRasterizationStateCreateInfo
  • turnip: parse VkPipelineDepthStencilStateCreateInfo
  • turnip: parse VkPipeline{Multisample,ColorBlend}StateCreateInfo
  • turnip: preliminary support for shader modules
  • turnip: compile VkPipelineShaderStageCreateInfo
  • turnip: parse VkPipelineShaderStageCreateInfo
  • turnip: parse VkPipelineVertexInputStateCreateInfo
  • turnip: add draw_cs to tu_cmd_buffer
  • turnip: preliminary support for draw state binding
  • turnip: preliminary support for tu_CmdDraw
  • turnip: guard -Dvulkan-driver=freedreno
  • turnip: preliminary support for tu_GetImageSubresourceLayout
  • turnip: preliminary support for Wayland WSI
  • vulkan/wsi: move modifier array into wsi_wl_swapchain
  • vulkan/wsi: create wl_drm wrapper as needed
  • vulkan/wsi: refactor drm_handle_format
  • vulkan/wsi: add wsi_wl_display_drm
  • vulkan/wsi: add wsi_wl_display_dmabuf
  • vulkan/wsi: make wl_drm optional
  • virgl: handle fence_server_sync in winsys
  • virgl: hide fence internals from the driver
  • virgl: introduce virgl_drm_fence
  • virgl: fix fence fd version check
  • virgl: clear vertex_array_dirty
  • virgl: skip empty cmdbufs

Chris Forbes (3):

  • glsl: add scaffolding for EXT_gpu_shader4
  • glsl: enable noperspective|flat|centroid for EXT_gpu_shader4
  • glsl: enable types for EXT_gpu_shader4

Chris Wilson (19):

  • i965: Assert the execobject handles match for this device
  • iris: fix import from dri2/3
  • iris: IndexFormat = size/2
  • iris: Set resource modifier on handle
  • iris: Wrap userptr for creating bo
  • iris: AMD_pinned_memory
  • iris: Record reusability of bo on construction
  • iris: fix memzone_for_address since multibinder changes
  • iris: Tidy exporting the flink handle
  • iris: Fix assigning the output handle for exporting for KMS
  • iris: Merge two walks of the exec_bos list
  • iris: Tag each submitted batch with a syncobj
  • iris: Add fence support using drm_syncobj
  • iris: Wire up EGL_IMG_context_priority
  • iris: Use PIPE_BUFFER_STAGING for the query objects
  • iris: Use coherent allocation for PIPE_RESOURCE_STAGING
  • iris: Use streaming loads to read from tiled surfaces
  • iris: Push heavy memchecker code to DEBUG
  • iris: Adapt to variable ppGTT size

Christian Gmeiner (12):

  • etnaviv: rs: mark used src resource as read from
  • etnaviv: blt: mark used src resource as read from
  • etnaviv: implement ETC2 block patching for HALTI0
  • etnaviv: keep track of mapped bo address
  • etnaviv: hook-up etc2 patching
  • etnaviv: enable ETC2 texture compression support for HALTI0 GPUs
  • etnaviv: fix resource usage tracking across different pipe_context's
  • etnaviv: fix compile warnings
  • st/dri: allow direct UYVY import
  • etnaviv: shrink struct etna_3d_state
  • nir: add lower_ftrunc
  • etnaviv: use the correct uniform dirty bits

Chuck Atkins (1):

  • meson: Fix missing glproto dependency for gallium-glx

Connor Abbott (6):

  • nir/serialize: Prevent writing uninitialized state_slot data
  • nir: Add a stripping pass for improved cacheability
  • radeonsi/nir: Use nir stripping pass
  • nir/search: Add automaton-based pre-searching
  • nir/search: Add debugging code to dump the pattern matched
  • nir/algebraic: Don't emit empty initializers for MSVC

Daniel Schürmann (2):

  • nir: Define shifts according to SM5 specification.
  • nir: Use SM5 properties to optimize shift(a@32, iand(31, b))

Daniel Stone (2):

  • panfrost: Properly align stride
  • vulkan/wsi/wayland: Respect non-blocking AcquireNextImage

Danylo Piliaiev (13):

  • anv: Handle VK_ATTACHMENT_UNUSED in colorAttachment
  • radv: Handle VK_ATTACHMENT_UNUSED in CmdClearAttachment
  • anv: Fix VK_EXT_transform_feedback working with varyings packed in PSIZ
  • anv: Fix destroying descriptor sets when pool gets reset
  • anv: Treat zero size XFB buffer as disabled
  • glsl: Cross validate variable's invariance by explicit invariance only
  • i965,iris,anv: Make alpha to coverage work with sample mask
  • intel/fs: Make alpha test work with MRT and sample mask
  • st/mesa: Fix GL_MAP_COLOR with glDrawPixels GL_COLOR_INDEX
  • iris: Fix assert when using vertex attrib without buffer binding
  • intel/compiler: Do not reswizzle dst if instruction writes to flag register
  • drirc: Add workaround for Epic Games Launcher
  • anv: Do not emulate texture swizzle for INPUT_ATTACHMENT, STORAGE_IMAGE

Dave Airlie (63):

  • virgl: enable elapsed time queries
  • virgl: ARB_query_buffer_object support
  • docs: update qbo support for virgl
  • glsl: glsl to nir fix uninit class member.
  • radv/llvm: initialise passes member.
  • radv: remove alloc parameter from pipeline init
  • iris: fix some hangs around null framebuffers
  • iris: fix crash in sparse vertex array
  • iris: add initial transform feedback overflow query paths (V3)
  • iris: fix cube texture view
  • iris: execute compute related query on compute batch.
  • iris: iris add load register reg32/64
  • iris: add conditional render support
  • iris: fix gpu calcs for timestamp queries
  • iris/WIP: add broadwell support
  • iris: limit gen8 to 8 samples
  • iris: setup gen8 caps
  • iris: add fs invocations query workaround for broadwell
  • iris: handle qbo fragment shader invocation workaround
  • st/mesa: add support for lowering fp64/int64 for nir drivers
  • softpipe: fix texture view crashes
  • nir/spirv: don't use bare types, remove assert in split vars for testing
  • nir/deref: remove casts of casts which are likely redundant (v3)
  • softpipe: fix 32-bit bitfield extract
  • softpipe: handle 32-bit bitfield inserts
  • softpipe: remove shadow_ref assert.
  • softpipe: fix integer texture swizzling for 1 vs 1.0f
  • nir/split_vars: fixup some more explicit_stride related issues.
  • draw: bail instead of assert on instance count (v2)
  • draw/gs: fix point size outputs from geometry shader.
  • draw/vs: partly fix basevertex/vertex id
  • softpipe: fix clears to only clear specified color buffers.
  • softpipe/draw: fix vertex id in soft paths.
  • softpipe: add indirect store buffer/image unit
  • nir/deref: fix struct wrapper casts. (v3)
  • nir: use proper array sizing define for vectors
  • intel/compiler: use defined size for vector components
  • iris: avoid use after free in shader destruction
  • ddebug: add compute functions to help hang detection
  • draw: add stream member to stats callback
  • tgsi: add support for geometry shader streams.
  • softpipe: add support for indexed queries.
  • draw: add support to tgsi paths for geometry streams. (v2)
  • softpipe: add support for vertex streams (v2)
  • virgl: add support for missing command buffer binding.
  • virgl: add support for ARB_multi_draw_indirect
  • virgl: add support for ARB_indirect_parameters
  • draw: fix undefined shift of (1 << 31)
  • swrast: fix undefined shift of 1 << 31
  • llvmpipe: fix undefined shift 1 << 31.
  • virgl/drm: cleanup buffer from handle creation (v2)
  • virgl/drm: handle flink name better.
  • virgl/drm: insert correct handles into the table. (v3)
  • intel/compiler: fix uninit non-static variable. (v2)
  • nir: fix bit_size in lower indirect derefs.
  • r600: reset tex array override even when no view bound
  • spirv: fix SpvOpBitSize return value.
  • nir: fix lower vars to ssa for larger vector sizes.
  • util/tests: add basic unit tests for bitset
  • util/bitset: fix bitset range mask calculations.
  • kmsro: add _dri.so to two of the kmsro drivers.
  • glsl: init packed in more constructors.
  • Revert "mesa: unreference current winsys buffers when unbinding winsys buffers"

David Riley (3):

  • virgl: Store mapped hw resource with transfer object.
  • virgl: Allow transfer queue entries to be found and extended.
  • virgl: Re-use and extend queue transfers for intersecting buffer subdatas.

David Shao (1):

  • meson: ensure that xmlpool_options.h is generated for gallium targets that need it

Deepak Rawat (2):

  • winsys/drm: Fix out of scope variable usage
  • winsys/svga/drm: Fix 32-bit RPCI send message

Dominik Drees (1):

  • Add no_aos_sampling GALLIVM_PERF option

Drew Davenport (1):

  • util: Don't block SIGSYS for new threads

Dylan Baker (40):

  • bump version for 19.0 branch
  • docs: Add relnotes stub for 19.1
  • gallium: wrap u_screen in extern "C" for c++
  • automake: Add --enable-autotools to distcheck flags
  • android,autotools,i965: Fix location of float64_glsl.h
  • meson: remove build_by_default : true
  • meson: fix style in intel/tools
  • meson: remove -std=c++11 from intel/tools
  • get-pick-list: Add --pretty=medium to the arguments for Cc patches
  • meson: Add dependency on genxml to anvil
  • meson/iris: Use current coding style
  • docs: Add release notes for 19.0.0
  • docs: Add SHA256 sums for 19.0.0
  • docs: update calendar, add news item, and link release notes for 19.0.0
  • bin/install_megadrivers.py: Correctly handle DESTDIR=''
  • bin/install_megadrivers.py: Fix regression for set DESTDIR
  • docs: Add release notes for 19.0.1
  • docs: Add SHA256 sums for mesa 19.0.1
  • docs: update calendar, add news item and link release notes for 19.0.1
  • meson: Error if LLVM doesn't have rtti when building clover
  • meson: Error if LLVM is turned off but clover it turned on
  • docs: Add release notes for 19.0.2
  • docs: Add sha256 sums for 19.0.2
  • docs: update calendar, and news item and link release notes for 19.0.2
  • Delete autotools
  • docs: drop most autoconf references
  • ci: Delete autotools build jobs
  • docs: add relnotes for 19.0.3
  • docs: Add SHA256 sums for mesa 19.0.3
  • docs: update calendar, and news item and link release notes for 19.0.3
  • meson: always define libglapi
  • glsl: fix general_ir_test with mingw
  • meson: switch gles1 and gles2 to auto options
  • meson: Make shader-cache a trillean instead of boolean
  • meson: make nm binary optional
  • util/tests: Use define instead of VLA
  • glsl/tests: define ssize_t on windows
  • tests/vma: fix build with MSVC
  • meson: Don't build glsl cache_test when shader cache is disabled
  • meson: Force the use of config-tool for llvm

Eduardo Lima Mitev (5):

  • freedreno/a6xx: Silence compiler warnings
  • nir: Add ir3-specific version of most SSBO intrinsics
  • ir3/nir: Add a new pass 'ir3_nir_lower_io_offsets'
  • ir3/compiler: Enable lower_io_offsets pass and handle new SSBO intrinsics
  • ir3/lower_io_offsets: Try propagate SSBO's SHR into a previous shift instruction

El Christianito (1):

  • drirc: add Budgie WM to adaptive-sync blacklist

Eleni Maria Stea (6):

  • i965: Faking the ETC2 compression on Gen < 8 GPUs using two miptrees.
  • i965: Fixed the CopyImageSubData for ETC2 on Gen < 8
  • i965: Enabled the OES_copy_image extension on Gen 7 GPUs
  • i965: Removed the field etc_format from the struct intel_mipmap_tree
  • i965: fixed clamping in set_scissor_bits when the y is flipped
  • radv: consider MESA_VK_VERSION_OVERRIDE when setting the api version

Elie Tournier (3):

  • virgl: Add a caps to advertise GLES backend
  • virgl: Set PIPE_CAP_DOUBLES when running on GLES This is a lie but no known app use fp64.
  • virgl: Return an error if we use fp64 on top of GLES

Emil Velikov (30):

  • vc4: Declare the last cpu pointer as being modified in NEON asm.
  • docs: add release notes for 18.3.3
  • docs: add sha256 checksums for 18.3.3
  • docs: update calendar, add news item and link release notes for 18.3.3
  • anv: wire up the state_pool_padding test
  • docs: add release notes for 18.3.4
  • docs: add sha256 checksums for 18.3.4
  • docs: update calendar, add news item and link release notes for 18.3.4
  • egl/dri: de-duplicate dri2_load_driver*
  • meson: egl: correctly manage loader/xmlconfig
  • loader: use loader_open_device() to handle O_CLOEXEC
  • egl/android: bump the number of drmDevices to 64
  • docs: mention "Allow commits from members who can merge..."
  • egl/sl: split out swrast probe into separate function
  • egl/sl: use drmDevice API to enumerate available devices
  • egl/sl: use kms_swrast with vgem instead of a random GPU
  • docs: add release notes for 18.3.5
  • docs: add sha256 checksums for 18.3.5
  • docs: update calendar, add news item and link release notes for 18.3.5
  • docs: add release notes for 18.3.6
  • docs: add sha256 checksums for 18.3.6
  • docs: update calendar, add news item and link release notes for 18.3.6
  • turnip: drop dead close(master_fd)
  • vulkan/wsi: check if the display_fd given is master
  • vulkan/wsi: don't use DUMB_CLOSE for normal GEM handles
  • llvmpipe: add lp_fence_timedwait() helper
  • llvmpipe: correctly handle waiting in llvmpipe_fence_finish
  • egl/dri: flesh out and use dri2_create_drawable()
  • mapi: add static_date offset to MaxShaderCompilerThreadsKHR
  • mapi: correctly handle the full offset table

Emmanuel Gil Peyrot (1):

  • docs: make bugs.html easier to find

Eric Anholt (121):

  • v3d: Always enable the NEON utile load/store code.
  • v3d: Fix a release build set-but-unused compiler warning.
  • mesa: Skip partial InvalidateFramebuffer of packed depth/stencil.
  • v3d: Fix image_load_store clamping of signed integer stores.
  • nir: Move V3D's "the shader was TGSI, ignore FS output types" flag to NIR.
  • v3d: Fix precompile of FRAG_RESULT_DATA1 and higher outputs.
  • v3d: Store the actual mask of color buffers present in the key.
  • v3d: Fix dumping of shaders with alpha test.
  • v3d: Fix pack/unpack of VFPACK operand unpacks.
  • v3d: Fix input packing of .l for rounding/fdx/fdy.
  • v3d: Fix copy-propagation of input unpacks.
  • v3d: Whitespace consistency fix.
  • nir: Move panfrost's isign lowering to nir_opt_algebraic.
  • v3d: Use the NIR lowering for isign instead of rolling our own.
  • intel: Use the NIR lowering for isign.
  • freedreno: Use the NIR lowering for isign.
  • v3d: Clear the GMP on initialization of the simulator.
  • v3d: Sync indirect draws on the last rendering.
  • v3d: Use the early_fragment_tests flag for the shader's disable-EZ field.
  • v3d: Fix incorrect flagging of ldtmu as writing r4 on v3d 4.x.
  • v3d: Drop a perf note about merging unpack_half_*, which has been implemented.
  • v3d: Drop our hand-lowered nir_op_ffract.
  • v3d: Add a helper function for getting a nop register.
  • v3d: Refactor bcsel and if condition handling.
  • v3d: Do bool-to-cond for discard_if as well.
  • v3d: Kill off vir_PF(), which is hard to use right.
  • v3d: Fix f2b32 behavior.
  • v3d: Fix the check for "is the last thrsw inside control flow"
  • v3d: Add a function to describe what the c->execute.file check means.
  • v3d: Stop tracking num_inputs for VPM loads.
  • v3d: Delay emitting ldvpm on V3D 4.x until it's actually used.
  • v3d: Emit a simpler negate for the iabs implementation.
  • v3d: Move i2b and f2b support into emit_comparison.
  • kmsro: Add the rest of the current set of tinydrm drivers.
  • nir: Just return when asked to rewrite uses of an SSA def to itself.
  • v3d: Fix vir_is_raw_mov() for input unpacks.
  • v3d: Dump the VIR after register spilling if we were forced to.
  • v3d: Rematerialize MOVs of uniforms instead of spilling them.
  • v3d: Fix build of NEON code with Mesa's cflags not targeting NEON.
  • v3d: Restrict live intervals to the blocks reachable from any def.
  • v3d: Stop treating exec masking specially.
  • nir: Improve printing of load_input/store_output variable names.
  • v3d: Translate f2i(fround_even) as FTOIN.
  • v3d: Move the stores for fixed function VS output reads into NIR.
  • v3d: Fix temporary leaks of temp_registers and when spilling.
  • v3d: Do uniform rematerialization spilling before dropping threadcount
  • v3d: Switch implicit uniforms over to being any qinst->uniform != ~0.
  • v3d: Add support for vir-to-qpu of ldunif instructions to a temp.
  • v3d: Drop the old class bits splitting up the accumulators.
  • v3d: Add support for register-allocating a ldunif to a QFILE_TEMP.
  • v3d: Use ldunif instructions for uniforms.
  • v3d: Eliminate the TLB and TLBU files.
  • v3d: Drop the V3D 3.x vpm read dead code elimination.
  • v3d: Include a count of register pressure in the RA failure dumps.
  • st/dri: Set the PIPE_BIND_SHARED flag on create_image_with_modifiers.
  • util: Add a DAG datastructure.
  • vc4: Switch over to using the DAG datastructure for QIR scheduling.
  • v3d: Reuse list_for_each_entry_rev().
  • vc4: Reuse list_for_each_entry_rev().
  • v3d: Use the DAG datastructure for QPU instruction scheduling.
  • vc4: Switch the post-RA scheduler over to the DAG datastructure.
  • v3d: Disable PIPE_CAP_BLIT_BASED_TEXTURE_TRANSFER.
  • v3d: Fix leak of the mem_ctx after the DAG refactor.
  • v3d: Fix leak of the renderonly struct on screen destruction.
  • mesa/st: Make sure that prog_to_nir NIR gets freed.
  • mesa/st: Fix leaks of TGSI tokens in VP variants.
  • v3d: Always lay out shared tiled buffers with UIF_TOP set.
  • v3d: Allow the UIF modifier with renderonly.
  • v3d: Expose the dma-buf modifiers query.
  • v3d: Rename v3d_tmu_config_data to v3d_unit_data.
  • v3d: Move constant offsets to UBO addresses into the main uniform stream.
  • v3d: Upload all of UBO[0] if any indirect load occurs.
  • v3d: Remove some dead members of struct v3d_compile.
  • egl: Add a 565 pbuffer-only EGL config under X11.
  • dri3: Return the current swap interval from glXGetSwapIntervalMESA().
  • v3d: Add support for handling OOM signals from the simulator.
  • v3d: Bump the maximum texture size to 4k for V3D 4.x.
  • v3d: Don't try to use the TFU blit path if a scissor is enabled.
  • v3d: Add some more new packets for V3D 4.x.
  • st: Lower uniforms in st in the !PIPE_CAP_PACKED_UNIFORMS case as well.
  • vc4: Don't forget to set the range when scalarizing our uniforms.
  • vc4: Split UBO0 and UBO1 address uniform handling.
  • vc4: Upload CS/VS UBO uniforms together.
  • v3d: Add an optimization pass for redundant flags updates.
  • nir: Drop comments about the constant_index slots for load/stores.
  • nir: Drop remaining references to const_index in favor of the call to use.
  • nir: Add a comment about how intrinsic definitions work.
  • v3d: Add and use a define for the number of channels in a QPU invocation.
  • v3d: Drop a note for the future about PIPE_CAP_PACKED_UNIFORMS.
  • v3d: Include the number of max temps used in the shader-db output.
  • v3d: Replace the old shader-db env var output with the ARB_debug_output.
  • v3d: Add Compute Shader compilation support.
  • v3d: Add missing base offset to CS shared memory accesses.
  • v3d: Add missing dumping for the spill offset/size uniforms.
  • v3d: Detect the correct number of QPUs and use it to fix the spill size.
  • v3d: Use the new lower_to_scratch implementation for indirects on temps.
  • v3d: Only look up the 3rd texture gather offset for non-arrays.
  • v3d: Always set up the qregs for CSD payload.
  • v3d: Fix an invalid reuse of flags generation from before a thrsw.
  • v3d: Fix atomic cmpxchg in shaders on hardware.
  • nir: Fix deref offset calculation for structs.
  • nir: Use the nir_builder _imm helpers in setting up deref offsets.
  • gallium: Remove the pool pipebuffer manager.
  • gallium: Remove the ondemand pipebuffer manager.
  • gallium: Remove the "alt" pipebuffer manager interface.
  • gallium: Remove the malloc pipebuffer manager.
  • st/mesa: Don't set atomic counter size != 0 if MAX_SHADER_BUFFERS == 0.
  • v3d: Disable SSBOs and atomic counters on vertex shaders.
  • v3d: Fill in the ignored segment size fields to appease new simulator.
  • v3d: Apply the GFXH-930 workaround to the case where the VS loads attrs.
  • v3d: Assert that we do request the normal texturing return data.
  • v3d: Use _mesa_hash_table_remove_key() where appropriate.
  • vc4: Use _mesa_hash_table_remove_key() where appropriate.
  • v3d: Add a note about i/o indirection for future performance work.
  • v3d: Don't try to update the shadow texture for separate stencil.
  • Revert "v3d: Disable PIPE_CAP_BLIT_BASED_TEXTURE_TRANSFER."
  • v3d: Re-add support for memory_barrier_shared.
  • v3d: Fix detection of the last ldtmu before a new TMU op.
  • v3d: Fix detection of TMU write sequences in register spilling.
  • kmsro: Add support for V3D.
  • vc4: Fall back to renderonly if the vc4 driver doesn't have v3d.

Eric Engestrom (142):

  • wsi/display: add comment
  • egl: use coherent variable names
  • gitlab-ci: add ubuntu container
  • gitlab-ci: add a meson vulkan build
  • gitlab-ci: add a make vulkan build
  • gitlab-ci: add a scons no-llvm build
  • gitlab-ci: add scons llvm 3.5 build
  • gitlab-ci: add scons SWR build
  • gitlab-ci: add meson loader/classic DRI build
  • gitlab-ci: add meson gallium SWR build
  • gitlab-ci: add meson gallium RadeonSI build
  • gitlab-ci: add meson gallium "other drivers" build
  • gitlab-ci: add meson gallium ST Clover (LLVM 5.0) build
  • gitlab-ci: add meson gallium ST Clover (LLVM 6.0) build
  • gitlab-ci: add meson gallium ST Clover (LLVM 7.0) build
  • gitlab-ci: add meson gallium ST "Other" build
  • gitlab-ci: add make loaders/classic DRI build
  • gitlab-ci: add make Gallium Drivers SWR build
  • gitlab-ci: add make Gallium Drivers RadeonSI build
  • gitlab-ci: add make Gallium Drivers "Other" build
  • gitlab-ci: add make Gallium ST Clover LLVM-3.9 build
  • gitlab-ci: add make Gallium ST Clover LLVM-4.0 build
  • gitlab-ci: add make Gallium ST Clover LLVM-5.0 build
  • gitlab-ci: add make Gallium ST Clover LLVM-6.0 build
  • gitlab-ci: add make Gallium ST Clover LLVM-7 build
  • gitlab-ci: add make Gallium ST Other build
  • travis: remove unused linux code path
  • travis: remove unused scons code path
  • gitlab-ci: add meson glvnd build
  • xvmc: fix string comparison
  • xvmc: fix string comparison
  • meson: add script to print the options before configuring a builddir
  • driconf: drop unused macro
  • travis: fix osx make build
  • gitlab-ci: workaround docker bug for users with uppercase characters
  • wsi: query the ICD's max dimensions instead of hard-coding them
  • gitlab-ci: limit ninja to 4 threads max
  • drm-uapi/README: remove explicit list of driver names
  • drm-uapi: use local files, not system libdrm
  • gbm: drop duplicate #defines
  • st/dri: drop duplicate #define
  • etnaviv: drop duplicate #define
  • anv/tests: compile to something sensible in release builds
  • util/tests: compile to something sensible in release builds
  • gitlab-ci: use ccache to speed up builds
  • tegra/meson: add missing dep_libdrm
  • tegra/autotools: add missing libdrm cflags
  • gitlab-ci: limit the automatic CI to master and MRs
  • gitlab-ci: automatically run the CI on pushes to `ci/*` branches
  • anv: sort extensions alphabetically
  • anv: sort vendors extensions after KHR and EXT
  • anv: make sure the extensions stay sorted
  • anv: drop unused imports
  • anv: use anv_shader_bin_write_to_blob()'s return value
  • gitlab-ci: always run the containers build
  • dri_interface: add missing #include
  • driinfo: add DTD to allow the xml to be validated
  • meson/swr: replace hard-coded path with current_build_dir()
  • egl/android: replace magic 0=CbCr,1=CrCb with simple enum
  • vulkan: use VkBase{In,Out}Structure instead of a custom struct
  • driconf: add DTD to allow the drirc xml (00-mesa-defaults.conf) to be validated
  • gitlab-ci: install xmllint to validate 00-mesa-defaults.conf
  • anv: simplify chained comparison
  • anv: drop unused parameter
  • anv: remove spaces around kwargs assignment
  • anv: fix typo
  • Revert "swr/rast: Archrast codegen updates"
  • meson: avoid going back up the tree with include_directories()
  • anv: use the platform defines in vk.xml instead of hard-coding them
  • radv: use the platform defines in vk.xml instead of hard-coding them
  • util: #define PATH_MAX when undefined (eg. Hurd)
  • vulkan: import missing file from Khronos
  • egl: fix libdrm-less builds
  • vulkan: import vk_layer.h from Khronos
  • gitlab-ci: drop job prefixes
  • meson: fix with_dri2 definition for GNU Hurd
  • meson: remove unused include_directories(vulkan)
  • vulkan/util: use the platform defines in vk.xml instead of hard-coding them
  • vulkan/overlay: fix missing var rename in previous commit
  • meson: don't build libGLES*.so with GLVND
  • autotools: don't build libGLES*.so with GLVND
  • travis: fix meson build by letting `auto` do its job
  • travis: drop unused vars
  • travis: clean up
  • gitlab-ci: only build the default (=latest) and oldest llvm versions
  • gitlab-ci: autotools needs to be told which llvm version to use
  • r600: cast pointer to expected type
  • build: make passing an incorrect pointer type a hard error
  • gitlab-ci: fix llvm version (7 doesn't have a ".0")
  • hgl/meson: drop unused include directory
  • glx/meson: use full include path for dri_interface.h
  • android: fix missing backspace for line continuation
  • panfrost: fix tgsi_to_nir() call
  • panfrost: move #include to fix compilation
  • gitlab-ci: add panfrost to the gallium drivers build
  • wsi: deduplicate get_current_time() functions between display and x11
  • wsi/display: s/#if/#ifdef/ to fix -Wundef
  • wsi/wayland: fix pointer casting warning on 32bit
  • wsi/x11: use WSI_FROM_HANDLE() instead of pointer casts
  • turnip: use the platform defines in vk.xml instead of hard-coding them
  • travis: fix osx meson build
  • nir: const `nir_call_instr::callee`
  • gitlab-ci: add clang build
  • gitlab-ci: drop most autotools builds
  • util/disk_cache: close fd in the fallback path
  • egl: hide entrypoints that shouldn't be exported when using glvnd
  • meson: strip rpath from megadrivers
  • gallium/hud: fix memory leaks
  • gallium/hud: prevent buffer overflow
  • gallium/hud: fix rounding error in nic bps computation
  • simplify LLVM version string printing
  • util/process: document memory leak
  • vk/util: remove unneeded array index
  • bin: drop unused import from install_megadrivers.py
  • meson: remove meson-created megadrivers symlinks
  • gitlab-ci: build gallium extra hud
  • gitlab-ci: add lima to the build
  • delete autotools .gitignore files
  • delete autotools input files
  • docs: remove unsupported GL function name mangling
  • docs: drop autotools python information
  • docs: replace autotools intructions with meson equivalent
  • docs: use past tense when talking about autotools
  • docs: haiku can be built using meson
  • egl: fixup autotools-specific wording
  • util: add os_read_file() helper
  • anv: add support for VK_EXT_memory_budget
  • radv: update to use the new features struct names
  • turnip: update to use the new features struct names
  • gitlab-ci: build vulkan drivers in clang build
  • util: move #include out of #if linux
  • wsi/wayland: document lack of vkAcquireNextImageKHR timeout support
  • egl: hard-code destroy function instead of passing it around as a pointer
  • gitlab-ci: add scons windows build using mingw
  • gitlab-ci: merge several meson jobs
  • gitlab-ci: meson-gallium-radeonsi was a subset of meson-gallium-clover-llvm
  • gitlab-ci: simplify meson job names
  • gitlab-ci: merge meson-glvnd into meson-swr
  • travis: fix syntax, and drop unused stuff
  • util/os_file: always use the 'grow' mechanism
  • meson: expose glapi through osmesa
  • util/os_file: actually return the error read() gave us

Erico Nunes (5):

  • lima/ppir: support ppir_op_ceil
  • nir/algebraic: add lowering for fsign
  • lima: enable nir fsign lowering in ppir
  • lima/gpir: add limit of max 512 instructions
  • lima/ppir: support nir_op_ftrunc

Erik Faye-Lund (79):

  • mesa: expose NV_conditional_render on GLES
  • st/mesa: remove unused header-file
  • swr/codegen: fix autotools build
  • virgl: remove unused variables
  • virgl: remove unused variable
  • virgl: remove unused variable
  • virgl: remove unused variable
  • virgl: do not allow compressed formats for buffers
  • virgl: stricter usage of compressed 3d textures
  • virgl: also destroy all read-transfers
  • virgl: use debug_printf instead of fprintf
  • virgl: unsigned int -> unsigned
  • virgl: only warn about unchecked flags
  • virgl: do not warn about display-target binding
  • virgl: use debug_printf instead of fprintf
  • virgl: remove pointless transfer-counter
  • virgl: tmp_resource -> templ
  • virgl: track full virgl_resource instead of just virgl_hw_res
  • virgl: simplify virgl_texture_transfer_unmap logic
  • virgl: make unmap queuing a bit more straight-forward
  • virgl: check for readback on correct resource
  • virgl: wait for the right resource
  • virgl: return error if allocating resolve_tmp fails
  • virgl: rewrite core of virgl_texture_transfer_map
  • virgl: use pipe_box for blit dst-rect
  • virgl: support write-back with staged transfers
  • virgl: make sure bind is set for non-buffers
  • gallium/util: support translating between uint and sint formats
  • virgl: get readback-formats from host
  • virgl: only blit if resource is read
  • virgl: do color-conversion during when mapping transfer
  • virgl: document potentially failing blit
  • mesa/st: remove impossible error-check
  • gallium/u_vbuf: support NULL-resources
  • i915: support NULL-resources
  • nouveau: support NULL-resources
  • swr: support NULL-resources
  • mesa/st: accept NULL and empty buffer objects
  • mesa/st: remove always-false state
  • softpipe: setup pixel_offset for all primitive types
  • docs: normaize css-indent style
  • docs: remove non-existent css attribute
  • docs: remove long commented out css
  • docs: add missing semicolon
  • docs: avoid repeating the font
  • docs: avoid repeating the color
  • docs: remove spurious newline
  • docs: use multiple background-images for header
  • docs: simplify css-centering
  • docs: do not hard-code header-height
  • docs: properly escape '>'
  • docs: properly escape ampersand
  • docs: remove stray paragraph-close
  • docs: use h2 instead of b-tag for headings
  • docs: use dl/dd instead of blockquote for freedesktop link
  • docs: open list-item before closing it
  • docs: close paragraphs before lists
  • docs: close lists
  • docs: remove stray paragraph-close
  • docs: close paragraphs before preformatted text
  • docs: start paragraph before closing it
  • docs: drop paragraph around preformatted text
  • docs: fix incorrectly closed paragraph
  • docs: don't pointlessly close and re-start definition lists
  • docs: remove stray list-start
  • docs: fixup bad paragraphing
  • docs: add missing lists
  • docs: fix closing of paragraphs
  • docs: fixup list-item tags
  • docs: fix closing of list-items
  • docs: replace empty list with a none-paragraph
  • docs: turn faq-index into an ordered list
  • docs: drop centered heading for faq
  • docs: reorder heading and notice
  • meson: lift driver-collection out into parent build-file
  • meson: give dri- and gallium-drivers separate vars
  • meson: add build-summary
  • docs: fixup mistake in contents
  • draw: flush when setting stream-out targets

Ernestas Kulik (2):

  • vc4: Fix leak in HW queries error path
  • v3d: Fix leak in resource setup error path

Francisco Jerez (6):

  • intel/dump_gpu: Disambiguate between BOs from different GEM handle spaces.
  • intel/fs: Exclude control sources from execution type and region alignment calculations.
  • intel/fs: Lower integer multiply correctly when destination stride equals 4.
  • intel/fs: Cap dst-aligned region stride to maximum representable hstride value.
  • intel/fs: Implement extended strides greater than 4 for IR source regions.
  • intel/fs: Rely on undocumented unrestricted regioning for 32x16-bit integer multiply.

Fritz Koenig (4):

  • freedreno: pass count to query_dmabuf_modifiers
  • freedreno/a6xx: UBWC support
  • freedreno: UBWC allocator
  • freedreno/a6xx: Enable UBWC modifier

Gert Wollny (35):

  • mesa/core: Enable EXT_texture_sRGB_R8 also for desktop GL
  • radeonsi: release tokens after creating the shader program
  • mesa: release references to image textures when a context is destroyed
  • virgl: Enable mixed color FBO attachemnets only when the host supports it
  • mesa/core: Enable EXT_depth_clamp for GLES >= 2.0
  • nir: Add posibility to not lower to source mod 'abs' for ops with three sources
  • mesa: Expose EXT_texture_query_lod and add support for its use shaders
  • softpipe: Enable PIPE_CAP_MIXED_COLORBUFFER_FORMATS It seems softpipe actually supports this. This change enables the following piglits as passing without regressions in the gpu test set:
  • virgl: Add a caps feature check version
  • softpipe: Implement ATOMFADD and enable cap TGSI_ATOMFADD
  • virgl: define MAX_VERTEX_STREAMS based on availability of TF3
  • softpipe: Use mag texture filter also for clamped lod == 0
  • softpipe: Don't use mag filter for gather op
  • softpipe: raise number of bits used for X coordinate texture lookup
  • softpipe: Add an extra code path for the buffer texel lookup
  • softpipe: Enable PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
  • Gallium: Add new CAP that indicated whether IO array definitions can be shriked
  • virgl: Enable passing arrays as input to fragment shaders
  • doc/features: Add a few extensions to the feature matrix
  • softpipe: Factor gradient evaluation out of the lambda evaluation
  • softpipe: Prepare handling explicit gradients
  • softpipe: Pipe gather_comp through from st_tgsi_get_samples
  • softpipe: Move selection of shadow values up and clean parameter list
  • softpipe: tie in new code path for lod evaluation
  • softpipe: keep input lod for explicite derivatives
  • softpipe: evaluate cube the faces on a per sample bases
  • softpipe: Factor out evaluation of the source indices
  • softpipe: Add an per-input array for interpolator correctors to machine
  • softpipe: Add (fake) support for TGSI_OPCODE_INTERP_SAMPLE
  • softpipe: Add support for TGSI_OPCODE_INTERP_OFFSET
  • softpipe: Add support for TGSI_OPCODE_INTERP_CENTROID
  • softpipe: Increase the GLSL feature level
  • doc: Update feature matrix
  • softpipe/buffer: load only as many components as the the buffer resource type provides
  • Revert "softpipe/buffer: load only as many components as the the buffer resource type provides"

Greg V (3):

  • util: emulate futex on FreeBSD using umtx
  • gallium/hud: add CPU usage support for FreeBSD
  • gallium: enable dmabuf on BSD as well

Grigori Goronzy (1):

  • glx: add support for GLX_ARB_create_context_no_error (v3)

Guido Günther (4):

  • docs: Fix 19.0.x version numbers
  • gallium: ddebug: Add missing fence related wrappers
  • gallium/u_dump: util_dump_sampler_view: Dump u.tex.first_level
  • gallium: trace: Add missing fence related wrappers

Gurchetan Singh (44):

  • mesa/main: Expose EXT_texture_compression_s3tc_srgb
  • i965: Set flag for EXT_texture_compression_s3tc_srgb
  • st/mesa: expose EXT_texture_compression_s3tc_srgb
  • docs: add GL_EXT_texture_compression_s3tc_srgb to release notes
  • virgl: add ability to do finer grain dirty tracking
  • virgl: use virgl_resource_dirty helper
  • virgl: don't mark unclean after a flush
  • virgl: track level cleanliness rather than resource cleanliness
  • virgl: make alignment smaller when uploading index user buffers
  • virgl: unmap uploader at flush time
  • virgl: when creating / freeing transfers, pass slab pool directly
  • virgl: add protocol for resource transfers
  • virgl: use virgl_transfer in inline write
  • virgl: limit command length to 16 bits
  • virgl: keep track of number of computations
  • virgl: pass virgl transfer to virgl_res_needs_flush_wait
  • virgl: add extra checks in virgl_res_needs_flush_wait
  • virgl: make winsys modifications for encoded transfers
  • virgl: add encoder functions for new protocol
  • virgl: introduce transfer queue
  • virgl: use transfer queue
  • virgl: use virgl_transfer_inline_write even less
  • virgl/vtest: deprecate protocol version 1
  • egl/sl: also allow virtgpu to fallback to kms_swrast
  • virgl: use uint16_t mask instead of separate booleans
  • configure.ac / meson: depend on libnativewindow when appropriate
  • anv: move anv_GetMemoryAndroidHardwareBufferANDROID up a bit
  • anv: fix build on Nougat
  • egl/android: move droid_image_loader_extension down a bit
  • egl/android: move droid_open_device_drm_gralloc down a bit
  • egl/android: droid_open_device_drm_gralloc --> droid_open_device
  • egl/android: refactor droid_load_driver a bit
  • egl/android: plumb swrast option
  • egl/android: use swrast option in droid_load_driver
  • egl/android: use software rendering when appropriate
  • egl/android: chose node type based on swrast and preprocessor flags
  • virgl: wait after a flush
  • virgl/vtest: execute a transfer_get when flushing the front buffer
  • virgl/vtest: add utilities for receiving fds
  • virgl/vtest: plumb support for shared memory
  • virgl/vtest: receive and handle shared memory fd
  • virgl/vtest: modify sending and receiving data for shared memory
  • virgl/vtest: wait after issuing a transfer get
  • virgl/vtest: bump up protocol version + support encoded transfers

Guttula, Suresh (1):

  • st/va:Add support for indirect manner by returning VA_STATUS_ERROR_OPERATION_FAILED

Hal Gentz (1):

  • glx: Fix synthetic error generation in __glXSendError

Heinrich (1):

  • gbm: Improve documentation of BO import

Iago Toral Quiroga (39):

  • compiler/nir: add an is_conversion field to nir_op_info
  • compiler/nir: add lowering option for 16-bit fmod
  • compiler/nir: add lowering for 16-bit flrp
  • compiler/nir: add lowering for 16-bit ldexp
  • intel/compiler: add a NIR pass to lower conversions
  • intel/compiler: split float to 64-bit opcodes from int to 64-bit
  • intel/compiler: handle b2i/b2f with other integer conversion opcodes
  • intel/compiler: assert restrictions on conversions to half-float
  • intel/compiler: lower some 16-bit float operations to 32-bit
  • intel/compiler: handle extended math restrictions for half-float
  • intel/compiler: implement 16-bit fsign
  • intel/compiler: drop unnecessary temporary from 32-bit fsign implementation
  • intel/compiler: add instruction setters for Src1Type and Src2Type.
  • intel/compiler: add new half-float register type for 3-src instructions
  • intel/compiler: don't compact 3-src instructions with Src1Type or Src2Type bits
  • intel/compiler: allow half-float on 3-source instructions since gen8
  • intel/compiler: set correct precision fields for 3-source float instructions
  • intel/compiler: fix ddx and ddy for 16-bit float
  • intel/compiler: fix ddy for half-float in Broadwell
  • intel/compiler: workaround for SIMD8 half-float MAD in gen8
  • intel/compiler: split is_partial_write() into two variants
  • intel/compiler: activate 16-bit bit-size lowerings also for 8-bit
  • intel/compiler: rework conversion opcodes
  • intel/compiler: ask for an integer type if requesting an 8-bit type
  • intel/eu: force stride of 2 on NULL register for Byte instructions
  • intel/compiler: generalize the combine constants pass
  • intel/compiler: implement is_zero, is_one, is_negative_one for 8-bit/16-bit
  • intel/compiler: add a brw_reg_type_is_integer helper
  • intel/compiler: fix cmod propagation for non 32-bit types
  • intel/compiler: remove inexact algebraic optimizations from the backend
  • intel/compiler: skip MAD algebraic optimization for half-float or mixed mode
  • intel/compiler: implement SIMD16 restrictions for mixed-float instructions
  • intel/compiler: also set F execution type for mixed float mode in BDW
  • intel/compiler: validate region restrictions for half-float conversions
  • intel/compiler: validate conversions between 64-bit and 8-bit types
  • intel/compiler: validate region restrictions for mixed float mode
  • compiler/spirv: move the check for Int8 capability
  • anv/pipeline: support Float16 and Int8 SPIR-V capabilities in gen8+
  • anv/device: expose VK_KHR_shader_float16_int8 in gen8+

Ian Romanick (55):

  • nir: Silence zillions of unused parameter warnings in release builds
  • intel/compiler: Silence warning about value that may be used uninitialized
  • nir: Document some fields of nir_loop_terminator
  • nir: Refactor code that checks phi nodes in opt_peel_loop_initial_if
  • nir: Select phi nodes using prev_block instead of continue_block
  • nir: Split ALU instructions in loops that read phis
  • nir: Convert a bcsel with only phi node sources to a phi node
  • spirv: Add missing break
  • nir/algebraic: Convert some f2u to f2i
  • nir/algebraic: Simplify comparison with sequential integers starting with 0
  • intel/vec4: Emit constants for some ALU sources as immediate values
  • nir/algebraic: Replace i2b used by bcsel or if-statement with comparison
  • intel/fs: Relax type matching rules in cmod propagation from MOV instructions
  • intel/fs: Handle OR source modifiers in algebraic optimization
  • intel/fs: Refactor ALU source and destination handling to a separate function
  • intel/fs: Emit logical-not of operands on Gen8+
  • intel/fs: Use De Morgan's laws to avoid logical-not of a logic result on Gen8+
  • intel/fs: Emit better code for b2f(inot(a)) and b2i(inot(a))
  • nir/algebraic: Replace a bcsel of a b2f sources with a b2f(!(a || b))
  • intel/fs: Generate if instructions with inverted conditions
  • nir/algebraic: Replace a-fract(a) with floor(a)
  • intel/fs: Don't assert on b2f with a saturate modifier
  • nir/algebraic: Optimize away an fsat of a b2f
  • intel/compiler: Silence many unused parameter warnings in brw_eu.h
  • intel/compiler: Silence unused parameter warning in brw_interpolation_map.c
  • intel/fs: nir_op_extract_i8 extracts a byte, not a word
  • intel/fs: Fix extract_u8 of an odd byte from a 64-bit integer
  • nir/algebraic: Fix up extract_[iu]8 after loop unrolling
  • nir/algebraic: Remove redundant extract_[iu]8 patterns
  • nir/algebraic: Add missing 64-bit extract_[iu]8 patterns
  • nir/algebraic: Add missing 16-bit extract_[iu]8 patterns
  • nir/algebraic: Fix up extract_[iu]8 after loop unrolling
  • nir/algebraic: Remove redundant extract_[iu]8 patterns
  • nir/algebraic: Add missing 64-bit extract_[iu]8 patterns
  • nir/algebraic: Add missing 16-bit extract_[iu]8 patterns
  • nir: Add nir_const_value_negative_equal
  • nir: Add nir_alu_srcs_negative_equal
  • nir: Add partial redundancy elimination for compares
  • intel/compiler: Use partial redundancy elimination for compares
  • intel/fs: Eliminate dead code first
  • intel/fs: Refactor code generation for nir_op_fsign to its own function
  • intel/fs: Add a scale factor to emit_fsign
  • intel/fs: Generate better code for fsign multiplied by a value
  • nir/algebraic: Recognize open-coded copysign(1.0, a)
  • nir/algebraic: Replace a pattern where iand with a Boolean is used as a bcsel
  • nir/algebraic: Fix some 1-bit Boolean weirdness
  • nir/algebraic: Strength reduce some compares of x and -x
  • intel/fs: Add support for float16 to the fsign optimizations
  • glsl: Silence may unused parameter warnings in glsl/ir.h
  • intel/compiler: Don't have sepearate, per-Gen nir_options
  • intel/compiler: Lower ffma on Gen4 and Gen5
  • intel/fs: Fix D to W conversion in opt_combine_constants
  • mesa: Add missing display list support for GL_FOG_COORDINATE_SOURCE
  • nir: Saturating integer arithmetic is not associative
  • Revert "nir: add late opt to turn inot/b2f combos back to bcsel"

Icenowy Zheng (5):

  • lima: add dummy set_sample_mask function
  • lima: make lima_context_framebuffer subtype of pipe_framebuffer_state
  • lima: implement blit with util_blitter
  • lima: lower bool to float when building shaders
  • lima: add Android build

Ilia Mirkin (14):

  • nv50,nvc0: add explicit settings for recent caps
  • nvc0: add support for handling indirect draws with attrib conversion
  • nvc0/ir: always use CG mode for loads from atomic-only buffers
  • nvc0/ir: fix second tex argument after levelZero optimization
  • nvc0: fix 3d images on kepler
  • nv50,nvc0: use condition for occlusion queries when already complete
  • nvc0: stick zero values for the compute invocation counts
  • nvc0: we have 16k-sized framebuffers, fix default scissors
  • swr: set PIPE_CAP_MAX_VARYINGS correctly
  • mesa: add explicit enable for EXT_float_blend, and error condition
  • st/mesa: enable GL_EXT_float_blend when possible
  • i965: always enable EXT_float_blend
  • nv50: disable compute
  • glsl: fix recording of variables for XFB in TCS shaders

Illia Iorin (1):

  • mesa/main: Fix multisample texture initialize

James Zhu (12):

  • gallium/auxiliary/vl: Move dirty define to header file
  • gallium/auxiliary/vl: Split vl_compositor graphic shaders from vl_compositor API
  • gallium/auxiliary/vl: Rename csc_matrix and increase its size.
  • gallium/auxiliary/vl: Add compute shader to support video compositor render
  • gallium/auxiliary/vl: Add video compositor compute shader render
  • gallium/auxiliary/vl: Fix transparent issue on compute shader with rgba
  • gallium/auxiliary/vl: Increase shader_params size
  • gallium/auxiliary/vl: Change grid setting
  • gallium/auxiliary/vl: Change weave compute shader implementation
  • gallium/auxiliary/vl: Fixed blur issue with weave compute shader
  • gallium/auxiliary/vl: Fixed blank issue with compute shader
  • gallium/auxiliary/vl: Add barrier/unbind after compute shader launch.

Jan Vesely (2):

  • Partially revert "gallium: fix autotools build of pipe_msm.la"
  • gallium/aux: Report error if loading of a pipe driver fails.

Jan Zielinski (1):

  • swr/rast: fix 32-bit compilation on Linux

Jason Ekstrand (212):

  • spirv: Replace vtn_constant_value with vtn_constant_uint
  • spirv: Rework handling of spec constant workgroup size built-ins
  • spirv: Handle constants and types before execution modes
  • spirv: Handle OpExecutionModeId
  • spirv: Support LocalSizeId and LocalSizeHintId execution modes
  • intel/nir: Add global support to lower_mem_access_bit_sizes
  • intel/fs/cse: Split create_copy_instr into three cases
  • intel/fs: Properly handle 64-bit types in LOAD_PAYLOAD
  • intel/fs: Do the grf127 hack on SIMD8 instructions in SIMD16 mode
  • intel/fs: Implement load/store_global with A64 untyped messages
  • intel/fs: Use SENDS for A64 writes on gen9+
  • intel/fs: Implement nir_intrinsic_global_atomic_*
  • anv: Implement VK_EXT_buffer_device_address
  • relnotes: Add VK_EXT_buffer_device_address
  • nir/deref: Drop zero ptr_as_array derefs
  • README: Drop the badges from the readme
  • intel/fs: Use enumerated array assignments in fb read TXF setup
  • nir/deref: Rematerialize parents in rematerialize_derefs_in_use_blocks
  • nir: Silence a couple of warnings in release builds
  • anv/blorp: Delete a pointless assert
  • anv: Silence some compiler warnings in release builds
  • intel/fs: Silence a compiler warning
  • intel/fs: Bail in optimize_extract_to_float if we have modifiers
  • nir/dead_cf: Inline cf_node_has_side_effects
  • nir/dead_cf: Stop relying on liveness analysis
  • compiler/types: Add a contains_64bit helper
  • nir/xfb: Properly align 64-bit values
  • nir: Rewrite lower_clip_cull_distance_arrays to do a lot less lowering
  • nir/xfb: Work in terms of components rather than slots
  • nir/xfb: Handle compact arrays in gather_xfb_info
  • nir: Fix a compile warning
  • nir/lower_clip_cull: Fix an incorrect assert
  • iris: Don't lower image formats for write-only images
  • iris/compute: Don't increment the grid size offset
  • iris/compute: Zero out the last grid size on indirect dispatches
  • iris: Configure the L3$ on the compute context
  • iris: Don't set constant read lengths at upload time
  • iris: Allocate buffer resources separately
  • iris: Copy anv's MI_MATH helpers for multiplication and division
  • nir/split_vars: Don't compact vectors unnecessarily
  • nir/builder: Don't emit no-op swizzles
  • intel/eu: Add an EOT parameter to send_indirect_[split]_message
  • intel/fs: Add an enum type for logical sampler inst sources
  • intel/fs: Re-order logical surface arguments
  • intel/fs: Drop the fs_surface_builder
  • intel/vec4: Drop dead code for handling typed surface messages
  • intel/fs: Get rid of the IMAGE_SIZE opcode
  • intel/compiler: Drop unused surface opcodes
  • intel/schedule_instructions: Move some comments
  • intel/compiler: Re-prefix non-logical surface opcodes with VEC4
  • anv: Count surfaces for non-YCbCr images in GetDescriptorSetLayoutSupport
  • spirv: OpImageQueryLod requires a sampler
  • intel,nir: Lower TXD with min_lod when the sampler index is not < 16
  • anv: Use an actual binding for gl_NumWorkgroups
  • anv/pipeline: Drop anv_fill_binding_table
  • anv/descriptor_set: Refactor alloc/free of descriptor sets
  • anv: Rework arguments to anv_descriptor_set_write_*
  • anv: Stop allocating buffer views for dynamic buffers
  • anv: Count image param entries rather than images
  • anv: Clean up descriptor set layouts
  • anv: drop add_var_binding from anv_nir_apply_pipeline_layout.c
  • anv: Refactor descriptor pushing a bit
  • anv: Take references to push descriptor set layouts
  • anv: Add a concept of a descriptor buffer
  • spirv: Pull offset/stride from the pointer for OpArrayLength
  • spirv: Use the generic dereference function for OpArrayLength
  • spirv: Use the same types for resource indices as pointers
  • anv: Implement VK_EXT_inline_uniform_block
  • nir: Expose double and int64 op_to_options_mask helpers
  • nir: Teach loop unrolling about 64-bit instruction lowering
  • i965: Compile the fp64 program based on nir options
  • intel/debug: Add a debug flag to force software fp64
  • intel/nir: Drop an unneeded lower_constant_initializers call
  • glsl/nir: Add a shared helper for building float64 shaders
  • glsl/nir: Inline functions in float64_funcs_to_nir
  • nir/inline_functions: Break inlining into a builder helper
  • nir/deref: Expose nir_opt_deref_impl
  • nir/lower_doubles: Inline functions directly in lower_doubles
  • intel/nir: Move 64-bit lowering later
  • st/nir: Move 64-bit lowering later
  • nir/builder: Emit better code for iadd/imul_imm
  • nir/builder: Cast array indices in build_deref_follower
  • nir/builder: Add a build_deref_array_imm helper
  • intel/nir: Move lower_mem_access_bit_sizes to postprocess_nir
  • anv/pipeline: Move lower_explicit_io much later
  • nir: Add a pass for lowering IO back to vector when possible
  • intel/nir: Vectorize all IO
  • anv: Ignore VkRenderPassInputAttachementAspectCreateInfo
  • nir/loop_unroll: Fix out-of-bounds access handling
  • glsl/list: Add a list variant of insert_after
  • glsl/lower_vector_derefs: Don't use a temporary for TCS outputs
  • anv: Stop using VK_TRUE/FALSE
  • anv/pass: Flag the need for a RT flush for resolve attachments
  • anv: Only set 3DSTATE_PS::VectorMaskEnable on gen8+
  • nir/algebraic: Add a couple optimizations for iabs and ishr
  • nir/validate: Only require bare types to match for copy_deref
  • nir/validate: Allow 32-bit boolean load/store intrinsics
  • compiler/types: Add a new is_interface C wrapper
  • compiler/types: Add a C wrapper to get full struct field data
  • compiler/types: Add helpers to get explicit types for standard layouts
  • nir/deref: Consider COHERENT decorated var derefs as aliasing
  • nir: Rename nir_address_format_vk_index_offset to not be vk
  • nir/lower_io: Add a new buffer_array_length intrinsic and lowering
  • glsl: Don't lower vector derefs for SSBOs, UBOs, and shared
  • glsl/nir: Set explicit types on UBO/SSBO variables
  • glsl/nir: Handle unlowered SSBO atomic and array_length intrinsics
  • glsl/nir: Add a pass to lower UBO and SSBO access
  • i965: Stop setting LowerBuferInterfaceBlocks
  • st/mesa: Let NIR lower UBO and SSBO access when we have it
  • nir/builder: Add a vector extract helper
  • nir: Add a new pass to lower array dereferences on vectors
  • intel/nir: Lower array-deref-of-vector UBO and SSBO loads
  • anv: Implement VK_EXT_host_query_reset
  • anv,radv: Implement VK_KHR_surface_capability_protected
  • Revert "nir: const `nir_call_instr::callee`"
  • anv: Bump maxComputeWorkgroupInvocations
  • nir: Constant values are per-column not per-component
  • anv,radv,turnip: Lower TG4 offsets with nir_lower_tex
  • spirv: Drop inline tg4 lowering
  • nir/lower_io: Add a bounds-checked 64-bit global address format
  • nir: Add a lowering pass for non-uniform resource access
  • nir: Add texture sources and intrinsics for bindless
  • nir: Add access flags to deref and SSBO atomics
  • spirv: Handle the NonUniformEXT decoration
  • Revert "anv/radv: release memory allocated by glsl types during spirv_to_nir"
  • nir: Lock around validation fail shader dumping
  • nir/algebraic: Drop some @bool specifiers
  • nir/algebraic: Add some logical OR and AND patterns
  • vc4: Prefer nir_src_comp_as_uint over nir_src_as_const_value
  • nir/search: Search for all combinations of commutative ops
  • nir: Get rid of nir_register::is_packed
  • nir: Get rid of global registers
  • intel/common: Add a MI command builder
  • intel/common: Add unit tests for gen_mi_builder
  • anv: Use gen_mi_builder for CmdDrawIndirectByteCount
  • anv: Use gen_mi_builder for computing resolve predicates
  • +
  • anv: Use gen_mi_builder for indirect draw parameters
  • +
  • anv: Use gen_mi_builder for indirect dispatch
  • +
  • anv: Use gen_mi_builder for conditional rendering
  • +
  • anv: Use gen_mi_builder for queries
  • +
  • anv: Move mi_memcpy and mi_memset to gen_mi_builder
  • +
  • anv/cmd_buffer: Use gen_mi_sub instead of gen_mi_add with a negative
  • +
  • intel/common: Support bigger right-shifts with mi_builder
  • +
  • anv/pipeline: Fix MEDIA_VFE_STATE::PerThreadScratchSpace on gen7
  • +
  • nir: Add a pass for selectively lowering variables to scratch space
  • +
  • intel/nir: Take a nir_tex_instr and src index in brw_texture_offset
  • +
  • nir/builder: Add a nir_imm_zero helper
  • +
  • nir/print: Use nir_src_as_int for array indices
  • +
  • nir/constant_folding: Get rid of a bit size switch statement
  • +
  • spirv: Drop some unneeded bit size switch statements
  • +
  • nir/load_const_to_scalar: Get rid of a bit size switch statement
  • +
  • nir/validate: Require unused bits of nir_const_value to be zero
  • +
  • vulkan: Update the XML and headers to 1.1.106
  • +
  • anv: Update to use the new features struct names
  • +
  • nir/algebraic: Move the template closer to the render function
  • +
  • nir/algebraic: Use a cache to avoid re-emitting structs
  • +
  • intel/mi_builder: Re-order an initializer
  • +
  • intel/mi_builder: Disable mem_mem tests on IVB
  • +
  • nir: Drop "struct" from some nir_* declarations
  • +
  • nir: Rework nir_src_as_alu_instr to not take a pointer
  • +
  • nir: Add a nir_src_as_intrinsic() helper
  • +
  • anv: Re-sort the GetPhysicalDeviceFeatures2 switch statement
  • +
  • anv: Drop some unneeded ANV_FROM_HANDLE for physical devices
  • +
  • intel/fs: Account for live range lengths in spill costs
  • +
  • anv: Make all VkDeviceMemory BOs resident permanently
  • +
  • anv: Put image params in the descriptor set buffer on gen8 and earlier
  • +
  • anv: Add a #define for the max binding table size
  • +
  • anv/pipeline: Sort bindings by most used first
  • +
  • anv/pipeline: Add skeleton support for spilling to bindless
  • +
  • nir/lower_io: Expose some explicit I/O lowering helpers
  • +
  • intel/nir: Re-run int64 lowering in postprocess_nir
  • +
  • anv: Add a has_a64_buffer_access to anv_physical_device
  • +
  • anv: Lower some SSBO operations in apply_pipeline_layout
  • +
  • anv: Implement SSBOs bindings with GPU addresses in the descriptor BO
  • +
  • anv: Implement VK_KHR_shader_atomic_int64
  • +
  • intel,nir: Lower TXD with a bindless sampler
  • +
  • intel/fs: Add support for bindless texture ops
  • +
  • anv: Count the number of planes in each descriptor binding
  • +
  • anv: Use write_image_view to initialize immutable samplers
  • +
  • anv: Pass the plane into lower_tex_deref
  • +
  • anv: Use bindless textures and samplers
  • +
  • intel/fs: Add support for bindless image load/store/atomic
  • +
  • anv: Use bindless handles for images
  • +
  • anv: Put binding flags in descriptor set layouts
  • +
  • anv: Implement VK_EXT_descriptor_indexing
  • +
  • nir: Add helpers for getting the type of an address format
  • +
  • anv/nir: Add a central helper for figuring out SSBO address formats
  • +
  • anv: Ignore descriptor binding flags if bindingCount == 0
  • +
  • anv: Rework the descriptor set layout create loop
  • +
  • anv,radv: Update release notes for newly implemented extensions
  • +
  • nir: Use the NIR_SRC_AS_ macro to define nir_src_as_deref
  • +
  • anv/descriptor_set: Unlink sets from the pool in set_destroy
  • +
  • anv/descriptor_set: Destroy sets before pool finalization
  • +
  • anv/descriptor_set: Only vma_heap_finish if we have a descriptor buffer
  • +
  • anv/descriptor_set: Properly align descriptor buffer to a page
  • +
  • anv: Better handle 32-byte alignment of descriptor set buffers
  • +
  • anv/descriptor_set: Don't fully destroy sets in pool destroy/reset
  • +
  • nir/algebraic: Optimize integer cast-of-cast
  • +
  • util/bitset: Return an actual bool from test macros
  • +
  • anv: Stop including POS in FS input limits
  • +
  • anv,i965: Stop warning about incomplete gen11 support
  • +
  • nir: Add a SSA type gathering pass
  • +
  • intel/fs/ra: Only add dest interference to sources that exist
  • +
  • intel/fs/ra: Stop adding RA interference to too many SENDS nodes
  • +
  • anv: Emulate texture swizzle in the shader when needed
  • +
  • anv: Stop forcing bindless for images
  • +
  • anv: Only consider minSampleShading when sampleShadingEnable is set
  • +
  • iris: Don't assume UBO indices are constant
  • +
  • intel/fs,vec4: Use g0 as the header for MFENCE
  • +
  • intel/fs: Do a stalling MFENCE in endInvocationInterlock()
  • +
  • nir/dead_cf: Call instructions aren't dead
  • +
  • nir/propagate_invariant: Don't add NULL vars to the hash table

Jian-Hong Pan (1):

  • intel: Fix the description of Coffeelake pci-id 0x3E98

Jiang, Sonny (1):

  • va: use a compute shader for the blit

John Stultz (3):

  • mesa: android: freedreno: Fix build failure due to path change
  • mesa: Makefile.sources: Add ir3_nir_lower_load_barycentric_at_sample/offset to Makefile.sources
  • mesa: Makefile.sources: Add nir_lower_fb_read.c to Makefile.sources list

Jon Turney (1):

  • meson: Force '.so' extension for DRI drivers

Jonathan Marek (22):

  • nir: add missing vec opcodes in lower_bool_to_float
  • freedreno: a2xx: fix fast clear
  • freedreno: a2xx: don't write 4th vertex in mem2gmem
  • freedreno: a2xx: add use_hw_binning function
  • freedreno: a2xx: fix fast clear for some gmem configurations
  • freedreno: a2xx: fix mipmapping for NPOT textures
  • freedreno: use renderonly path for buffers allocated with modifiers
  • freedreno: catch failing fd_blit and fallback to software blit
  • mesa: add GL_AMD_compressed_ATC_texture support
  • gallium: add ATC format support
  • llvmpipe, softpipe: no support for ATC textures
  • st/mesa: add ATC support
  • freedreno: a3xx: add GL_AMD_compressed_ATC_texture support
  • freedreno: a2xx: add GL_AMD_compressed_ATC_texture support
  • svga: add new ATC formats to the format conversion table
  • freedreno: a2xx: fix builtin blit program compilation
  • freedreno: a2xx: disable PIPE_CAP_PACKED_UNIFORMS
  • freedreno: a2xx: use nir_lower_io for TGSI shaders
  • freedreno: a2xx: enable batch reordering
  • freedreno: a2xx: same gmem2mem sequence for all tiles
  • nir: improve convert_yuv_to_rgb
  • freedreno/ir3: fix input ncomp for vertex shaders

Jordan Justen (22):

  • iris: Set num_uniforms in bytes
  • iris/compute: Set mask bits on PIPELINE_SELECT
  • iris: Add IRIS_DIRTY_CONSTANTS_CS
  • iris: Add iris_restore_compute_saved_bos
  • iris/compute: Add MEDIA_STATE_FLUSH following WALKER
  • iris/compute: Flush compute batches
  • iris/compute: Get group counts from grid->grid
  • iris/program: Don't try to push ubo ranges for compute
  • iris/compute: Wait on compute batch when mapping
  • iris/compute: Provide binding table entry for gl_NumWorkGroups
  • iris/compute: Flush compute batch on memory-barriers
  • iris/compute: Push subgroup-id
  • iris/compute: Support indirect compute dispatch
  • iris: Emit default L3 config for the render pipeline
  • genxml/gen_bits_header.py: Use regex to strip no alphanum chars
  • genxml: Remove extra space in gen4/45/5 field name
  • iris: Add gitlab-ci build testing
  • iris: Always use in-tree i915_drm.h
  • nir: Add int64/doubles options into nir_shader_compiler_options
  • intel/compiler: Move int64/doubles lowering options
  • scons: Generate float64_glsl.h for glsl_to_nir fp64 lowering
  • intel/genxml: Support base-16 in value & start fields in gen_sort_tags.py

Jose Maria Casanova Crespo (4):

  • iris: Enable ARB_shader_draw_parameters support
  • glsl: fix typos in comments "transfor" -> "transform"
  • glsl: TCS outputs can not be transform feedback candidates on GLES
  • iris: setup EdgeFlag Vertex Element when needed.

José Fonseca (1):

  • scons: Workaround failures with MSVC when using SCons 3.0.[2-4].

Juan A. Suarez Romero (22):

  • anv/cmd_buffer: check for NULL framebuffer
  • nir: move ALU instruction before the jump instruction
  • nir: remove jump from two merging jump-ending blocks
  • genxml: add missing field values for 3DSTATE_SF
  • anv: advertise 8 subpixel precision bits
  • nir/spirv: return after emitting a branch in block
  • anv: destroy descriptor sets when pool gets reset
  • nir: deref only for OpTypePointer
  • anv: advertise 8 subtexel/mipmap precision bits
  • nir/xfb: do not use bare interface type
  • meson: Add dependency on genxml to anvil genfiles
  • Revert "intel/compiler: split is_partial_write() into two variants"
  • spirv: add missing SPV_EXT_descriptor_indexing capabilities
  • radv: enable descriptor indexing capabilities
  • anv: enable descriptor indexing capabilities
  • Update version to 19.1.0-rc1
  • Update version to 19.1.0-rc2
  • cherry-ignore: radeonsi: update buffer descriptors in all contexts after buffer invalidation
  • Update version to 19.1.0-rc3
  • Update version to 19.1.0-rc4
  • Update version to 19.1.0-rc5
  • Update version to 19.1.0

Julien Isorce (5):

  • gallium: add resource_get_info to pipe_screen
  • radeonsi: implement resource_get_info
  • st/va: properly set stride and offset in vlVaDeriveImage
  • r600: implement resource_get_info
  • st/va: check resource_get_info nullity in vlVaDeriveImage

Józef Kucia (3):

  • mesa: Fix GL_NUM_DEVICE_UUIDS_EXT
  • radv: Fix driverUUID
  • radv: clear vertex bindings while resetting command buffer

Karol Herbst (82):

  • nvc0/ir: replace cvt instructions with add to improve shader performance
  • gk104/ir: Use the new rcp/rsq in library
  • gm107/ir: add fp64 rcp
  • gm107/ir: add fp64 rsq
  • gallium: add PIPE_CAP_MAX_VARYINGS
  • st/mesa: require RGBA2, RGB4, and RGBA4 to be renderable
  • glsl_type: initialize offset and location to -1 for glsl_struct_field
  • nir/opt_if: don't mark progress if nothing changes
  • clover: update ICD table to support everything up to 2.2
  • nir: replace magic numbers with M_PI
  • nir/spirv: improve parsing of the memory model
  • nir: add support for address bit sized system values
  • nir/vtn: add support for SpvBuiltInGlobalLinearId
  • nir/spirv: initial handling of OpenCL.std extension opcodes
  • prog_to_nir: fix write from vps to FOG
  • nvc0: print the shader type when dumping headers
  • nv50/ir: move common converter code in base class
  • nv50/ir: add lowering helper
  • nouveau: add support for nir
  • nouveau: fix nir and TGSI shader cache collision
  • nv50/ir/nir: run some passes to make the conversion easier
  • nv50/ir/nir: track defs and provide easy access functions
  • nv50/ir/nir: add nir type helper functions
  • nv50/ir/nir: run assignSlots
  • nv50/ir/nir: add loadFrom and storeTo helpler
  • nv50/ir/nir: parse NIR shader info
  • nv50/ir/nir: implement nir_load_const_instr
  • nv50/ir/nir: add skeleton for nir_intrinsic_instr
  • nv50/ir/nir: implement nir_alu_instr handling
  • nv50/ir/nir: implement nir_intrinsic_load_uniform
  • nv50/ir/nir: implement nir_intrinsic_store_(per_vertex_)output
  • nv50/ir/nir: implement load_(interpolated_)input/output
  • nv50/ir/nir: implement intrinsic_discard(_if)
  • nv50/ir/nir: implement loading system values
  • nv50/ir/nir: implement nir_ssa_undef_instr
  • nv50/ir/nir: implement nir_instr_type_tex
  • nv50/ir/nir: add skeleton getOperation for intrinsics
  • nv50/ir/nir: implement vote and ballot
  • nv50/ir/nir: implement variable indexing
  • nv50/ir/nir: implement geometry shader nir_intrinsics
  • nv50/ir/nir: implement nir_intrinsic_load_ubo
  • nv50/ir/nir: implement ssbo intrinsics
  • nv50/ir/nir: implement images
  • nv50/ir/nir: add memory barriers
  • nv50/ir/nir: implement load_per_vertex_output
  • nv50/ir/nir: implement intrinsic shader_clock
  • nv50/ir/nir: handle user clip planes for each emitted vertex
  • nv50ir/nir: move immediates before use
  • glsl: add packed for struct types
  • glsl: add cl_size and cl_alignment
  • nir/lower_locals_to_regs: cast array index to 32 bit
  • nir/spirv: handle kernel function parameters
  • nir/spirv: support physical pointers
  • nir: add support for gather offsets
  • nv50/ir/nir: support gather offsets
  • nir/lower_tex: Add support for tg4 offsets lowering
  • nir/print: fix printing the image_array intrinsic index
  • nir/validate: validate that tex deref sources are actually derefs
  • v3d: prefer using nir_src_comp_as_int over nir_src_as_const_value
  • panfrost/midgard: use nir_src_is_const and nir_src_as_uint
  • glsl/standalone: add GLES3.1 and GLES3.2 compatibility
  • nir: move brw_nir_rewrite_image_intrinsic into common code
  • glsl_to_nir: handle bindless textures
  • glsl/nir: fetch the type for images from the deref instruction
  • glsl/nir: add support for lowering bindless images_derefs
  • nv50/ir/nir: handle bindless texture
  • nv50/ir/nir: add support for bindless images
  • nvc0/nir: enable bindless texture
  • lima: add bool parameter to type_size function
  • amd/nir: some cleanups
  • radv: use nir constant helpers
  • intel/nir: use nir_src_is_const and nir_src_as_uint
  • freedreno/ir3: use nir_src_as_uint in a few places
  • lima: use nir_src_as_float
  • nir/builder: Move nir_imm_vec2 from blorp into the builder
  • nir/loop_analyze: use nir_const_value.b for boolean results, not u32
  • spirv: reduce array size in vtn_handle_constant
  • nir: make nir_const_value scalar
  • vtn: handle bitcast with pointer src/dest
  • nir: Add a nir_builder_alu variant which takes an array of components
  • nir: Add nir_op_vec helper
  • spirv/cl: support vload/vstore

Kasireddy, Vivek (3):

  • nir/lower_tex: Add support for XYUV lowering
  • dri: Add XYUV8888 format
  • i965: Add support for sampling from XYUV images

Kenneth Graunke (872):

  • st/mesa: Set pipe_image_view::shader_access in PBO readpixels.
  • +
  • st/nir: Move varying setup code to a helper function.
  • +
  • st/nir: Make new helpers for constructing built-in NIR shaders.
  • +
  • st/mesa: Add a NIR version of the drawpixels/bitmap VS copy shader.
  • +
  • st/mesa: Add NIR versions of the drawpixels Z/stencil fragment shaders.
  • +
  • st/mesa: Add NIR versions of the clear shaders.
  • +
  • st/mesa: Add a NIR version of the OES_draw_texture built-in shaders.
  • +
  • st/mesa: Add NIR versions of the PBO upload/download shaders.
  • +
  • program: Use u_bit_scan64 in prog_to_nir.
  • +
  • program: Extend prog_to_nir handle system values.
  • +
  • nir: Record info->fs.pixel_center_integer in lower_system_values
  • +
  • compiler: Mark clip/cull distance arrays as compact before lowering.
  • +
  • nir: Bail on clip/cull distance lowering if GLSL IR already did it.
  • +
  • nir: Avoid clip/cull distance lowering multiple times.
  • +
  • nir: Avoid splitting compact arrays into per-element variables.
  • +
  • st/nir: Call nir_lower_clip_cull_distance_arrays().
  • +
  • gallium: Add a PIPE_CAP_NIR_COMPACT_ARRAYS capability bit.
  • +
  • nouveau: Silence unhandled cap warnings
  • +
  • st/mesa: Limit GL_MAX_[NATIVE_]PROGRAM_PARAMETERS_ARB to 2048
  • +
  • glsl: Allow gl_nir_lower_samplers*() without a gl_shader_program
  • +
  • glsl: Don't look at sampler uniform storage for internal vars
  • +
  • i965: Call nir_lower_samplers for ARB programs.
  • +
  • st/nir: Pull sampler lowering into a helper function.
  • +
  • st/nir: Lower sampler derefs for builtin shaders.
  • +
  • st/nir: Use sampler derefs in built-in shaders.
  • +
  • program: Make prog_to_nir create texture/sampler derefs.
  • +
  • nir: Use sampler derefs in drawpixels and bitmap lowering.
  • +
  • nir: Gather texture bitmasks in gl_nir_lower_samplers_as_deref.
  • +
  • i965: Drop unnecessary 'and' with prog->SamplerUnits
  • +
  • i965: Use info->textures_used instead of prog->SamplersUsed.
  • +
  • mesa: Advertise EXT_float_blend in ES 3.0+ contexts.
  • +
  • anv: Put MOCS in the correct location
  • +
  • spirv: Eliminate dead input/output variables after translation.
  • +
  • nir: Don't reassociate add/mul chains containing only constants
  • +
  • compiler: Make is_64bit(GL_*) helper more broadly available
  • +
  • mesa: Align doubles to a 64-bit starting boundary, even if packing.
  • +
  • radeonsi: Go back to using llvm.pow intrinsic for nir_op_fpow
  • +
  • st/mesa: Copy VP TGSI tokens if they exist, even for NIR shaders.
  • +
  • nir: Don't forget if-uses in new nir_opt_dead_cf liveness check
  • +
  • iris: Initial commit of a new 'iris' driver for Intel Gen8+ GPUs.
  • +
  • iris: viewport state, sort of
  • +
  • iris: port over batchbuffer updates
  • +
  • iris: initial render state upload
  • +
  • iris: packing with valgrind.
  • +
  • iris: merge pack
  • +
  • iris: initial gpu state, merges
  • +
  • iris: RASTER + SF + some CLIP, fix DIRTY vs. NEW
  • +
  • iris: scissors
  • +
  • iris: SF_CLIP_VIEWPORT
  • +
  • iris: Surfaces!
  • +
  • iris: sampler views
  • +
  • iris: stipples and vertex elements
  • +
  • iris: framebuffers
  • +
  • iris: don't segfault on !old_cso
  • +
  • iris: fix SF_CL length
  • +
  • iris: a bit of depth
  • +
  • iris: some draw info, vbs, sample mask
  • +
  • iris: fix crash - CSO binding can be NULL (when destroying context)
  • +
  • iris: COLOR_CALC_STATE
  • +
  • iris: sampler states
  • +
  • iris: emit 3DSTATE_SAMPLER_STATE_POINTERS
  • +
  • iris: basic push constant alloc
  • +
  • iris: some program code
  • +
  • iris: linear resources
  • +
  • iris: maps
  • +
  • iris: shader debug log
  • +
  • iris: drop unused field
  • +
  • iris: make an ice->render_batch field
  • +
  • iris: disable execbuf for now
  • +
  • iris: delete iris_pipe.c, shuffle code around
  • +
  • iris: init the batch!
  • +
  • iris: fix/rework line stipple
  • +
  • iris: actually save VBs
  • +
  • iris: msaa sample count packing problems
  • +
  • iris: fix prim type
  • +
  • iris: fix bogus index buffer reference
  • +
  • iris: draw->restart_index is uninitialized if PR is not enabled
  • +
  • iris: parse INTEL_DEBUG
  • +
  • iris: reworks, FS compile pieces
  • +
  • iris: import program cache code
  • +
  • iris: do the FS...asserts because we don't lower uniforms yet
  • +
  • iris: lower io
  • +
  • iris: make iris_batch target a particular ring
  • +
  • iris: kill iris_new_batch
  • +
  • iris: move MAX defines to iris_batch.h
  • +
  • iris: bit of SBA code
  • +
  • iris: flag SBA updates when instruction BO changes
  • +
  • iris: try and have an iris address
  • +
  • iris: so, sba then.
  • +
  • iris: reference VB BOs
  • +
  • iris: VB addresses
  • +
  • iris: DEBUG=bat
  • +
  • iris: VB fixes
  • +
  • iris: actually APPEND commands, not stomp over the top and never incr
  • +
  • iris: actually flush the commands
  • +
  • iris: actually advance forward when emitting commands
  • +
  • iris: initialize dirty bits to ~0ull
  • +
  • iris: hack to stop crashing on samplers for now
  • +
  • iris: fix indentation
  • +
  • iris: fix assert
  • +
  • iris: fix VBs
  • +
  • iris: vertex packet fixes
  • +
  • iris: fix VF instancing length so we don't get garbage in batch
  • +
  • iris: 3DPRIMITIVE fields
  • +
  • iris: bind_state -> compute state
  • +
  • iris: scissor slots
  • +
  • iris: some shader bits
  • +
  • iris: promote iris_program_cache_item to iris_compiled_shader
  • +
  • iris: actually save derived state
  • +
  • iris: emit shader packets
  • +
  • iris: convert IRIS_DIRTY_* to #defines
  • +
  • iris: don't forget about TE
  • +
  • iris: reorganize commands to match brw
  • +
  • iris: initial gpu state
  • +
  • iris: WM.
  • +
  • iris: index buffer BO
  • +
  • iris: more comes from bits filled in
  • +
  • iris: drop const from prog data parameters
  • +
  • iris: softpin some things
  • +
  • iris: use vtbl to avoid multiple symbols, fix state base address
  • +
  • iris: fix SBA
  • +
  • iris: move key pop to state module
  • +
  • iris: bits of WM key
  • +
  • iris: shuffle comments
  • +
  • iris: no NEW_SBA
  • +
  • iris: rewrite program cache to use u_upload_mgr
  • +
  • iris: actually destroy the cache
  • +
  • iris: actually softpin at an address
  • +
  • iris: actually set KSP offsets
  • +
  • iris: URB configs.
  • +
  • iris: dummy constants
  • +
  • iris: blend state
  • +
  • iris: alpha testing in PSB
  • +
  • iris: basic SBE code
  • +
  • iris: warning fixes
  • +
  • iris: fix silly unused batch with addr macro
  • +
  • iris: render targets!
  • +
  • iris: don't do samplers for disabled stages
  • +
  • iris: smaller blend state
  • +
  • iris: actually pin the instruction cache buffers
  • +
  • iris: compctrl
  • +
  • iris: more sketchy SBE
  • +
  • iris: fix dmabuf retval comparisons
  • +
  • iris: more SF CL VPs
  • +
  • iris: catastrophic state pointer mistake
  • +
  • iris: fix extents
  • +
  • iris: write DISABLES are not write ENABLES...whoops
  • +
  • iris: sample mask...not 0.
  • +
  • iris: uniform bits...badly
  • +
  • iris: warn if execbuf fails
  • +
  • iris: NOOP pad batches correctly
  • +
  • iris: decode batches if they fail to submit
  • +
  • iris: enable a few more formats
  • +
  • iris: set strides on transfers
  • +
  • iris: stop adding 9 to our varyings
  • +
  • iris: bufmgr updates.
  • +
  • iris: some thinking about binding tables
  • +
  • iris: Soft-pin the universe
  • +
  • iris: fix icache memzone
  • +
  • iris: dump gtt offset in dump_validation_list
  • +
  • iris: Also set SUPPORTS_48B? Not sure if necessary.
  • +
  • iris: more uploaders
  • +
  • iris: rewrite to use memzones and not relocs
  • +
  • iris: set EXEC_OBJECT_WRITE
  • +
  • iris: include p_defines.h in iris_bufmgr.h
  • +
  • iris: binders
  • +
  • iris: hook up batch decoder
  • +
  • iris: binder fixes
  • +
  • iris: decoder fixes
  • +
  • iris: update vb BO handling now that we have softpin
  • +
  • iris: validation dumping improvements
  • +
  • iris: canonicalize addresses.
  • +
  • iris: delete more trash
  • +
  • iris: allocate SURFACE_STATEs up front and stop streaming them
  • +
  • iris: same treatment for sampler views
  • +
  • iris: assemble SAMPLER_STATE table at bind time
  • +
  • iris: fix a scissor bug
  • +
  • iris: SBA once at context creation, not per batch
  • +
  • iris: TES stash
  • +
  • iris: isv freeing fixes
  • +
  • iris: set sampler views
  • +
  • iris: decoder fixes
  • +
  • iris: better BT asserts
  • +
  • iris: increase allocator alignment
  • +
  • iris: fix index
  • +
  • iris: port bug fix from i965
  • +
  • iris: fixes from i965
  • +
  • iris: fixes
  • +
  • iris: crazy pipe control code
  • +
  • iris: bo reuse
  • +
  • iris: vma fixes - don't free binder address
  • +
  • iris: vma - fix assert
  • +
  • iris: better SBE
  • +
  • iris: fix texturing!
  • +
  • iris: Move get_command_space to iris_batch.c
  • +
  • iris: Defines for base addresses rather than numbers everywhere
  • +
  • iris: pull in newer comments
  • +
  • iris: copy over i965's cache tracking
  • +
  • iris: move bo_offset_from_sba
  • +
  • iris: bits of blorp code
  • +
  • iris: more blitting code to make readpixels work
  • +
  • iris: drop bogus binder free
  • +
  • iris: fix sampler view crashes
  • +
  • iris: more blorp
  • +
  • iris: fix blorp prog data crashes
  • +
  • iris: add INTEL_DEBUG=reemit
  • +
  • iris: drop the 48b printout, we never use anything else
  • +
  • iris: hacky flushing for now
  • +
  • iris: linear staging buffers - fast CPU access...
  • +
  • iris: make blorp pin the binder
  • +
  • iris: blorp URB
  • +
  • iris: no more drawing rectangle in blorp
  • +
  • iris: assert surf init
  • +
  • iris: some depth stuff :(
  • +
  • iris: bump GL version to 4.2
  • +
  • iris: uniforms for VS
  • +
  • iris: proper length for VE packet?
  • +
  • iris: proper # of uniforms
  • +
  • iris: properly reject formats, fixes RGB32 rendering with texture float
  • +
  • iris: blorp bug fixes
  • +
  • iris: delete growing code and just die for now
  • +
  • iris: just turn batch reset_and_clear_caches into reset
  • +
  • iris: chaining not growing
  • +
  • iris: caps
  • +
  • iris: fix batch chaining...
  • +
  • iris: fix decoding and undo testing code
  • +
  • iris: Lower the max number of decoded VBO lines
  • +
  • iris: fix whitespace
  • +
  • iris: fix 3DSTATE_VERTEX_ELEMENTS length
  • +
  • iris: more depth stuffs...
  • +
  • iris: fix VF INSTANCING length
  • +
  • iris: util_copy_framebuffer_state (ported from Rob's v3d patches)
  • +
  • iris: transfers
  • +
  • iris: flush always
  • +
  • iris: maybe slightly less boats uniforms
  • +
  • iris: fix constant packet length to match i965
  • +
  • iris: better ubo handling
  • +
  • iris: completely rewrite binder
  • +
  • iris: have more than one const_offset
  • +
  • iris: make surface states for cbufs
  • +
  • iris: fill out pull constant buffers
  • +
  • iris: fix pull bufs that aren't the first user upload
  • +
  • iris: use u_transfer helpers for now
  • +
  • iris: better VFI
  • +
  • iris: fix release builds
  • +
  • iris: drop assert for now
  • +
  • iris: disable __gen_validate_value in release mode
  • +
  • iris: allow mapped buffers during execution (faster)
  • +
  • iris: comment about reemitting and flushing
  • +
  • iris: state cleaning
  • +
  • iris: untested index buffer upload
  • +
  • iris: delete some pointless STATIC_ASSERTS
  • +
  • iris: untested SAMPLER_STATE pin BO fix
  • +
  • iris: put back the always flush - fixes some things :(
  • +
  • iris: save pointers to streamed state resources
  • +
  • iris: fix the validation list on new batches
  • +
  • iris: flag DIRTY_WM properly
  • +
  • iris: bindings dirty tracking
  • +
  • iris: some dirty fixes
  • +
  • iris: clear dirty
  • +
  • iris: plug leaks
  • +
  • iris: more leak fixes
  • +
  • iris: pc fixes
  • +
  • iris: remove 4 bytes of padding in iris_compiled_shader
  • +
  • iris: rzalloc iris_compiled_shader so memcmp works even if padding creeps in
  • +
  • iris: don't leak sampler state table resources
  • +
  • iris: don't leak keyboxes when searching for an existing program
  • +
  • iris: indentation
  • +
  • iris: use pipe resources not direct BOs
  • +
  • iris: clean up some warnings so I can see through the noise
  • +
  • iris: print binder utilization in INTEL_DEBUG=submit
  • +
  • iris: redo VB CSO a bit
  • +
  • iris: print refcounts in INTEL_DEBUG=submit
  • +
  • iris: support signed vertex buffer offsets
  • +
  • iris: fix major refcounting bug with resources
  • +
  • iris: fix caps so tests run again
  • +
  • iris: avoid crashing on unbound constant resources
  • +
  • iris: emit 3DSTATE_SBE_SWIZ
  • +
  • iris: max VP index
  • +
  • iris: fix viewport counts and settings
  • +
  • iris: fix num viewports to be based on programs
  • +
  • iris: fix VP iteration
  • +
  • iris: scissor count fixes
  • +
  • iris: actually init num_viewports
  • +
  • iris: print second batch size separately
  • +
  • iris: don't always flush
  • +
  • iris: Handle batch submission failure "better"
  • +
  • iris: bad inherited comments
  • +
  • iris: colorize batchbuffer failures to make them stand out
  • +
  • iris: iris - fix QWord aligned endings after batch chaining rework
  • +
  • iris: tidy comments about mirroring modes
  • +
  • iris: Disable unsupported mirror clamp modes
  • +
  • iris: fix fragcoord ytransform
  • +
  • iris: better boxing on maps
  • +
  • iris: clears
  • +
  • iris: rework DEBUG_REEMIT
  • +
  • iris: shader dirty bits
  • +
  • iris: clear fix
  • +
  • iris: fall back to u_generate_mipmap
  • +
  • iris: implement copy image
  • +
  • iris: lightmodel flat
  • +
  • iris: maybe-flush before blorp operations
  • +
  • iris: fix provoking vertex ordering
  • +
  • iris: larger polygon offset
  • +
  • iris: TES uniform fixes
  • +
  • iris: geometry shader support
  • +
  • iris: don't emit garbage 3DSTATE_VERTEX_BUFFERS when there aren't any
  • +
  • iris: fix 3DSTATE_VERTEX_ELEMENTS / VF_INSTANCING for 0 elements
  • +
  • iris: fix GS dispatch mode
  • +
  • iris: depth clears
  • +
  • iris: null surface for unbound textures
  • +
  • iris: state ref tuple
  • +
  • iris: don't include binder in surface VMA range
  • +
  • iris: border color memory zone :(
  • +
  • iris: implement border color, fix other sampler nonsense
  • +
  • iris: dead pointer
  • +
  • iris: just malloc one iris_genx_state instead of a bunch of oddball pieces
  • +
  • iris: SBE change stash
  • +
  • iris: fix zoffset asserts with 2DArray/Cube
  • +
  • iris: rename map->stride
  • +
  • iris: actually set cube bit properly
  • +
  • iris: keep DISCARD_RANGE
  • +
  • iris: actually handle array layers in blits
  • +
  • iris: comment out l/a/i/la
  • +
  • iris: fix clip flagging on fb changes
  • +
  • iris: fix depth bounds clamp enables
  • +
  • iris: don't crash on shader perf logs
  • +
  • iris: slab allocate transfers
  • +
  • iris: rearrange iris_resource.h
  • +
  • iris: Implement 3DSTATE_SO_DECL_LIST
  • +
  • iris: SO buffers
  • +
  • iris: streamout
  • +
  • iris: set even if no outputs
  • +
  • iris: bother setting program_string_id...
  • +
  • iris: fix SO_DECL_LIST
  • +
  • iris: actually pin the buffers
  • +
  • iris: fix sample mask for MSAA-off
  • +
  • iris: disable 6x MSAA support
  • +
  • iris: multislice transfer maps
  • +
  • iris: fix CC_VIEWPORT
  • +
  • iris: draw indirect support?
  • +
  • iris: save query type
  • +
  • iris: bits of multisample program key
  • +
  • iris: s/hwcso/state/g
  • +
  • iris: bind state helper function
  • +
  • iris: NOS mechanics
  • +
  • iris: record FS NOS
  • +
  • iris: fix crash
  • +
  • iris: fix sampler views of TBOs
  • +
  • iris: fix texture buffer stride
  • +
  • iris: TES program key inputs
  • +
  • iris: compile a TCS...don't bother with passthrough yet
  • +
  • iris: don't emit SO_BUFFERS and SO_DECL_LIST unless streamout is enabled
  • +
  • iris: vertex ID, instance ID
  • +
  • iris: fix SGVS when there are no valid vertex elements
  • +
  • iris: fill out MAX_PATCH_VERTICES
  • +
  • iris: assert about passthrough shaders to make this easier to detect
  • +
  • iris: fix EmitNoIndirect
  • +
  • iris: fix Z24
  • +
  • iris: reemit blend state for alpha test function changes
  • +
  • iris: point sprite enables
  • +
  • iris: hack around samples confusion
  • +
  • iris: fix blorp filters
  • +
  • iris: expose more things that we already support
  • +
  • iris: fix msaa flipping filters
  • +
  • iris: export get_shader_info
  • +
  • iris: implement set_shader_buffers
  • +
  • iris: emit binding table for atomic counters and SSBOs
  • +
  • iris: shorten loop
  • +
  • iris: unbind compiled shaders if none are present
  • +
  • iris: fix TBO alignment to match 965
  • +
  • iris: enable SSBOs
  • +
  • iris: fix SSBO indexing
  • +
  • iris: fix for disabling ssbos
  • +
  • iris: update bindings when changing programs
  • +
  • iris: drop unused bo parameter
  • +
  • iris: implement texture/memory barriers
  • +
  • iris: Don't reserve new binding table section unless things are dirty
  • +
  • iris: update a todo comment
  • +
  • iris: BIG OL' HACK for UBO updates
  • +
  • iris: enable texture gather
  • +
  • iris: Avoid croaking when trying to create FBO surfaces with bad formats
  • +
  • iris: fix GS output component limit
  • +
  • iris: drop pipe_shader_state
  • +
  • iris: fix sample mask
  • +
  • iris: cube arrays are cubes too
  • +
  • iris: we don't support textureGatherOffsets, need it lowered
  • +
  • iris: add minor comments
  • +
  • iris: comment everything
  • +
  • iris: sync bugfixes from brw_bufmgr
  • +
  • iris: remember to set bo->userptr
  • +
  • iris: rename ring to engine
  • +
  • iris: simplify batch len qword alignment
  • +
  • iris: get angry about execbuf failures
  • +
  • iris: fill out more caps
  • +
  • iris: depth or stencil fixes
  • +
  • iris: clear stencil
  • +
  • iris: actually emit stencil packets
  • +
  • iris: allow S8 as a stencil format
  • +
  • iris: WTF transfers
  • +
  • iris: use u_transfer_helper for depth stencil packing/unpacking
  • +
  • iris: drop stencil handling now that u_transfer_helper does it
  • +
  • iris: refcounting, who needs it?
  • +
  • iris: actually do stencil blits
  • +
  • iris: say no to more formats
  • +
  • iris: deal with Marek's new MSAA caps
  • +
  • iris: we can do multisample Z resolves
  • +
  • iris: Convert RGBX to RGBA for rendering.
  • +
  • iris: disallow RGB32 formats too
  • +
  • iris: Fix tiled memcpy for cubes...and for array slices
  • +
  • iris: blorp blit multiple slices
  • +
  • iris: assert depth is 1 in resource_copy_region
  • +
  • iris: call maybe_flush for each blorp operation
  • +
  • iris: implement ARB_clear_texture
  • +
  • iris: last VUE map NOS, handle > 16 FS inputs
  • +
  • iris: drop dead assignments
  • +
  • iris: drop pwrite
  • +
  • iris: port non-bucket alignment bugfix
  • +
  • iris: don't emit SBE all the time
  • +
  • iris: rename pipe to base
  • +
  • iris: Drop bogus sampler state saving
  • +
  • iris: move iris_shader_state from ice->shaders.state to ice->state.shaders
  • +
  • iris: Move things to iris_shader_state
  • +
  • iris: Move iris_sampler_view declaration to iris_resource.h
  • +
  • iris: track depth/stencil writes enabled
  • +
  • iris: use consistent copyright formatting
  • +
  • iris: Move cache tracking to iris_resolve.c
  • +
  • iris: proper cache tracking
  • +
  • iris: precompute hashes for cache tracking
  • +
  • iris: Reduce binder alignment from 64 to 32
  • +
  • iris: reenable R32G32B32 texture buffers
  • +
  • iris: z_res -> s_res
  • +
  • iris: implement get_sample_position
  • +
  • iris: fix line-aa-width
  • +
  • iris: try to hack around binder issue
  • +
  • iris: fix sampler state setting
  • +
  • iris: big old hack for tex-miplevel-selection
  • +
  • iris: use linear for 1D textures
  • +
  • iris: handle level/layer in direct maps
  • +
  • iris: fix crash when binding optional shader for the first time
  • +
  • iris: Skip primitive ID overrides if the shader wrote a custom value
  • +
  • iris: fix blend state memcpy
  • +
  • iris: new caps
  • +
  • iris: use Eric's new caps helper
  • +
  • iris: Allow inlining of require/get_command_space
  • +
  • iris: skip over whole function if dirty == 0
  • +
  • iris: don't unconditionally emit 3DSTATE_VF / 3DSTATE_VF_TOPOLOGY
  • +
  • iris: fix constant buffer 0 to be absolute
  • +
  • iris: set EXEC_OBJECT_CAPTURE on all driver internal buffers
  • +
  • iris: fix null FB and unbound tex surface state addresses
  • +
  • iris: Support multiple binder BOs, update Surface State Base Address
  • +
  • iris: fix SO offset writes for multiple streams
  • +
  • iris: update comments for multibinder
  • +
  • iris: move binder pinning outside the dirty == 0 check
  • +
  • iris: re-pin binding table contents if we didn't re-emit them
  • +
  • iris: enable ARB_enhanced_layouts
  • +
  • iris: refactor LRIs in context setup
  • +
  • iris: initialize "don't suck" bits, as Ben likes to call them
  • +
  • iris: totally untested icelake support
  • +
  • iris: refactor program CSO stuff
  • +
  • iris: silence const warning
  • +
  • iris: fix context restore of 3DSTATE_CONSTANT ranges
  • +
  • iris: properly re-pin stencil buffers
  • +
  • iris: delete bogus comment
  • +
  • iris: inherit the index buffer properly
  • +
  • iris: use 0 for TCS passthrough program string ID
  • +
  • iris: rw_bo for pipe controls
  • +
  • iris: LRM/SRM/SDI hooks
  • +
  • iris: initial query code
  • +
  • iris: gen10+ workarounds and break fix
  • +
  • iris: results write
  • +
  • iris: flush batch when asking for result via QBO
  • +
  • iris: fix random failures via CS stall...but why?
  • +
  • iris: gpr0 to bool
  • +
  • iris: play chicken with timer queries for now
  • +
  • iris: pipeline stats
  • +
  • iris: primitives generated query support
  • +
  • iris: drop explicit pinning
  • +
  • iris: timestamps
  • +
  • iris: ...and SO prims emitted queries
  • +
  • iris: glGet timestamps, more correct timestamps
  • +
  • iris: Need to | 1 when asking for timestamps
  • +
  • iris: 36-bit overflow fixes
  • +
  • iris: early return properly
  • +
  • iris: better query file comment
  • +
  • iris: magic number 36 -> #define
  • +
  • iris: Enable ARB_shader_vote
  • +
  • iris: just mark snapshots_landed from the CPU
  • +
  • iris: drop a bunch of pipe_sampler_state stuff we don't need
  • +
  • iris: vma_free bo->size, not bo_size
  • +
  • iris: don't mark contains_draw = false when chaining batches
  • +
  • iris: fix Z32_S8 depth sampling
  • +
  • iris: stencil texturing
  • +
  • iris: force persample interp cap
  • +
  • iris: pipe to scs -> iris_pipe.h
  • +
  • iris: inline stage_from_pipe to avoid unused warnings
  • +
  • iris: add gen11 to genX_call
  • +
  • iris: Allow PIPE_CONTROL with Stall at Scoreboard and RT flush
  • +
  • iris: rework format translation apis
  • +
  • iris: Use R/RG instead of I/L/A when sampling
  • +
  • iris: enable I/L formats
  • +
  • iris: X32_S8X24 :/
  • +
  • iris: set the binding table size
  • +
  • iris: lower storage image derefs
  • +
  • iris: implement set_shader_images hook
  • +
  • iris: bother with BTIs
  • +
  • iris: set image access correctly
  • +
  • iris: actually set image access
  • +
  • iris: null for non-existent cbufs
  • +
  • iris: move images next to textures in binding table
  • +
  • iris: advertise GL_ARB_shader_texture_image_samples
  • +
  • iris: Enable fb fetch
  • +
  • iris: initial compute caps
  • +
  • iris: yes
  • +
  • iris: drop dead format //'s
  • +
  • iris: drop XXX's about swizzling
  • +
  • iris: little bits of compute basics
  • +
  • iris: drop XXX that Jordan handled
  • +
  • iris: drop unnecessary #ifdefs
  • +
  • iris: leave XXX about unnecessary binding table uploads
  • +
  • iris: bail if SLM is needed
  • +
  • iris: fix whitespace
  • +
  • iris: XXX for compute state tracking :/
  • +
  • iris: rewrite grid surface handling
  • +
  • iris: better dirty checking
  • +
  • iris: don't let render/compute contexts stomp each other's dirty bits
  • +
  • iris: hack to avoid memorybarriers out the wazoo
  • +
  • iris: do PIPELINE_SELECT for render engine, add flushes, GLK hacks
  • +
  • iris: fix SBA flushing by refactoring code
  • +
  • iris: try and avoid pointless compute submissions
  • +
  • iris: fix UBOs with bindings that have an offset
  • +
  • iris: flag CC_VIEWPORT when changing num viewports
  • +
  • iris: fix SF_CLIP_VIEWPORT array indexing with multiple VPs
  • +
  • iris: Fix texture buffer / image buffer sizes.
  • +
  • iris: Clamp UBO and SSBO access to the actual BO size, for safety
  • +
  • iris: Move snapshots_landed to the front.
  • +
  • iris: Fix off by one in scissoring, empty scissors, default scissors
  • +
  • iris: Fall back to 1x1x1 null surface if no framebuffer supplied
  • +
  • iris: SO_DECL_LIST fix
  • +
  • iris: Fix refcounting of grid surface
  • +
  • iris: delete dead code
  • +
  • iris: fix overhead regression from "don't stomp each other's dirty bits"
  • +
  • iris: allow binding a null vertex buffer
  • +
  • iris: Flag constants dirty on program changes
  • +
  • iris: Disable a PIPE_CONTROL workaround on Icelake
  • +
  • iris: Enable ARB_shader_stencil_export
  • +
  • iris: Enable A8/A16_UNORM in an inefficient manner
  • +
  • iris: Drop B5G5R5X1 support
  • +
  • iris: Use at least 1x1 size for null FB surface state.
  • +
  • iris: Cross-link iris_batches so they can potentially flush each other
  • +
  • iris: cross batch flushing
  • +
  • iris: Don't leak the compute batch
  • +
  • iris: Actually create/destroy HW contexts
  • +
  • iris: Enable msaa_map transfer helpers
  • +
  • iris: tidy more warnings
  • +
  • iris: implement scratch space!
  • +
  • iris: Fix MSAA smooth points
  • +
  • iris: Fix TextureBarrier
  • +
  • iris: Fix multiple RTs with non-independent blending
  • +
  • iris: partial set_query_active_state
  • +
  • iris: Print the batch name when decoding
  • +
  • iris: Clone the NIR
  • +
  • iris: Defer cbuf0 upload to draw time
  • +
  • iris: drop unnecessary param[] setup from iris_setup_uniforms
  • +
  • iris: add param domain defines
  • +
  • iris: fill out params array with built-ins, like clip planes
  • +
  • iris: only bother with params if there are any...
  • +
  • iris: lower user clip planes
  • +
  • iris: hook up key stuff for clip plane lowering
  • +
  • iris: fix system value remapping
  • +
  • iris: dodge backend UCP lowering
  • +
  • iris: bypass params and do it ourselves
  • +
  • iris: actually upload clip planes.
  • +
  • iris: fix num clip plane consts
  • +
  • iris: fix more uniform setup
  • +
  • iris: drop iris_setup_push_uniform_range
  • +
  • iris: enable push constants if we have sysvals but no uniforms
  • +
  • iris: regather info so we get CLIP_DIST slots, not CLIP_VERTEX
  • +
  • iris: don't support pull constants.
  • +
  • iris: don't trip on param asserts
  • +
  • iris: drop param stuffs
  • +
  • iris: don't forget to upload CS consts
  • +
  • iris: fix sysval only binding tables
  • +
  • iris: only clip lower if there's something to clip against
  • +
  • iris: leave another TODO
  • +
  • iris: Fix SourceAlphaBlendFactor
  • +
  • iris: "Fix" transfer maps of buffers
  • +
  • iris: Fix independent alpha blending.
  • +
  • iris: more TODO
  • +
  • iris: scissored and mirrored blits
  • +
  • iris: more todo notes
  • +
  • iris: Fix TCS/TES slot unification
  • +
  • iris: properly pin stencil buffers
  • +
  • iris: Fix SLM
  • +
  • iris: Use iris_use_pinned_bo rather than add_exec_bo directly
  • +
  • iris: Combine iris_use_pinned_bo and add_exec_bo
  • +
  • iris: Avoid cross-batch synchronization on read/reads
  • +
  • iris: Avoid synchronizing due to the workaround BO
  • +
  • iris: replace vestiges of fence fds with newer exec_fence API
  • +
  • iris: Drop vestiges of throttling code
  • +
  • iris: Hang on to the last batch's sync-point, so we can wait on it
  • +
  • iris: Add wait fences to properly sync between render/compute
  • +
  • iris: leave a TODO
  • +
  • iris: flush the compute batch too if border pool is redone
  • +
  • iris: put render batch first in fence code
  • +
  • iris: Put batches in an array
  • +
  • iris: PIPE_CONTROL workarounds for GPGPU mode
  • +
  • iris: RT flush for memorybarrier with texture bit
  • +
  • iris: update comment
  • +
  • iris: Enable ctx->Const.UseSTD430AsDefaultPacking
  • +
  • iris: Lie about indirects
  • +
  • iris: Fix buffer -> buffer copy_region
  • +
  • iris: Fix VIEWPORT/LAYER in stream output info
  • +
  • iris: Do the 48-bit vertex buffer address invalidation workaround
  • +
  • iris: drop long dead XXX comment
  • +
  • iris: Track a binding history for buffer resources
  • +
  • iris: add iris_flush_and_dirty_for_history
  • +
  • iris: Flush for history at various moments
  • +
  • iris: Re-pin even if nothing is dirty
  • +
  • iris: fix prototype warning
  • +
  • iris: export iris_upload_shader
  • +
  • iris: fix comment location
  • +
  • iris: Use wrappers for create_xs_state rather than a switch statement
  • +
  • iris: rework program cache interface
  • +
  • iris: Enable precompiles
  • +
  • iris: Use program's num textures not the state tracker's bound
  • +
  • iris: drop pull constant binding table entry
  • +
  • iris: add assertions about binding table starts
  • +
  • iris: add an extra BT assert from Chris Wilson
  • +
  • iris: actually flush for storage images
  • +
  • iris: fix some SO overflow query bugs and tidy the code a bit
  • +
  • iris: drop key_size_for_cache
  • +
  • iris: for BLORP, only use the predicate enable bit when USE_BIT
  • +
  • iris: check query first
  • +
  • iris: fix conditional compute, don't stomp predicate for pipelined queries
  • +
  • iris: Rework tiling/modifiers handling
  • +
  • iris: Fix failed to compile TCS message
  • +
  • iris: Destroy transfer helper on screen teardown
  • +
  • iris: Destroy the border color pool
  • +
  • iris: Unref unbound_tex resource
  • +
  • iris: Fix IRIS_MEMZONE_COUNT to exclude the border color pool
  • +
  • iris: Destroy the bufmgr
  • +
  • iris: Stop leaking iris_uncompiled_shaders like mad
  • +
  • iris: move some non-buffer case code in a bit
  • +
  • iris: Don't bother considering if the underlying surface is a cube
  • +
  • iris: fix alpha channel for RGB BC1 formats
  • +
  • iris: fix dma buf import strides
  • +
  • iris: CS stall for stream out -> VB
  • +
  • iris: make clipper statistics dynamic
  • +
  • iris: reject all clipping when we can't use streamout render disabled
  • +
  • iris: omask can kill
  • +
  • iris: reemit SBE when sprite coord origin changes
  • +
  • iris: re-pin inherited streamout buffers
  • +
  • iris: Fix NOS mechanism
  • +
  • iris: fix overhead regression from flushing for storage images
  • +
  • iris: fix set_sampler_views to not unbind, be better about bounds
  • +
  • iris: Fix set_sampler_views with start > 0
  • +
  • iris: Replace num_textures etc with a bitmask we can scan
  • +
  • iris: Drop continues in resolve
  • +
  • iris: Fix clear dimensions
  • +
  • iris: Clamp viewport extents to the framebuffer dimensions
  • +
  • iris: Enable guardband clipping
  • +
  • iris: Fix primitive generated query active flag
  • +
  • iris: Always do rasterizer discard in clipper
  • +
  • iris: override alpha to one src1 blend factors
  • +
  • iris: handle PatchVerticesIn as a system value.
  • +
  • iris: rewrite set_vertex_buffer and VB handling
  • +
  • iris: Reorder LRR parameters to have dst first.
  • +
  • iris: Add _MI_ALU helpers that don't paste
  • +
  • iris: Don't bother packing 3DSTATE_SO_BUFFER at create time
  • +
  • iris: Move iris_stream_output_target def to iris_context.h
  • +
  • iris: only get space for one offset in stream output targets
  • +
  • iris: Implement DrawTransformFeedback()
  • +
  • iris: drop unnecessary genx->streamout field
  • +
  • iris: Fix for PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET
  • +
  • iris: Fix the prototype for iris_bo_alloc_tiled
  • +
  • iris: don't print the pointer in INTEL_DEBUG=submit
  • +
  • iris: Use a surface state fill helper
  • +
  • iris: Make a alloc_surface_state helper
  • +
  • iris: whitespace fixes
  • +
  • iris: Track blend enables, save outbound for resolve code
  • +
  • iris: always pin the binder...in the compute context, too.
  • +
  • iris: delete finished comments
  • +
  • iris: pin and re-pin the scratch BO
  • +
  • iris: more dead comments
  • +
  • iris: only mark depth/stencil as writable if writes are actually enabled
  • +
  • iris: better MOCS
  • +
  • iris: Fix scratch space allocation on Icelake.
  • +
  • iris: Only resolve inputs for actual shader stages
  • +
  • iris: Add a more long term TODO about timebase scaling
  • +
  • iris: Fix compute scratch pinning
  • +
  • iris: Delete bogus comment about cube array counting.
  • +
  • iris: Fix framebuffer layer count
  • +
  • iris: Don't enable push constants just because there are system values
  • +
  • iris: Don't make duplicate system values
  • +
  • iris: Fill out brw_image_params for storage images on Broadwell
  • +
  • iris: Fix surface states for Gen8 lowered-to-untype images
  • +
  • iris: Leave a comment about why Broadwell images are broken
  • +
  • iris: Implement multi-slice copy_region
  • +
  • iris: Flush the render cache in flush_and_dirty_for_history
  • +
  • iris: Handle PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE somewhat
  • +
  • iris: Don't check other batches for our batch BO
  • +
  • iris: Drop a dead comment
  • +
  • iris: Delete genx->bound_vertex_buffers
  • +
  • iris: Fix Broadwell WaDividePSInvocationCountBy4
  • +
  • iris: Use new PIPE_STAT_QUERY enums rather than hardcoded numbers.
  • +
  • iris: Switch to the new PIPELINE_STATISTICS_QUERY_SINGLE capability
  • +
  • iris: fail to create screen for older unsupported HW
  • +
  • iris: Allow sample mask of 0
  • +
  • iris: Don't enable smooth points when point sprites are enabled
  • +
  • iris: Assert about blits with color masking
  • +
  • iris: Pay attention to blit masks
  • +
  • iris: CS stall on VF cache invalidate workarounds
  • iris: Fix SO issue with INTEL_DEBUG=reemit, set fewer bits
  • iris: Don't whack SO dirty bits when finishing a BLORP op
  • iris: Fix memzone_for_address for the surface and binder zones
  • iris: Do binder address allocations per-context, not globally.
  • iris: Zero the compute predicate when changing the render condition
  • iris: Remap stream output indexes back to VARYING_SLOT_*.
  • iris: Enable PIPE_CAP_COMPACT_ARRAYS
  • iris: Drop comment about ISP_DIS
  • iris: Drop dead state_size hash table
  • iris: Unreference some more things on state module teardown
  • iris: minor tidying
  • iris: Fix bug in bound vertex buffer tracking
  • iris: Implement ALT mode for ARB_{vertex,fragment}_shader
  • iris: Add a timeout_nsec parameter, rename check_syncpt to wait_syncpt
  • iris: Fix accidental busy-looping in query waits
  • iris: Use READ_ONCE and WRITE_ONCE for snapshots_landed
  • iris: Make a iris_batch_reference_signal_syncpt helper function.
  • iris: Add PIPE_CAP_MAX_VARYINGS
  • iris: rework num textures to util_lastbit
  • iris: Stop chopping off the first nine characters of the renderer string
  • iris: Drop XXX about alpha testing
  • iris: Set 3DSTATE_WM::ForceThreadDispatchEnable
  • iris: Set HasWriteableRT correctly
  • iris: Drop XXX about checking for swizzling
  • iris: Move create and bind driver hooks to the end of iris_program.c
  • iris: Make an IRIS_MAX_MIPLEVELS define
  • iris: Simplify iris_get_depth_stencil_resources
  • iris: Add missing depth cache flushes
  • iris: Always emit at least one BLEND_STATE
  • iris: Add iris_resource fields for aux surfaces
  • iris: Fill out res->aux.possible_usages
  • iris: Fill out SURFACE_STATE entries for each possible aux usage
  • iris: create aux surface if needed
  • iris: Initial import of resolve code
  • iris: blorp using resolve hooks
  • iris: add some draw resolve hooks
  • iris: actually use the multiple surf states for aux modes
  • iris: try to fix copyimage vs copybuffers
  • iris: be sure to skip buffers in resolve code
  • iris: resolve before transfer maps
  • iris: pin the buffers
  • iris: store modifier info in res
  • iris: Make blit code use actual aux usages
  • iris: consider framebuffer parameter for aux usages
  • iris: Resolves for compute
  • iris: disable aux for external things
  • iris: some initial HiZ bits
  • iris: don't use hiz for MSAA buffers
  • iris: Set program key fields for MCS
  • iris: make surface states for CCS_D too
  • iris: do flush for buffers still
  • iris: Allow disabling aux via INTEL_DEBUG options
  • iris: Fix aux usage in render resolve code
  • iris: Only resolve compute resources for compute shaders
  • iris: Enable auxiliary buffer support
  • iris: Enable -msse2 and -mstackrealign
  • Revert "iris: Enable auxiliary buffer support"
  • vulkan: Fix 32-bit build for the new overlay layer
  • mesa: Fix RGBBuffers for renderbuffers with sized internal formats
  • iris: Drop RGBX -> RGBA for storage image usages
  • iris: Properly allow rendering to RGBX formats.
  • i965: Implement threaded GL support.
  • tgsi_to_nir: use sampler variables and derefs
  • iris: Fix MOCS for blits and clears
  • isl: Add a swizzle parameter to isl_buffer_fill_state()
  • iris: Plumb through ISL_SWIZZLE_IDENTITY in buffer surface emitters
  • iris: Defer uploading sampler state tables until draw time
  • iris: Properly support alpha and luminance-alpha formats
  • iris: Drop PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY
  • iris: Spruce up "are we using this engine?" checks for flushing
  • iris: Export a copy_region helper that doesn't flush
  • iris: Use copy_region and staging resources to avoid transfer stalls
  • Revert MR 369 (Fix extract_i8 and extract_u8 for 64-bit integers)
  • iris: Fix backface stencil write condition
  • iris: Rework default tessellation level uploads
  • iris: Fix TES gl_PatchVerticesIn handling.
  • iris: Move depth/stencil flushes so they actually do something
  • iris: Refactor depth/stencil buffer pinning into a helper.
  • iris: Fix write enable in pinning of depth/stencil resources
  • i965: Move some genX infrastructure to genX_boilerplate.h.
  • i965: Rename ISP_DIS to INDIRECT_STATE_POINTERS_DISABLE.
  • i965: Use genxml for emitting PIPE_CONTROL.
  • i965: Reimplement all the PIPE_CONTROL rules.
  • intel/fs: Fix opt_peephole_csel to not throw away saturates.
  • iris: Don't mutate box in transfer map code
  • iris: Don't flush the batch for unsynchronized mappings
  • iris: Slightly better bounds on buffer sizes
  • gallium: Add PIPE_BARRIER_UPDATE_BUFFER and UPDATE_TEXTURE bits.
  • nvc0: Skip new update barrier bits
  • nir: Record non-vector/scalar varyings as unmovable when compacting
  • iris: Fix util_vma_heap_init size for IRIS_MEMZONE_SHADER
  • iris: Skip input resolve handling if bindings haven't changed
  • iris: Skip framebuffer resolve tracking if framebuffer isn't dirty
  • iris: Skip resolves and flushes altogether if unnecessary
  • iris: Fix batch chaining map_next increment.
  • iris: Actually advertise some modifiers
  • st/nir: Free the GLSL IR after linking.
  • st/mesa: Fix blitting from GL_DEPTH_STENCIL to GL_STENCIL_INDEX
  • iris: Fix blits with S8_UINT destination
  • iris: Print the memzone name when allocating BOs with INTEL_DEBUG=buf
  • iris: Save/restore MI_PREDICATE_RESULT, not MI_PREDICATE_DATA.
  • iris: Silence unused variable warnings in release mode
  • gallium/util: Add const to u_range_intersect
  • iris: Actually pin the scratch BO.
  • glsl: Set location on structure-split sampler uniform variables
  • intel: Emit 3DSTATE_VF_STATISTICS dynamically
  • iris: Actually mark blorp_copy_buffer destinations as written.
  • iris: Preserve all PIPE_TRANSFER flags in xfer->usage
  • iris: Fix FLUSH_EXPLICIT handling with staging buffers.
  • iris: Make shader_perf_log print to stderr if INTEL_DEBUG=perf is set
  • i965: Move program key debugging to the compiler.
  • iris: Print the reason for shader recompiles.
  • iris: Move iris_debug_recompile calls before uploading.
  • iris: Change vendor and renderer strings
  • iris: Add texture cache flushing hacks for blit and resource_copy_region
  • iris: Be less aggressive at postdraw work skipping
  • iris: Add mechanism for iris-specific driconf options
  • iris: Enable the dual_color_blend_by_location driconf option.
  • iris: Track bound and writable SSBOs
  • Revert "glsl: Set location on structure-split sampler uniform variables"
  • i965: Ignore uniform storage for samplers or images, use binding info
  • i965: Tidy bogus indentation left by previous commit
  • iris: Mark constants dirty on transfer unmap even if no flushes occur
  • iris: Track bound constant buffers
  • iris: Rework UBOs and SSBOs to use pipe_shader_buffer
  • iris: Rework image views to store pipe_image_view.
  • iris: Make a gl_shader_stage -> pipe_shader_stage helper function
  • iris: Make memzone_for_address non-static
  • iris: Replace buffer backing storage and rebind to update addresses.
  • iris: Make a resource_is_busy() helper
  • iris: Track valid data range and infer unsynchronized mappings.
  • iris: Make some offset math helpers take a const isl_surf pointer
  • iris: Fix DrawTransformFeedback math when there's a buffer offset
  • iris: Prefer staging blits when destination supports CCS_E.
  • iris: Actually put Mesa in GL_RENDERER string
  • iris: Split iris_flush_and_dirty_for_history into two helpers.
  • iris: Enable GL_AMD_depth_clamp_separate
  • iris: Advertise EXT_texture_sRGB_R8 support
  • iris: Some tidying for preemption support
  • iris: Silence unused function warning
  • iris: Fix zeroing of transform feedback offsets in strange cases.
  • glsl/list: Add an exec_list_is_singular() helper.
  • nir: Add a new nir_cf_list_is_empty_block() helper.
  • intel/fs: Don't emit empty ELSE blocks.
  • iris: Set XY Clipping correctly.
  • iris: Only enable GL_AMD_depth_clamp_separate on Gen9+
  • iris: Fix imageBuffer and PBO download.
  • iris: Disable dual source blending when shader doesn't handle it
  • iris: Resolve textures used by the program, not merely bound textures
  • iris: Fix 4GB memory zone heap sizes.
  • iris: leave the top 4Gb of the high heap VMA unused
  • iris: Force VMA alignment to be a multiple of the page size.
  • iris: Delete bucketing allocators
  • i965: Fix BRW_MEMZONE_LOW_4G heap size.
  • i965: Force VMA alignment to be a multiple of the page size.
  • i965: leave the top 4Gb of the high heap VMA unused
  • i965: Fix memory leaks in brw_upload_cs_work_groups_surface().
  • iris: Use full ways for L3 cache setup on Icelake.
  • egl/x11: calloc dri2_surf so it's properly zeroed

Kevin Strasser (1):

  • egl/dri: Avoid out of bounds array access

Khaled Emara (1):

  • freedreno: PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT unreachable statement

Khem Raj (1):

  • winsys/svga/drm: Include sys/types.h

Kishore Kadiyala (1):

  • android: static link with libexpat with Android O+

Konstantin Kharlamov (1):

  • mapi: work around GCC LTO dropping assembly-defined functions

Kristian Høgsberg (49):

  • st/nir: Use src/ relative include path for autotools
  • freedreno/a6xx: Emit blitter dst with OUT_RELOCW
  • freedreno/a6xx: Use tiling for all resources
  • freedreno/a6xx: regen headers
  • freedreno/a6xx: Drop render condition check in blitter
  • freedreno: Log number of draw for sysmem passes
  • freedreno/a6xx: Use the right resource for separate stencil stride
  • freedreno/a6xx: Combine emit_blit and fd6_blit
  • freedreno: Consolidate u_blitter functions in freedreno_blitter.c
  • freedreno: Don't tell the blitter what it can't do
  • freedreno/a6xx: Move blit check so as to restore comment
  • freedreno/a6xx: Support some depth/stencil blits on blitter
  • freedreno/a6xx: Support y-inverted blits
  • freedreno/a6xx: Add format argument to fd6_tex_swiz()
  • freedreno/a6xx: Fall back to masked RGBA blits for depth/stencil
  • freedreno/a6xx: Clean up mixed use of swap and swizzle for texture state
  • freedreno/a6xx: Update headers
  • freedreno/a6xx: Front facing needs UNK3 bit
  • freedreno/a6xx: Fix point coord
  • .mailmap: Add a few more alises for myself
  • freedreno: Update headers
  • freedreno/a6xx: Copy stencil as R8_UINT
  • freedreno/a6xx: Support MSAA resolve blits on blitter
  • freedreno/a6xx: Only output MRT control for used framebuffers
  • freedreno/a6xx: Don't zero SO buffer addresses
  • freedreno: Fix a couple of warnings
  • turnip: Only get bo offset when we need to mmap
  • freedreno: Use c_vis_args and no_override_init_args
  • freedreno/a6xx: Remove extra parens
  • freedreno/ir3: Track whether shader needs derivatives
  • freedreno/ir3: Fix operand order for DSX/DSY
  • st/glsl_to_nir: Calculate num_uniforms from NumParameterValues
  • freedreno/ir3: Enable PIPE_CAP_PACKED_UNIFORMS
  • freedreno/ir3: Push UBOs to constant file
  • freedreno/ir3: Don't access beyond available regs
  • freedreno/ir3: Add workaround for VS samgq
  • freedreno/ir3: Mark ir3_context_error() as NORETURN
  • freedreno/a2xx: Fix redundant if statement
  • freedreno: Use enum values from matching enum
  • freedreno/a6xx: Add helper for incrementing regid
  • freedreno: Fix format string warning
  • .gitignore: Remove autotool artifacts
  • tgsi: Mark tgsi_strings_check() unused
  • glsl_to_nir: Initialize debug variable
  • nir_opcodes.py: Saturate to expression that doesn't overflow
  • ralloc: Fully qualify non-virtual destructor call
  • egl/dri2: Mark potentially unused 'display' variable with MAYBE_UNUSED
  • gallium/auxiliary/vl: Fix a couple of warnings
  • freedreno/drm: Quiet pointer to u64 conversion warning

Leo Liu (6):

  • st/va: fix the incorrect max profiles report
  • st/va/vp9: set max reference as default of VP9 reference number
  • vl/dri3: remove the wait before getting back buffer
  • radeon/vcn: add H.264 constrained baseline support
  • radeon/vcn/vp9: search the render target from the whole list
  • winsys/amdgpu: add VCN JPEG to no user fence group

Lepton Wu (2):

  • virgl: close drm fd when destroying virgl screen.
  • virgl: Set bind when creating temp resource.

Lionel Landwerlin (127):

  • anv: assert that color attachment are valid
  • radv: assert that colorAttachment is valid for CmdClearAttachment
  • i965: scale factor changes should trigger recompile
  • vulkan: Update the XML and headers to 1.1.101
  • anv: implement VK_EXT_depth_clip_enable
  • build: move imgui out of src/intel/tools to be reused
  • imgui: bump copy
  • imgui: make sure our copy of imgui doesn't clash with others in the same process
  • vulkan: add an overlay layer
  • intel: fix urb size for CFL GT1
  • anv: add support for INTEL_DEBUG=bat
  • Revert "anv: add support for INTEL_DEBUG=bat"
  • intel/aub_viewer: printout 48bits addresses
  • intel/aub_viewer: silence compiler warning
  • intel/aub_viewer: silence more compiler warnings
  • vulkan/overlay: fix missing installation of layer
  • vulkan/overlay: fix includes
  • imgui: update commit
  • imgui: update memory editor
  • vulkan/overlay: install layer binary in libdir
  • intel/compiler: use correct swizzle for replacement
  • vulkan/overlay: fix min/max computations
  • vulkan/overlay: rework option parsing
  • vulkan/overlay: add support for fps output in file
  • anv: add support for INTEL_DEBUG=bat
  • vulkan: update headers/registry to 1.1.102
  • anv: update supported patch version
  • radv: set num_components on vulkan_resource_index intrinsic
  • vulkan/util: make header available from c++
  • vulkan/util: generate instance/device dispatch tables
  • vulkan/overlay: drop dependency on validation layer headers
  • intel/decoders: add address space indicator to get BOs
  • intel/decoders: handle decoding MI_BBS from ring
  • intel/decoders: limit number of decoded batchbuffers
  • intel/aub_read: reuse defines from gen_context
  • intel/aub_write: split comment section from HW setup
  • intel/aub_write: write header in init
  • intel/aub_write: break execlist write in 2
  • intel/aub_write: switch to use i915_drm engine classes
  • intel/aub_write: log mmio writes
  • intel/aub_write: store the physical page allocator in struct
  • intel/aub_write: turn context images arrays into functions
  • intel/aub_write: factorize context image/pphwsp/ring creation
  • iris: fix decoder call
  • iris: fix decode_get_bo callback
  • intel/error2aub: build a list of BOs before writing them
  • intel/error2aub: identify buffers by engine
  • intel/error2aub: strenghten batchbuffer identifier marker
  • intel/error2aub: parse other buffer types
  • intel/error2aub: annotate buffer with their address space
  • intel/error2aub: store engine last ring buffer head/tail pointers
  • intel/error2aub: write GGTT buffers into the aub file
  • intel/error2aub: add a verbose option
  • intel/error2aub: deal with GuC log buffer
  • intel/error2aub: support older style engine names
  • vulkan: factor out wsi dependencies
  • anv: implement VK_EXT_pipeline_creation_feedback
  • vulkan/overlay: properly register layer object with loader
  • vulkan/overlay: silence validation layer warnings
  • vulkan/overlay: check return value of swapchain get images
  • vulkan/overlay: improve error reporting
  • i965: perf: sklgt2: update a priority for register programming
  • i965: perf: sklgt2: update compute metrics config
  • i965: perf: sklgt2: update memory write config
  • i965: perf: add PMA stall metrics
  • i965: perf: chv: fixup counters names
  • i965: perf: hsw: drop register programming not needed on HSW
  • i965: perf: sklgt2: drop programming of an unused NOA register
  • i965: perf: add Icelake metrics
  • i965: perf: enable Icelake metrics
  • i965: perf: add ring busyness metric for cfl gt2
  • i965: perf: update render basic configs for big core gen9/gen10
  • anv: implement VK_KHR_swapchain revision 70
  • intel: add dependency on genxml generated files
  • genxml: add a sorting script
  • genxml: sort xml files using new script
  • anv: don't use default pipeline cache for hits for VK_EXT_pipeline_creation_feedback
  • anv: store heap address bounds when initializing physical device
  • anv: leave the top 4Gb of the high heap VMA unused
  • i965: store device revision in gen_device_info
  • i965: extract performance query metrics
  • i965: move mdapi data structure to intel/perf
  • i965: move OA accumulation code to intel/perf
  • i965: move brw_timebase_scale to device info
  • i965: move mdapi result data format to intel/perf
  • i965: move mdapi guid into intel/perf
  • intel/perf: stub gen10/11 missing definitions
  • i965: perf: add mdapi pipeline statistics queries on gen10/11
  • intel/perf: drop counter size field
  • intel/perf: constify accumlator parameter
  • iris: implement WaEnableStateCacheRedirectToCS
  • i965: implement WaEnableStateCacheRedirectToCS
  • anv: implement WaEnableStateCacheRedirectToCS
  • anv: fix uninitialized pthread cond clock domain
  • intel/devinfo: fix missing num_thread_per_eu on ICL
  • intel/devinfo: add basic sanity tests on device database
  • anv: limit URB reconfigurations when using blorp
  • intel: workaround VS fixed function issue on Gen9 GT1 parts
  • anv: fix argument name for vkCmdEndQuery
  • i965: fix icelake performance query enabling
  • Revert "anv: limit URB reconfigurations when using blorp"
  • vulkan/util: generate a helper function to return pNext struct sizes
  • vulkan/overlay: update help printout
  • vulkan/overlay: record stats in command buffers and accumulate on exec/submit
  • vulkan/overlay: add pipeline statistic & timestamps support
  • vulkan/overlay: add no display option
  • vulkan/overlay: add a margin to the size of the window
  • vulkan/overlay: record all select metrics into output file
  • vulkan/overlay: add a frame counter option
  • vulkan/overlay: make overlay size configurable
  • vulkan/overlay: make overriden functions static
  • vulkan/overlay: add TODO list
  • anv: fix crash when application does not provide push constants
  • anv: rework queries writes to ensure ordering memory writes
  • anv: fix use after free
  • anv: Use corresponding type from the vector allocation
  • vulkan/overlay: keep allocating draw data until it can be reused
  • nir: fix lower_non_uniform_access pass
  • vulkan/overlay-layer: fix cast errors
  • vulkan/overlay: fix truncating error on 32bit platforms
  • nir: lower_non_uniform_access: iterate over instructions safely
  • vulkan/overlay: fix timestamp query emission with no pipeline stats
  • vulkan: fix build dependency issue with generated files
  • anv: fix apply_pipeline_layout pass for arrays of YCbCr descriptors
  • nir/lower_non_uniform: safely iterate over blocks
  • intel/perf: fix EuThreadsCount value in performance equations
  • intel/perf: improve dynamic loading config detection

Lubomir Rintel (3):

  • kmsro: Extend to include armada-drm
  • gallivm: guess CPU features also on ARM
  • gallivm: disable NEON instructions if they are not supported

Lucas Stach (3):

  • etnaviv: don't flush own context when updating resource use
  • etnaviv: flush all pending contexts when accessing a resource with the CPU
  • etnaviv: only try to construct scanout resource when on KMS winsys

Marek Olšák (121):

  • radeonsi: enable dithered alpha-to-coverage for better quality
  • radeonsi: merge & rename texture BO metadata functions
  • radeonsi: unify error paths in si_texture_create_object
  • winsys/amdgpu: remove amdgpu_drm.h definitions
  • r600: add -Wstrict-overflow=0 to meson to silence the warning
  • radeonsi: fix a comment typo in si_fine_fence_set
  • gallium: allow more PIPE_RESOURCE_ driver flags
  • meson: drop the xcb-xrandr version requirement
  • radeonsi: handle render_condition_enable in si_compute_clear_render_target
  • radeonsi: fix crashing performance counters (division by zero)
  • radeonsi: initialize textures using DCC to black when possible
  • radeonsi: clear allocator_zeroed_memory with SDMA
  • radeonsi: make allocator_zeroed_memory unmappable and use bigger buffers
  • radeonsi: don't leak an index buffer if draw_vbo fails
  • radeonsi: use local ws variable in si_need_dma_space
  • gallium/u_threaded: fix EXPLICIT_FLUSH for flush offsets > 0
  • radeonsi: fix EXPLICIT_FLUSH for flush offsets > 0
  • winsys/amdgpu: don't drop manually added fence dependencies
  • winsys/amdgpu: unify fence list code
  • winsys/amdgpu: use a separate fence list for syncobjs
  • winsys/amdgpu: remove occurence of INDIRECT_BUFFER_CONST
  • winsys/amdgpu: clean up IB buffer size computation
  • winsys/amdgpu: cs_check_space sets the minimum IB size for future IBs
  • radeonsi: add AMD_DEBUG env var as an alternative to R600_DEBUG
  • radeonsi: use MEM instead of MEM_GRBM in COPY_DATA.DST_SEL
  • radeonsi: add driconf option radeonsi_enable_nir
  • radeonsi: always enable NIR for Civilization 6 to fix corruption
  • driconf: add Civ6Sub executable for Civilization 6
  • st/mesa: always unmap the uploader in st_atom_array.c
  • gallium/u_threaded: always unmap const_uploader
  • gallium/u_upload_mgr: allow use of FLUSH_EXPLICIT with persistent mappings
  • radeonsi: use SDMA for uploading data through const_uploader
  • tgsi: don't set tgsi_info::uses_bindless_images for constbufs and hw atomics
  • radeonsi: always use compute rings for clover on CI and newer (v2)
  • gallium/u_tests: use a compute-only context to test GCN compute ring
  • gallium: add pipe_grid_info::last_block
  • omx: clean up enc_LoadImage_common
  • omx: add a compute path in enc_LoadImage_common
  • radeonsi: fix assertion failure by using the correct type
  • mesa: implement ARB/KHR_parallel_shader_compile
  • gallium: implement ARB/KHR_parallel_shader_compile
  • util/queue: move thread creation into a separate function
  • util/queue: add ability to kill a subset of threads
  • util/queue: hold a lock when reading num_threads in util_queue_finish
  • util/queue: add util_queue_adjust_num_threads
  • radeonsi: implement ARB/KHR_parallel_shader_compile callbacks
  • radeonsi: don't use PFP_SYNC_ME with compute-only contexts
  • docs/relnotes: document parallel_shader_compile changes in 19.1.0, not 19.0.0
  • amd/addrlib: fix uninitialized values for Addr2ComputeDccAddrFromCoord
  • radeonsi/gfx9: add support for PIPE_ALIGNED=0
  • radeonsi: add ability to bind images as image buffers
  • radeonsi: add support for displayable DCC for 1 RB chips
  • radeonsi: add support for displayable DCC for multi-RB chips
  • radeonsi: enable displayable DCC on Ravens
  • gallium: add writable_bitmask parameter into set_shader_buffers
  • glsl: remember which SSBOs are not read-only and pass it to gallium
  • radeonsi: set exact shader buffer read/write usage in CS
  • tegra: fix the build after the set_shader_buffers change
  • radeonsi: fix a crash when unbinding sampler states
  • glsl: fix shader_storage_blocks_write_access for SSBO block arrays
  • Revert "glsl: fix shader_storage_blocks_write_access for SSBO block arrays"
  • glsl: allow the #extension directive within code blocks for the dri option
  • mesa: don't overwrite existing shader files with MESA_SHADER_CAPTURE_PATH
  • radeonsi: set AC_FUNC_ATTR_READNONE for image opcodes where it was missing
  • ac: use the common helper ac_apply_fmask_to_sample
  • ac: fix incorrect bindless atomic code in visit_image_atomic
  • radeonsi: enable GL_EXT_shader_image_load_formatted
  • nir: optimize gl_SampleMaskIn to gl_HelperInvocation for radeonsi when possible
  • winsys/amdgpu: don't set GTT with GDS & OA placements on APUs
  • radeonsi/gfx9: use the correct condition for the DPBB + QUANT_MODE workaround
  • radeonsi: use CP DMA for the null const buffer clear on CIK
  • tgsi/scan: add uses_drawid
  • ac: add radeon_info::marketing_name, replacing the winsys callback
  • ac: add radeon_info::is_pro_graphics
  • ac: add ac_get_i1_sgpr_mask
  • ac: add REWIND and GDS registers to register headers
  • winsys/amdgpu: make IBs writable and expose their address
  • winsys/amdgpu: reorder chunks, make BO_HANDLES first, IB and FENCE last
  • winsys/amdgpu: enable chaining for compute IBs
  • winsys/amdgpu: clean up and remove nonsensical assertion
  • radeonsi: add si_cp_copy_data
  • radeonsi: add helper si_get_minimum_num_gfx_cs_dwords
  • radeonsi: delay adding BOs at the beginning of IBs until the first draw
  • gallium: document conservative rasterization flags
  • st/dri: simplify throttling code
  • gallium: replace DRM_CONF_THROTTLE with PIPE_CAP_MAX_FRAMES_IN_FLIGHT
  • gallium: replace DRM_CONF_SHARE_FD with PIPE_CAP_DMABUF
  • gallium: replace drm_driver_descriptor::configuration with driconf_xml
  • gallium: set PIPE_CAP_MAX_FRAMES_IN_FLIGHT to 2 for all drivers
  • gallium: add PIPE_CAP_PREFER_COMPUTE_BLIT_FOR_MULTIMEDIA
  • util: fix a compile failure in u_compute.c on windows
  • mesa: enable glGet for EXT_gpu_shader4
  • glsl: add `unsigned int` type for EXT_GPU_shader4
  • glsl: apply some 1.30 and other rules to EXT_gpu_shader4 as well
  • glsl: add builtin variables for EXT_gpu_shader4
  • glsl: add arithmetic builtin functions for EXT_gpu_shader4
  • glsl: add texture builtin functions for EXT_gpu_shader4
  • glsl: allow "varying out" for fragment shader outputs with EXT_gpu_shader4
  • mesa: expose EXT_texture_buffer_object
  • mesa: only allow EXT_gpu_shader4 in the compatibility profile
  • st/mesa: expose EXT_gpu_shader4 if GLSL 1.40 is supported
  • glsl: handle interactions between EXT_gpu_shader4 and texture extensions
  • radeonsi: add BOs after need_cs_space
  • radeonsi/gfx9: set that window_rectangles always roll the context
  • radeonsi/gfx9: rework the gfx9 scissor bug workaround (v2)
  • radeonsi: remove dirty slot masks from scissor and viewport states
  • glsl: fix shader_storage_blocks_write_access for SSBO block arrays (v2)
  • radeonsi: don't ignore PIPE_FLUSH_ASYNC
  • mesa: rework error handling in glDrawBuffers
  • mesa: fix pbuffers because internally they are front buffers
  • st/mesa: don't flush the front buffer if it's a pbuffer
  • radeonsi: use new atomic LLVM helpers
  • radeonsi: set sampler state and view functions for compute-only contexts
  • st/dri: decrease input lag by syncing sooner in SwapBuffers
  • glsl: fix and clean up NV_compute_shader_derivatives support
  • st/mesa: fix 2 crashes in st_tgsi_lower_yuv
  • radeonsi: remove old_va parameter from si_rebind_buffer by remembering offsets
  • radeonsi: update buffer descriptors in all contexts after buffer invalidation
  • radeonsi: fix a regression in si_rebind_buffer
  • u_blitter: don't fail mipmap generation for depth formats containing stencil
  • ac: fix a typo in ac_build_wg_scan_bottom

Mario Kleiner (1):

  • drirc: Add sddm-greeter to adaptive_sync blacklist.

Mark Janes (5):

  • mesa: properly report the length of truncated log messages
  • mesa: rename logging functions to reflect that they format strings
  • mesa: add logging function for formatted string
  • intel/common: move gen_debug to intel/dev
  • intel/tools: Remove redundant definitions of INTEL_DEBUG

Mateusz Krzak (2):

  • panfrost: cast bo_handles pointer to uintptr_t first
  • panfrost: use os_mmap and os_munmap

Mathias Fröhlich (22):

  • st/mesa: Reduce array updates due to current changes.
  • mesa: Track buffer object use also for VAO usage.
  • st/mesa: Invalidate the gallium array atom only if needed.
  • mesa: Implement helper functions to map and unmap a VAO.
  • mesa: Factor out _mesa_array_element.
  • mesa: Use _mesa_array_element in dlist save.
  • mesa: Replace _ae_{,un}map_vbos with _mesa_vao_{,un}map_arrays
  • mesa: Remove _ae_{,un}map_vbos and dependencies.
  • mesa: Use mapping tools in debug prints.
  • vbo: Fix basevertex handling in display list compiles.
  • vbo: Fix GL_PRIMITIVE_RESTART_FIXED_INDEX in display list compiles.
  • mesa: Add assert to _mesa_primitive_restart_index.
  • mesa: Factor out index function that will have multiple use.
  • mesa: Use glVertexAttrib*NV functions for fixed function attribs.
  • mesa: Implement _mesa_array_element by walking enabled arrays.
  • mesa: Rip out now unused gl_context::aelt_context.
  • mesa: Remove the now unused _NEW_ARRAY state change flag.
  • mesa: Constify static const array in api_arrayelt.c
  • mesa: Remove the _glapi_table argument from _mesa_array_element.
  • mesa: Set CurrentSavePrimitive in vbo_save_NotifyBegin.
  • mesa: Correct the is_vertex_position decision for dlists.
  • mesa: Leave aliasing of vertex and generic0 attribute to the dlist code.

Matt Turner (7):

  • intel/compiler/test: Set devinfo->gen = 7
  • intel/compiler: Avoid propagating inequality cmods if types are different
  • intel/compiler/test: Add unit test for mismatched signedness comparison
  • intel/compiler: Add commas on final values of compaction table arrays
  • intel/compiler: Use SIMD16 instructions in fs saturate prop unit test
  • intel/compiler: Add unit tests for sat prop for different exec sizes
  • intel/compiler: Improve fix_3src_operand()

Matthias Lorenz (1):

  • vulkan/overlay: Add fps counter

Mauro Rossi (6):

  • android: intel/isl: remove redundant building rules
  • android: anv: fix generated files depedencies (v2)
  • android: anv: fix libexpat shared dependency
  • android: nouveau: add support for nir
  • android: fix LLVM version string related building errors
  • draw: fix building error in draw_gs_init()

Maya Rashish (1):

  • configure: fix test portability

Michel Dänzer (19):

  • loader/dri3: Use strlen instead of sizeof for creating VRR property atom
  • gitlab-ci: Re-use docker image from the main repo in forked repos
  • gitlab-ci: List some longer-running jobs before others of the same stage
  • gitlab-ci: Use 8 CPU cores in autotools job
  • gitlab-ci: Make sure clang job actually uses ccache
  • gitlab-ci: Only pull/push cache contents in build+test stage jobs
  • gitlab-ci: Automatically retry jobs after runner system failure
  • gitlab-ci: Run CI pipeline for all branches in the main repository
  • gitlab-ci: Use Debian stretch instead of Ubuntu bionic
  • gitlab-ci: Use HTTPS for APT repositories
  • gitlab-ci: Use Debian packages instead of pip ones for meson and scons
  • gitlab-ci: Install most packages from Debian buster
  • gitlab-ci: Remove unneded (stuff from) APT command lines
  • gitlab-ci: Remove unused Debian packages from Docker image
  • gitlab-ci: Use clang 8 instead of 7
  • gitlab-ci: Drop unused clang 5/6 packages
  • gitlab-ci: Do not use subshells for compiling dependencies
  • gitlab-ci: Use LLVM 3.4 from Debian jessie for scons-llvm job
  • gitlab-ci: Use meson buildtype debug instead of default debugoptimized

Mike Blumenkrantz (6):

  • iris: support INTEL_NO_HW environment variable
  • gallium: add pipe cap for inner_coverage conservative raster mode
  • st/mesa: indicate intel extension support for inner_coverage based on cap
  • iris: add support for INTEL_conservative_rasterization
  • iris: add preemption support on gen9
  • iris: enable preemption support for gen10

Nanley Chery (3):

  • i965: Rename intel_mipmap_tree::r8stencil_* -> ::shadow_*
  • anv: Fix some depth buffer sampling cases on ICL+
  • anv/cmd_buffer: Initalize the clear color struct for CNL+

Nataraj Deshpande (1):

  • anv: Fix check for isl_fmt in assert

Neha Bhende (2):

  • st/mesa: Fix topogun-1.06-orc-84k-resize.trace crash
  • draw: fix memory leak introduced 7720ce32a

Nicolai Hähnle (9):

  • amd/surface: provide firstMipIdInTail for metadata surface calculations
  • radeonsi: add si_debug_options for convenient adding/removing of options
  • util/u_log: flush auto loggers before starting a new page
  • ddebug: set thread name
  • ddebug: log calls to pipe->flush
  • ddebug: dump driver state into a separate file
  • ddebug: expose some helper functions as non-inline
  • radeonsi: add radeonsi_aux_debug option for aux context debug dumps
  • radeonsi: add radeonsi_sync_compile option

Oscar Blumberg (3):

  • intel/fs: Fix memory corruption when compiling a CS
  • radeonsi: Fix guardband computation for large render targets
  • glsl: Fix function return typechecking

Patrick Lerda (1):

  • lima/ppir: fix pointer referenced after a free

Patrick Rudolph (1):

  • d3dadapter9: Support software renderer on any DRI device

Philipp Zabel (1):

  • etnaviv: fill missing offset in etna_resource_get_handle

Pierre Moreau (12):

  • include/CL: Update to the latest OpenCL 2.2 headers
  • clover: Avoid warnings from new OpenCL headers
  • clover: Remove the TGSI backend as unused
  • clover: Add an helper for checking if an IR is supported
  • clover/api: Rework the validation of devices for building
  • clover/api: Fail if trying to build a non-executable binary
  • clover: Disallow creating libraries from other libraries
  • clover: Validate program and library linking options
  • clover: Move device extensions definitions to core/device.cpp
  • clover: Move platform extensions definitions to clover/platform.cpp
  • clover: Only use devices supporting IR_NATIVE
  • clover: Fix indentation issues

Pierre-Eric Pelloux-Prayer (1):

  • radeonsi: init sctx->dma_copy before using it

Plamena Manolova (3):

  • i965: Disable ARB_fragment_shader_interlock for platforms prior to GEN9
  • isl: Set ClearColorConversionEnable.
  • i965: Re-enable fast color clears for GEN11.

Qiang Yu (9):

  • u_math: add ushort_to_float/float_to_ushort
  • u_dynarray: add util_dynarray_grow_cap
  • gallium/u_vbuf: export u_vbuf_get_minmax_index
  • drm-uapi: add lima_drm.h
  • gallium: add lima driver
  • lima/gpir: fix compile fail when two slot node
  • lima/gpir: fix alu check miss last store slot
  • lima: fix lima_blit with non-zero level source resource
  • lima: fix render to non-zero level texture

Rafael Antognolli (45):

  • iris: Store internal_format when getting resource from handle.
  • iris: Skip msaa16 on gen < 9.
  • iris: Flush before hiz_exec.
  • iris: Pin HiZ buffers when rendering.
  • iris: Avoid leaking if we fail to allocate the aux buffer.
  • iris/clear: Pass on render_condition_enabled.
  • iris: Skip resolve if there's no context.
  • iris: Flag ALL_DIRTY_BINDINGS on aux state change.
  • iris: Add resolve on iris_flush_resource.
  • iris: Convert RGBX to RGBA always.
  • iris: Enable auxiliary buffer support again
  • iris: Enable HiZ for multisampled depth surfaces.
  • iris: Make intel_hiz_exec public.
  • iris: Allocate buffer space for the fast clear color.
  • iris: Use the clear depth when emitting 3DSTATE_CLEAR_PARAMS.
  • iris: Fast clear depth buffers.
  • iris: Add helper to convert fast clear color.
  • iris: Add function to update clear color in surface state.
  • iris: Bring back check for srgb and fast clear color.
  • intel/isl: Add isl_format_has_color_component() function.
  • intel/blorp: Make swizzle_color_value public.
  • iris: Implement fast clear color.
  • iris: Add iris_resolve_conditional_render().
  • iris: Stall on the CPU and resolve predication during fast clears.
  • iris: Track fast clear color.
  • iris: Let blorp update the clear color for us.
  • i965/blorp: Remove unused parameter from blorp_surf_for_miptree.
  • iris: Only update clear color for gens 8 and 9.
  • iris/gen8: Re-emit the SURFACE_STATE if the clear color changed.
  • iris: Manually apply fast clear color channel overrides.
  • iris: Do not allocate clear_color_bo for gen8.
  • iris: Add aux.sampler_usages.
  • iris: Enable fast clears on gen8.
  • intel/fs: Only propagate saturation if exec_size is the same.
  • intel/fs: Move the scalar-region conversion to the generator.
  • intel/fs: Add a lowering pass for linear interpolation.
  • intel/fs: Remove fs_generator::generate_linterp from gen11+.
  • intel/isl: Resize clear color buffer to full cacheline
  • intel/genxml: Update MI_ATOMIC genxml definition.
  • intel/blorp: Make blorp update the clear color in gen11.
  • iris: Do not advertise multisampled image load/store.
  • iris: Support sRGB fast clears even if the colorspaces differ.
  • iris: Use the linear version of the surface format during fast clears.
  • iris: Update the surface state clear color address when available.
  • iris: Enable fast clear colors on gen11.

Ray Zhang (1):

  • glx: fix shared memory leak in X11

Rhys Kidd (1):

  • iris: Fix assertion in iris_resource_from_handle() tiling usage

Rhys Perry (28):

  • nvc0: add compute invocation counter
  • radv: bitcast 16-bit outputs to integers
  • radv: ensure export arguments are always float
  • ac/nir: implement 8-bit nir_load_const_instr
  • ac/nir: fix 64-bit nir_op_f2f16_rtz
  • ac/nir: make ac_build_clamp work on all bit sizes
  • ac/nir: make ac_build_isign work on all bit sizes
  • ac/nir: make ac_build_fdiv support 16-bit floats
  • ac/nir: implement half-float nir_op_frcp
  • ac/nir: implement half-float nir_op_frsq
  • ac/nir: implement half-float nir_op_ldexp
  • ac/nir: fix 16-bit ssbo stores
  • ac/nir: implement 8-bit push constant, ssbo and ubo loads
  • ac/nir: implement 8-bit ssbo stores
  • ac/nir: add 8-bit types to glsl_base_to_llvm_type
  • ac/nir: implement 8-bit conversions
  • radv: enable VK_KHR_8bit_storage
  • ac/nir: implement 16-bit pack/unpack opcodes
  • radv: lower 16-bit flrp
  • ac: add 16-bit support to ac_build_ddxy()
  • nir,ac/nir: fix cube_face_coord
  • gallium: add support for formatted image loads
  • mesa, glsl: add support for EXT_shader_image_load_formatted
  • st/mesa: add support for EXT_shader_image_load_formatted
  • vc4: fix build
  • ac,ac/nir: use a better sync scope for shared atomics
  • radv: fix set_output_usage_mask() with composite and 64-bit types
  • ac/nir: mark some texture intrinsics as convergent

Rob Clark (135):

  • freedreno: fix release tarball
  • freedreno: more fixing release tarball
  • freedreno/a6xx: small compiler warning fix
  • freedreno/ir3: fix varying packing vs. tex sharp edge
  • freedreno/a6xx: move stream-out emit to helper
  • freedreno/a6xx: clean up some open-coded bits
  • freedreno/ir3: split out image helpers
  • freedreno/ir3: split out a4xx+ instructions
  • freedreno/ir3: fix ncomp for _store_image() src
  • freedreno/ir3: add image/ssbo <-> ibo/tex mapping
  • freedreno/ir3: add a6xx instruction encoding
  • freedreno/ir3: add a6xx+ SSBO/image support
  • freedreno/ir3: HIGH reg w/a for a6xx
  • freedreno/a6xx: border-color offset helper
  • freedreno/a6xx: image/ssbo state emit
  • freedreno/a6xx: compute support
  • freedreno/a6xx: cache flush harder
  • freedreno/a6xx: fix helper_invocation (sampler mask/id)
  • freedreno/ir3: handle quirky atomic dst for a6xx
  • freedreno/ir3: fix legalize for vecN inputs
  • freedreno/ir3: fix crash in compile fail case
  • freedreno/a6xx: 3d and cube image fixes
  • freedreno: fix crash w/ masked non-SSA dst
  • freedreno/ir3: rename put_dst()
  • freedreno/ir3/a6xx: fix load_ssbo barrier type.
  • freedreno/ir3: sync instr/disasm and add ldib encoding
  • freedreno/ir3/a6xx: use ldib for ssbo reads
  • freedreno/a6xx: samplerBuffer fixes
  • freedreno/a6xx: enable tiled images
  • freedreno: fix race condition
  • freedreno/ir3: don't hardcode wrmask
  • freedreno/a6xx: fix border-color offset
  • freedreno/a6xx: cube image fix
  • freedreno/a6xx: fix hangs with large shaders
  • freedreno/ir3: use nopN encoding when possible
  • freedreno/a6xx: fix ssbo alignment
  • freedreno/ir3/a6xx: fix non-ssa atomic dst
  • freedreno/a6xx: fix DRAW_IDX_INDIRECT max_indicies
  • freedreno/a6xx: vertex_id is not _zero_based
  • freedreno/ir3/a6xx: fix atomic shader outputs
  • freedreno/ir3: gsampler2DMSArray fixes
  • freedreno/ir3: include nopN in expanded instruction count
  • freedreno/ir3: add Sethi–Ullman numbering pass
  • freedreno/ir3: track register pressure in sched
  • freedreno: fix ir3_cmdline build
  • freedreno/a6xx: remove astc_srgb workaround
  • freedreno/a6xx: refactor fd6_tex_swiz()
  • freedreno/a6xx: fix border-color swizzles
  • freedreno/a6xx: perfcntrs
  • freedreno/ir3: fix ir3_cmdline harder
  • freedreno/ir3: turn on [iu]mul_high
  • freedreno/a6xx: more bcolor fixes
  • freedreno/ir3/cp: fix ldib bug
  • freedreno/ir3/a6xx: fix ssbo comp_swap
  • freedreno/ir3 better cat6 encoding detection
  • freedreno/ir3/ra: fix half-class conflicts
  • freedreno/ir3: fix sam.s2en decoding
  • freedreno/ir3: fix sam.s2en encoding
  • freedreno/ir3: fix regmask for merged regs
  • nir: move gls_type_get_{sampler,image}_count()
  • freedreno/ir3: find # of samplers from uniform vars
  • freedreno/ir3: enable indirect tex/samp (sam.s2en)
  • freedreno/ir3: optimize sam.s2en to sam
  • freedreno/ir3: additional lowering
  • freedreno/ir3: fix bit_count
  • freedreno/ir3: dynamic UBO indexing vs 64b pointers
  • freedreno/ir3: rename has_kill to no_earlyz
  • freedreno/ir3: disable early-z for SSBO/image writes
  • gallium: add PIPE_CAP_ESSL_FEATURE_LEVEL
  • mesa/st: use ESSL cap top enable gpu_shader5
  • freedreno: add ESSL cap
  • docs: update freedreno status
  • freedreno/a6xx: small cleanup
  • freedreno/ir3: sched fix
  • freedreno/ir3: reads/writes to unrelated arrays are not dependent
  • freedreno/ir3: align const size to vec4
  • nir: print var name for load_interpolated_input too
  • nir: add lower_all_io_to_elements
  • freedreno/ir3: re-indent comment
  • freedreno/ir3: rework varying packing
  • freedreno/ir3: add pass to move varying loads
  • freedreno/ir3: convert to "new style" frag inputs
  • gallium/docs: clarify set_sampler_views (v2)
  • iris: fix set_sampler_view
  • freedreno/ir3: fix const assert
  • freedreno/drm: update for robustness
  • freedreno: add robustness support
  • compiler: rename SYSTEM_VALUE_VARYING_COORD
  • freedreno/ir3: fix rgetpos decoding
  • freedreno/ir3: more emit-cat5 fixes
  • freedreno/ir3: cleanup instruction builder macros
  • freedreno: update generated headers
  • freedreno/ir3: lower load_barycentric_at_sample
  • freedreno/ir3: lower load_barycentric_at_offset
  • freedreno/ir3: remove bogus assert
  • freedreno/ir3: rename frag_vcoord -> ij_pixel
  • freedreno/a6xx: add VALIDREG/CONDREG helper macros
  • freedreno/ir3: fix load_interpolated_input slot
  • freedreno: wire up core sample-shading support
  • freedreno/ir3: sample-shading support
  • freedreno/a6xx: sample-shading support
  • docs/features: update GL too
  • freedreno/ir3: switch fragcoord to sysval
  • freedreno/a6xx: small texture emit cleanup
  • freedreno/a6xx: pre-bake UBWC flags in texture-view
  • freedreno/ir3: fixes for half reg in/out
  • freedreno/ir3: fix shader variants vs UBO analysis
  • freedreno/ir3: fix lowered ubo region alignment
  • freedreno/ir3: add IR3_SHADER_DEBUG flag to disable ubo lowering
  • freedreno/ir3: add some ubo range related asserts
  • nir: rework tex instruction printing
  • nir: fix lower_wpos_ytransform in load_frag_coord case
  • nir: add pass to lower fb reads
  • freedreno/drm: expose GMEM_BASE address
  • freedreno/ir3: fb read support
  • freedreno/a6xx: KHR_blend_equation_advanced support
  • freedreno/a6xx: smaller hammer for fb barrier
  • docs: mark KHR_blend_equation_advanced done on a6xx
  • nir: fix nir tex print harder
  • freedreno/ir3: remove assert
  • freedreno/a6xx: OUT_RELOC vs OUT_RELOCW fixes
  • freedreno: update generated headers
  • freedreno/a6xx: UBWC fixes
  • freedreno/a6xx: UBWC support for images
  • freedreno: mark imported resources as valid
  • freedreno/a6xx: buffer resources cannot be compressed
  • freedreno: move UBWC color offset to fd_resource_offset()
  • freedreno: add ubwc_enabled helper
  • freedreno/a6xx: deduplicate a few lines
  • freedreno: remove unused forward struct declaration
  • freedreno/ir3: fix rasterflat/glxgears
  • freedreno/ir3: set more barrier bits
  • freedreno/a6xx: fix GPU crash on small render targets
  • freedreno/a6xx: fix issues with gallium HUD
  • freedreno/a6xx: fix hangs with newer sqe fw

Rob Herring (2):

  • kmsro: Add lima renderonly support
  • kmsro: Add platform support for exynos and sun4i

Rodrigo Vivi (1):

  • intel: Add more PCI Device IDs for Coffee Lake and Ice Lake.

Roland Scheidegger (2):

  • gallivm: fix bogus assert in get_indirect_index
  • gallivm: fix saturated signed add / sub with llvm 9

Romain Failliot (1):

  • docs: changed "Done" to "DONE" in features.txt

Ross Burton (1):

  • Revert "meson: drop GLESv1 .so version back to 1.0.0"

Ryan Houdek (1):

  • panfrost: Adds Bifrost shader disassembler utility

Sagar Ghuge (10):

  • iris: Don't allocate a BO per query object
  • nir/glsl: Add another way of doing lower_imul64 for gen8+
  • glsl: [u/i]mulExtended optimization for GLSL
  • nir/algebraic: Optimize low 32 bit extraction
  • spirv: Allow [i/u]mulExtended to use new nir opcode
  • iris: Refactor code to share 3DSTATE_URB_* packet
  • iris: Track last VS URB entry size
  • iris: Flag fewer dirty bits in BLORP
  • intel/fs: Remove unused condition from opt_algebraic case
  • intel/compiler: Fix assertions in brw_alu3

Samuel Iglesias Gonsálvez (4):

  • isl: remove the cache line size alignment requirement
  • isl: the display engine requires 64B alignment for linear surfaces
  • radv: don't overwrite results in VkGetQueryPoolResults() when queries are not available
  • radv: write availability status vkGetQueryPoolResults() when the data is not available

Samuel Pitoiset (147):

  • radv/winsys: fix hash when adding internal buffers
  • radv: fix build
  • radv: bail out when no image transitions will be performed
  • radv: remove unused radv_render_pass_attachment::view_mask
  • radv: remove useless MAYBE_UNUSED in CmdBeginRenderPass()
  • radv: add radv_cmd_buffer_begin_subpass() helper
  • radv: move subpass image transitions to radv_cmd_buffer_begin_subpass()
  • radv: store the list of attachments for every subpass
  • radv: use the new attachments array when starting subpasses
  • radv: determine the last subpass id for every attachments
  • radv: handle final layouts at end of every subpass and render pass
  • radv: move some render pass things to radv_render_pass_compile()
  • radv: add radv_render_pass_add_subpass_dep() helper
  • radv: track if subpasses have color attachments
  • radv: handle subpass dependencies correctly
  • radv: accumulate all ingoing external dependencies to the first subpass
  • radv: execute external subpass barriers after ending subpasses
  • radv: drop useless checks when resolving subpass color attachments
  • radv: do not set preserveAttachments for internal render passes
  • radv: don't flush src stages when dstStageMask == BOTTOM_OF_PIPE
  • radv: fix compiler issues with GCC 9
  • radv: gather more info about push constants
  • radv: gather if shaders load dynamic offsets separately
  • radv: keep track of the number of remaining user SGPRs
  • radv: add support for push constants inlining when possible
  • radv: fix using LOAD_CONTEXT_REG with old GFX ME firmwares on GFX8
  • radv/winsys: fix BO list creation when RADV_DEBUG=allbos is set
  • radv: always export gl_SampleMask when the fragment shader uses it
  • ac: make use of ac_build_expand_to_vec4() in visit_image_store()
  • radv: use MAX_{VBS,VERTEX_ATTRIBS} when defining max vertex input limits
  • radv: store vertex attribute formats as pipeline keys
  • radv: reduce the number of loaded channels for vertex input fetches
  • radv: fix radv_fixup_vertex_input_fetches()
  • radv: fix invalid element type when filling vertex input default values
  • ac: add ac_build_llvm8_tbuffer_load() helper
  • ac: use new LLVM 8 intrinsic when loading 16-bit values
  • radv: write the alpha channel of MRT0 when alpha coverage is enabled
  • radv: remove unused variable in gather_push_constant_info()
  • radv: fix writing the alpha channel of MRT0 when alpha coverage is enabled
  • radv: fix clearing attachments in secondary command buffers
  • radv: fix out-of-bounds access when copying descriptors BO list
  • radv: don't copy buffer descriptors list for samplers
  • rav: use 32_AR instead of 32_ABGR when alpha coverage is required
  • radv: allocate enough space in cmdbuf when starting a subpass
  • radv: properly align the fence and EOP bug VA on GFX9
  • radv: enable lower_mul_2x32_64
  • Revert "radv: execute external subpass barriers after ending subpasses"
  • radv: fix pointSizeRange limits
  • radv: set the maximum number of IBs per submit to 192
  • ac: rework typed buffers loads for LLVM 7
  • radv: store more vertex attribute infos as pipeline keys
  • radv: use typed buffer loads for vertex input fetches
  • ac: add ac_build_{struct,raw}_tbuffer_load() helpers
  • ac: use the raw tbuffer version for 16-bit SSBO loads
  • radv: always initialize HTILE when the src layout is UNDEFINED
  • radv: always load 3 channels for formats that need to be shuffled
  • ac: use llvm.amdgcn.fract intrinsic for nir_op_ffract
  • radv: fix binding transform feedback buffers
  • ac: make use of ac_get_store_intr_attribs() where possible
  • ac/nir: set attrib flags for SSBO and image store operations
  • ac: add ac_build_buffer_store_format() helper
  • ac/nir: remove one useless check in visit_store_ssbo()
  • ac/nir: use new LLVM 8 intrinsics for SSBO atomic operations
  • ac/nir: use ac_build_buffer_load() for SSBO load operations
  • ac/nir: use ac_build_buffer_store_dword() for SSBO store operations
  • ac: use new LLVM 8 intrinsics in ac_build_buffer_load()
  • ac: add ac_build_{struct,raw}_tbuffer_store() helpers
  • ac: use new LLVM 8 intrinsic when storing 16-bit values
  • ac: use new LLVM 8 intrinsics in ac_build_buffer_store_dword()
  • ac: add various int8 definitions
  • ac: add ac_build_tbuffer_load_byte() helper
  • ac: add ac_build_tbuffer_store_byte() helper
  • radv: add missing initializations since VK_EXT_pipeline_creation_feedback
  • ac: add f16_0 and f16_1 constants
  • ac: add 16-bit support fo fsign
  • ac: add 16-bit support to fract
  • ac: fix 16-bit shifts
  • ac: fix incorrect argument type for tbuffer.{load,store} with LLVM 7
  • nir: use generic float types for frexp_exp and frexp_sig
  • spirv,nir: lower frexp_exp/frexp_sig inside a new NIR pass
  • nir: add nir_{load,store}_deref_with_access() helpers
  • spirv: propagate the access flag for store and load derefs
  • ac: use llvm.amdgcn.fmed3 intrinsic for nir_op_fmed3
  • ac: add ac_build_frexp_mant() helper and 16-bit/32-bit support
  • ac: add ac_build_frex_exp() helper ans 16-bit/32-bit support
  • radv: do not lower frexp_exp and frexp_sig
  • radv: enable VK_AMD_gpu_shader_int16
  • radv: skip updating depth/color metadata for conditional rendering
  • radv: do not always initialize HTILE in compressed state
  • ac: fix return type for llvm.amdgcn.frexp.exp.i32.64
  • ac/nir: fix nir_op_b2i16
  • ac: fix ac_build_bit_count() for 16-bit integer type
  • ac: fix ac_build_bitfield_reverse() for 16-bit integer type
  • ac: fix ac_find_lsb() for 16-bit integer type
  • ac: fix ac_build_umsb() for 16-bit integer type
  • ac/nir: add support for nir_op_b2i8
  • ac: add 8-bit support to ac_build_bit_count()
  • ac: add 8-bit support to ac_find_lsb()
  • ac: add 8-bit support to ac_build_umsb()
  • ac: add 8-bit and 64-bit support to ac_build_bitfield_reverse()
  • radv: partially enable VK_KHR_shader_float16_int8
  • nir: do not pack varying with different types
  • ac/nir: fix intrinsic names for atomic operations with LLVM 9+
  • radv: fix getting the vertex strides if the bindings aren't contiguous
  • ac/nir: fix nir_op_b2f16
  • radv: enable VK_AMD_gpu_shader_half_float
  • wsi: allow to override the present mode with MESA_VK_WSI_PRESENT_MODE
  • ac/nir: make use of ac_build_imax() where possible
  • ac/nir: make use of ac_build_imin() where possible
  • ac/nir: make use of ac_build_umin() where possible
  • ac: add ac_build_umax() and use it where possible
  • ac: add ac_build_ddxy_interp() helper
  • ac: add ac_build_load_helper_invocation() helper
  • ac/nir: remove useles LLVMGetUndef for nir_op_pack_64_2x32_split
  • ac/nir: remove useless integer cast in adjust_sample_index_using_fmask()
  • ac/nir: remove useless integer cast in visit_image_load()
  • ac/nir: remove some useless integer casts for ALU operations
  • spirv: add SpvCapabilityFloat16 support
  • radv: enable VK_KHR_shader_float16_int8
  • radv: set ACCESS_NON_READABLE on stores for copy/fill/clear meta shaders
  • radv: enable shaderInt8 on SI and CIK
  • radv: sort the shader capabilities alphabetically
  • ac/nir: use new LLVM 8 intrinsics for SSBO atomics except cmpswap
  • ac/nir: add 64-bit SSBO atomic operations support
  • radv: add VK_KHR_shader_atomic_int64 but disable it for now
  • ac: add support for more types with struct/raw LLVM intrinsics
  • ac: use struct/raw load intrinsics for 8-bit/16-bit int with LLVM 9+
  • ac: use struct/raw store intrinsics for 8-bit/16-bit int with LLVM 9+
  • ac/nir: only use the new raw/struct image atomic intrinsics with LLVM 9+
  • ac/nir: only use the new raw/struct SSBO atomic intrinsics with LLVM 9+
  • ac/nir: use the new raw/struct SSBO atomic intrisics for comp_swap
  • radv: add VK_NV_compute_shader_derivates support
  • radv: add missing VEGA20 chip in radv_get_device_name()
  • radv: do not need to force emit the TCS regs on Vega20
  • radv: fix color conversions for normalized uint/sint formats
  • radv: implement a workaround for VK_EXT_conditional_rendering
  • ac: tidy up ac_build_llvm8_tbuffer_{load,store}
  • radv: set WD_SWITCH_ON_EOP=1 when drawing primitives from a stream output buffer
  • radv: only need to force emit the TCS regs on Vega10 and Raven1
  • radv: fix radv_get_aspect_format() for D+S formats
  • radv: apply the indexing workaround for atomic buffer operations on GFX9
  • radv: fix setting the number of rectangles when it's dyanmic
  • radv: add a workaround for Monster Hunter World and LLVM 7&8
  • radv: allocate more space in the CS when emitting events
  • radv: do not use gfx fast depth clears for layered depth/stencil images
  • radv: fix alpha-to-coverage when there is unused color attachments
  • radv: fix setting CB_SHADER_MASK for dual source blending

Sergii Romantsov (4):

  • dri: meson: do not prefix user provided dri-drivers-path
  • d3d: meson: do not prefix user provided d3d-drivers-path
  • i965,iris/blorp: do not blit 0-sizes
  • glsl: Fix input/output structure matching across shader stages

Sonny Jiang (1):

  • radeonsi: use compute for clear_render_target when possible

Tapani Pälli (42):

  • nir: add option to use scaling factor when sampling planes YUV lowering
  • dri: add P010, P012, P016 for 10bit/12bit/16bit YUV420 formats
  • intel/compiler: add scale_factors to sampler_prog_key_data
  • i965: add P0x formats and propagate required scaling factors
  • drirc/i965: add option to disable 565 configs and visuals
  • mesa: return NULL if we exceed MaxColorAttachments in get_fb_attachment
  • anv: anv: refactor error handling in anv_shader_bin_write_to_blob()
  • iris: add Android build
  • nir: initialize value in copy_prop_vars_block
  • nir: use nir_variable_create instead of open-coding the logic
  • android: add liblog to libmesa_intel_common build
  • android: make libbacktrace optional on USE_LIBBACKTRACE
  • iris: add libmesa_iris_gen8 library to the build
  • util: fix a warning when building against clang7 headers
  • anv: retain the is_array state in create_plane_tex_instr_implicit
  • anv: toggle on support for VK_EXT_ycbcr_image_arrays
  • anv: use anv_gem_munmap in block pool cleanup
  • anv: call blob_finish when done with it
  • nir: free dead_ctx in case of no progress
  • anv: destroy descriptor sets when pool gets destroyed
  • anv: release memory allocated by bo_heap when descriptor pool is destroyed
  • anv: release memory allocated by glsl types during spirv_to_nir
  • anv: revert "anv: release memory allocated by glsl types during spirv_to_nir"
  • i965: remove scaling factors from P010, P012
  • isl: fix automake build when sse41 is not supported
  • android: Build fixes for OMR1
  • iris: initialize num_cbufs
  • iris: mark switch case fallthrough
  • anv/radv: release memory allocated by glsl types during spirv_to_nir
  • st/mesa: fix compilation warning on storage_flags_to_buffer_flags
  • st/mesa: fix warnings about implicit conversion on enumeration type
  • spirv: fix a compiler warning
  • st/nir: run st_nir_opts after 64bit ops lowering
  • iris: move variable to the scope where it is being used
  • iris: move iris_flush_resource so we can call it from get_handle
  • iris: handle aux properly in iris_resource_get_handle
  • egl: setup fds array correctly when exporting dmabuf
  • compiler/glsl: handle case where we have multiple users for types
  • android/iris: fix driinfo header filename
  • nir: use braces around subobject in initializer
  • glsl: use empty brace initializer
  • anv: expose VK_EXT_queue_family_foreign on Android

Thomas Hellstrom (5):

  • winsys/svga: Add an environment variable to force host-backed operation
  • winsys/svga: Enable the transfer_from_buffer GPU command for vgpu10
  • svga: Avoid bouncing buffer data in malloced buffers
  • winsys/svga: Update the drm interface file
  • winsys/svga: Don't abort on EBUSY errors from execbuffer

Timo Aaltonen (1):

  • util/os_misc: Add check for PIPE_OS_HURD

Timothy Arceri (72):

  • st/glsl_to_nir: remove dead local variables
  • ac/radv/radeonsi: add ac_get_num_physical_sgprs() helper
  • radv: take LDS into account for compute shader occupancy stats
  • util: move BITFIELD macros to util/macros.h
  • st/glsl_to_nir: call nir_remove_dead_variables() after lowing local indirects
  • nir: add support for marking used patches when packing varyings
  • nir: add glsl_type_is_32bit() helper
  • nir: add is_packing_supported_for_type() helper
  • nir: rewrite varying component packing
  • nir: prehash instruction in nir_instr_set_add_or_rewrite()
  • nir: turn ssa check into an assert
  • nir: turn an ssa check in nir_search into an assert
  • nir: remove simple dead if detection from nir_opt_dead_cf()
  • radeonsi/nir: set input_usage_mask properly
  • radeonsi/nir: set colors_read properly
  • radeonsi/nir: set shader_buffers_declared properly
  • st/nir: use NIR for asm programs
  • nir: remove non-ssa support from nir_copy_prop()
  • nir: clone instruction set rather than removing individual entries
  • nir: allow nir_lower_phis_to_scalar() on more src types
  • radeonsi: fix query buffer allocation
  • glsl: fix shader cache for packed param list
  • radeonsi/nir: move si_lower_nir() call into compiler thread
  • glsl: rename is_record() -> is_struct()
  • glsl: rename get_record_instance() -> get_struct_instance()
  • glsl: rename record_location_offset() -> struct_location_offset()
  • glsl: rename record_types -> struct_types
  • nir: rename glsl_type_is_struct() -> glsl_type_is_struct_or_ifc()
  • glsl/freedreno/panfrost: pass gl_context to the standalone compiler
  • glsl: use NIR function inlining for drivers that use glsl_to_nir()
  • i965: stop calling nir_lower_returns()
  • radeonsi/nir: stop calling nir_lower_returns()
  • st/glsl: start spilling out common st glsl conversion code
  • anv: add support for dumping shader info via VK_EXT_debug_report
  • nir: add guess trip count support to loop analysis
  • nir: add new partially_unrolled bool to nir_loop
  • nir: add partial loop unrolling support
  • nir: calculate trip count for more loops
  • nir: unroll some loops with a variable limit
  • nir: simplify the loop analysis trip count code a little
  • nir: add helper to return inversion op of a comparison
  • nir: add get_induction_and_limit_vars() helper to loop analysis
  • nir: pass nir_op to calculate_iterations()
  • nir: find induction/limit vars in iand instructions
  • st/glsl_to_nir: fix incorrect arrary access
  • radeonsi/nir: call some more var optimisation passes
  • ac/nir_to_llvm: add assert to emit_bcsel()
  • nir: only override previous alu during loop analysis if supported
  • nir: fix opt_if_loop_last_continue()
  • nir: add support for user defined loop control
  • spirv: make use of the loop control support in nir
  • nir: add support for user defined select control
  • spirv: make use of the select control support in nir
  • Revert "ac/nir: use new LLVM 8 intrinsics for SSBO atomic operations"
  • nir: propagate known constant values into the if-then branch
  • Revert "nir: propagate known constant values into the if-then branch"
  • nir/radv: remove restrictions on opt_if_loop_last_continue()
  • nir: initialise some variables in opt_if_loop_last_continue()
  • nir/i965/freedreno/vc4: add a bindless bool to type size functions
  • ac/nir_to_llvm: make get_sampler_desc() more generic and pass it the image intrinsic
  • ac/nir_to_llvm: add image bindless support
  • nir: fix packing components with arrays
  • radeonsi/nir: fix scanning of bindless images
  • st/mesa/radeonsi: fix race between destruction of types and shader compilation
  • nir: fix nir_remove_unused_varyings()
  • radeonsi/nir: create si_nir_opts() helper
  • radeonsi/nir: call radeonsi nir opts before the scan pass
  • util/drirc: add workarounds for bugs in Doom 3: BFG
  • radeonsi: add config entry for Counter-Strike Global Offensive
  • Revert "glx: Fix synthetic error generation in __glXSendError"
  • Revert "st/mesa: expose 0 shader binary formats for compat profiles for Qt"
  • st/glsl: make sure to propagate initialisers to driver storage

Timur Kristóf (19):

  • radeonsi/nir: Use uniform location when calculating const_file_max.
  • iris: implement clearing render target and depth stencil
  • nir: Add ability for shaders to use window space coordinates.
  • tgsi_to_nir: Fix the TGSI ARR translation by converting the result to int.
  • tgsi_to_nir: Fix TGSI LIT translation by using flt.
  • tgsi_to_nir: Make the TGSI IF translation code more readable.
  • tgsi_to_nir: Split to smaller functions.
  • nir: Move nir_lower_uniforms_to_ubo to compiler/nir.
  • nir: Add multiplier argument to nir_lower_uniforms_to_ubo.
  • freedreno: Plumb pipe_screen through to irX_tgsi_to_nir.
  • tgsi_to_nir: Produce optimized NIR for a given pipe_screen.
  • tgsi_to_nir: Restructure system value loads.
  • tgsi_to_nir: Extract ttn_emulate_tgsi_front_face into its own function.
  • tgsi_to_nir: Support FACE and POSITION properly.
  • tgsi_to_nir: Improve interpolation modes.
  • tgsi_to_nir: Set correct location for uniforms.
  • radeonsi/nir: Only set window_space_position for vertex shaders.
  • iris: Face should be a system value.
  • gallium: fix autotools build of pipe_msm.la

Tobias Klausmann (1):

  • vulkan/util: meson build - add wayland client include

Tomasz Figa (1):

  • llvmpipe: Always return some fence in flush (v2)

Tomeu Vizoso (19):

  • panfrost: Add gem_handle to panfrost_memory and panfrost_bo
  • panfrost: Add backend targeting the DRM driver
  • panfrost/midgard: Add support for MIDGARD_MESA_DEBUG
  • panfrost: Add support for PAN_MESA_DEBUG
  • panfrost: Set bo->size[0] in the DRM backend
  • panfrost: Set bo->gem_handle when creating a linear BO
  • panfrost: Adapt to uapi changes
  • panfrost: Fix sscanf format options
  • panfrost: Set the GEM handle for AFBC buffers
  • panfrost: Also tell the kernel about the checksum_slab
  • panfrost: Pass the context BOs to the kernel so they aren't unmapped while in use
  • panfrost: Wait for last job to finish in force_flush_fragment
  • panfrost: split asserts in pandecode
  • panfrost: Guard against reading past end of buffer
  • panfrost/ci: Initial commit
  • panfrost/midgard: Skip register allocation if there's no work to do
  • panfrost/midgard: Skip liveness analysis for instructions without dest
  • panfrost: Fix two uninitialized accesses in compiler
  • panfrost: Only take the fast paths on buffers aligned to block size

Toni Lönnberg (8):

  • intel/genxml: Only handle instructions meant for render engine when generating headers
  • intel/genxml: Media instructions and structures for gen6
  • intel/genxml: Media instructions and structures for gen7
  • intel/genxml: Media instructions and structures for gen7.5
  • intel/genxml: Media instructions and structures for gen8
  • intel/genxml: Media instructions and structures for gen9
  • intel/genxml: Media instructions and structures for gen10
  • intel/genxml: Media instructions and structures for gen11

Topi Pohjolainen (2):

  • intel/compiler/icl: Use tcs barrier id bits 24:30 instead of 24:27
  • intel/compiler/fs/icl: Use dummy masked urb write for tess eval

Vasily Khoruzhick (2):

  • lima: use individual tile heap for each GP job.
  • lima: add support for depth/stencil fbo attachments and textures

Vinson Lee (5):

  • gallium/auxiliary/vl: Fix duplicate symbol build errors.
  • nir: Fix anonymous union initialization with older GCC.
  • swr: Fix build with llvm-9.0.
  • gallium: Fix autotools build with libxatracker.la.
  • freedreno: Fix GCC build error.

Vivek Kasireddy (1):

  • drm-uapi: Update headers from drm-next

Xavier Bouchoux (1):

  • nir/spirv: Fix assert when unsampled OpTypeImage has unknown 'Depth'

Yevhenii Kolesnikov (1):

  • i965: Fix allow_higher_compat_version workaround limited by OpenGL 3.0

coypu (1):

  • gbm: don't return void

davidbepo (1):

  • drirc: add Waterfox to adaptive-sync blacklist

grmat (1):

  • drirc: add Spectacle, Falkon to a-sync blacklist

pal1000 (1):

  • scons: Compatibility with Scons development version string

suresh guttula (3):

  • vl: Add cropping flags for H264
  • radeon/vce:Add support for frame_cropping_flag of VAEncSequenceParameterBufferH264
  • st/va/enc: Add support for frame_cropping_flag of VAEncSequenceParameterBufferH264

diff --git a/docs/relnotes/19.1.1.html b/docs/relnotes/19.1.1.html
new file mode 100644
index 00000000000..9d7426067ac
--- /dev/null
+++ b/docs/relnotes/19.1.1.html
@@ -0,0 +1,154 @@
The Mesa 3D Graphics Library

Mesa 19.1.1 Release Notes / June 25, 2019

Mesa 19.1.1 is a bug fix release which fixes bugs found since the 19.1.0 release.

Mesa 19.1.1 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is only available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.
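
For reference, the check described in the paragraph above can be performed at
runtime. The short C sketch below is not part of the original notes; it assumes
a current OpenGL context has already been created through GLX, EGL or a similar
API, and simply prints the version the driver actually reports:

  /* Minimal sketch: query the version reported by the driver at runtime.
   * Assumes a current OpenGL context created elsewhere (GLX, EGL, ...). */
  #include <stdio.h>
  #include <GL/gl.h>
  #include <GL/glext.h>

  static void print_reported_gl_version(void)
  {
     GLint major = 0, minor = 0;

     /* Full version string, e.g. "4.5 (Core Profile) Mesa 19.1.1" */
     printf("GL_VERSION: %s\n", (const char *)glGetString(GL_VERSION));

     /* Integer queries; GL_MAJOR_VERSION/GL_MINOR_VERSION exist since GL 3.0 */
     glGetIntegerv(GL_MAJOR_VERSION, &major);
     glGetIntegerv(GL_MINOR_VERSION, &minor);
     printf("Context version: %d.%d\n", major, minor);
  }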

SHA256 checksums

72114b16b4a84373b2acda060fe2bb1d45ea2598efab3ef2d44bdeda74f15581  mesa-19.1.1.tar.xz

New features

None

Bug fixes

  • Bug 110709 - g_glxglvnddispatchfuncs.c and glxglvnd.c fail to build with clang 8.0
  • Bug 110901 - mesa-19.1.0/src/util/futex.h:82: use of out of scope variable ?
  • Bug 110902 - mesa-19.1.0/src/broadcom/compiler/vir_opt_redundant_flags.c:104]: (style) Same expression
  • Bug 110921 - virgl on OpenGL 3.3 host regressed to OpenGL 2.1

Changes

Alejandro Piñeiro (1):

  • v3d: fix checking twice auf flag

Bas Nieuwenhuizen (5):

  • radv: Skip transitions coming from external queue.
  • radv: Decompress DCC when the image format is not allowed for buffers.
  • radv: Fix vulkan build in meson.
  • anv: Fix vulkan build in meson.
  • meson: Allow building radeonsi with just the android platform.

Dave Airlie (1):

  • nouveau: fix frees in unsupported IR error paths.

Eduardo Lima Mitev (1):

  • freedreno/a5xx: Fix indirect draw max_indices calculation

Eric Engestrom (3):

  • util/futex: fix dangling pointer use
  • glx: fix glvnd pointer types
  • util/os_file: resize buffer to what was actually needed

Gert Wollny (1):

  • virgl: Assume sRGB write control for older guest kernels or virglrenderer hosts

Haihao Xiang (1):

  • i965: support UYVY for external import only

Jason Ekstrand (1):

  • anv: Set STATE_BASE_ADDRESS upper bounds on gen7

Juan A. Suarez Romero (2):

  • docs: Add SHA256 sums for 19.1.0
  • Update version to 19.1.1

Kenneth Graunke (2):

  • glsl: Fix out of bounds read in shader_cache_read_program_metadata
  • iris: Fix iris_flush_and_dirty_history to actually dirty history.

Kevin Strasser (2):

  • gallium/winsys/kms: Fix dumb buffer bpp
  • st/mesa: Add rgbx handling for fp formats

Lionel Landwerlin (2):

  • anv: do not parse genxml data without INTEL_DEBUG=bat
  • intel/dump: fix segfault when the app hasn't accessed the device

Mathias Fröhlich (1):

  • egl: Don't add hardware device if there is no render node v2.

Richard Thier (1):

  • r300g: restore performance after RADEON_FLAG_NO_INTERPROCESS_SHARING was added

Rob Clark (1):

  • freedreno/a6xx: un-swap X24S8_UINT

Samuel Pitoiset (4):

  • radv: fix occlusion queries on VegaM
  • radv: fix VK_EXT_memory_budget if one heap isn't available
  • radv: fix FMASK expand with SRGB formats
  • radv: disable viewport clamping even if FS doesn't write Z

diff --git a/docs/relnotes/19.1.2.html b/docs/relnotes/19.1.2.html
new file mode 100644
index 00000000000..b4ecf1d0e06
--- /dev/null
+++ b/docs/relnotes/19.1.2.html
@@ -0,0 +1,194 @@
The Mesa 3D Graphics Library

Mesa 19.1.2 Release Notes / July 9, 2019

Mesa 19.1.2 is a bug fix release which fixes bugs found since the 19.1.1 release.

Mesa 19.1.2 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is only available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.

SHA256 checksums

813a144ea8ebefb7b48b6733f3f603855b0f61268d86cc1cc26a6b4be908fcfd  mesa-19.1.2.tar.xz

New features

None

Bug fixes

  • Bug 110702 - segfault in radeonsi HEVC hardware decoding with yuv420p10le
  • Bug 110783 - Mesa 19.1 rc crashing MPV with VAAPI
  • Bug 110944 - [Bisected] Blender 2.8 crashes when closing certain windows
  • Bug 110953 - Adding a redundant single-iteration do-while loop causes different image to be rendered
  • Bug 110999 - 19.1.0: assert in vkAllocateDescriptorSets using immutable samplers on Ivy Bridge
  • Bug 111019 - radv doesn't handle variable descriptor count properly

Changes

Anuj Phogat (3):

  • Revert "i965/icl: Add WA_2204188704 to disable pixel shader panic dispatch"
  • Revert "anv/icl: Add WA_2204188704 to disable pixel shader panic dispatch"
  • Revert "iris/icl: Add WA_2204188704 to disable pixel shader panic dispatch"

Arfrever Frehtes Taifersar Arahesis (1):

  • meson: Improve detection of Python when using Meson >=0.50.

Bas Nieuwenhuizen (2):

  • radv: Only allocate supplied number of descriptors when variable.
  • radv: Fix interactions between variable descriptor count and inline uniform blocks.

Caio Marcelo de Oliveira Filho (1):

  • spirv: Ignore ArrayStride in OpPtrAccessChain for Workgroup

Dylan Baker (2):

  • meson: Add support for using cmake for finding LLVM
  • Revert "meson: Add support for using cmake for finding LLVM"

Eric Anholt (2):

  • freedreno: Fix UBO load range detection on booleans.
  • freedreno: Fix up end range of unaligned UBO loads.

Eric Engestrom (1):

  • meson: bump required libdrm version to 2.4.81

Gert Wollny (2):

  • gallium: Add CAP for opcode DIV
  • vl: Use CS composite shader only if TEX_LZ and DIV are supported

Ian Romanick (1):

  • glsl: Don't increase the iteration count when there are no terminators

James Clarke (1):

  • meson: GNU/kFreeBSD has DRM/KMS and requires -D_GNU_SOURCE

Jason Ekstrand (2):

  • anv/descriptor_set: Only write texture swizzles if we have an image view
  • iris: Use a uint16_t for key sizes

Jory Pratt (2):

  • util: Heap-allocate 256K zlib buffer
  • meson: Search for execinfo.h

Juan A. Suarez Romero (3):

  • docs: add sha256 checksums for 19.1.1
  • intel: fix wrong format usage
  • Update version to 19.1.2

Kenneth Graunke (2):

  • iris: Enable PIPE_CAP_SURFACE_REINTERPRET_BLOCKS
  • gallium: Make util_copy_image_view handle shader_access

Lionel Landwerlin (2):

  • intel/compiler: fix derivative on y axis implementation
  • intel/compiler: don't use byte operands for src1 on ICL

Nanley Chery (2):

  • intel: Add and use helpers for level0 extent
  • isl: Don't align phys_level0_sa by block dimension

Nataraj Deshpande (1):

  • anv: Add HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED in vk_format

Pierre-Eric Pelloux-Prayer (2):

  • mesa: delete framebuffer texture attachment sampler views
  • radeon/uvd: fix calc_ctx_size_h265_main10

Rob Clark (1):

  • freedreno/a5xx: fix batch leak in fd5 blitter path

Sagar Ghuge (1):

  • glsl: Fix round64 conversion function

Samuel Pitoiset (1):

  • radv: only enable VK_AMD_gpu_shader_{half_float,int16} on GFX9+

Sergii Romantsov (1):

  • i965: leaking of upload-BO with push constants

Ville Syrjälä (1):

  • anv/cmd_buffer: Reuse gen8 Cmd{Set, Reset}Event on gen7

diff --git a/docs/relnotes/19.1.3.html b/docs/relnotes/19.1.3.html
new file mode 100644
index 00000000000..abf0a8949c6
--- /dev/null
+++ b/docs/relnotes/19.1.3.html
@@ -0,0 +1,191 @@
The Mesa 3D Graphics Library

Mesa 19.1.3 Release Notes / July 23, 2019

Mesa 19.1.3 is a bug fix release which fixes bugs found since the 19.1.2 release.

Mesa 19.1.3 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is only available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.

SHA256 checksums

845460b2225d15c15d4a9743dec798ff0b7396b533011d43e774e67f7825b7e0  mesa-19.1.3.tar.xz

New features

None

Bug fixes

  • Bug 109203 - [cfl dxvk] GPU Crash Launching Monopoly Plus (Iris Plus 655 / Wine + DXVK)
  • Bug 109524 - "Invalid glsl version in shading_language_version()" when trying to run directX games using wine
  • Bug 110309 - [icl][bisected] regression on piglit arb_gpu_shader_int 64.execution.fs-ishl-then-* tests
  • Bug 110663 - threads_posix.h:96: undefined reference to `pthread_once'
  • Bug 110955 - Mesa 18.2.8 implementation error: Invalid GLSL version in shading_language_version()
  • Bug 111010 - Cemu Shader Cache Corruption Displaying Solid Color After commit 11e16ca7ce0
  • Bug 111071 - SPIR-V shader processing fails with message about "extra dangling SSA sources"
  • Bug 111075 - Processing of SPIR-V shader causes device hang, sometimes leading to system reboot
  • Bug 111097 - Can not detect VK_ERROR_OUT_OF_DATE_KHR or VK_SUBOPTIMAL_KHR when window resizing

Changes

Bas Nieuwenhuizen (3):

  • radv: Handle cmask being disallowed by addrlib.
  • anv: Add android dependencies on android.
  • radv: Only save the descriptor set if we have one.

Caio Marcelo de Oliveira Filho (2):

  • anv: Fix pool allocator when first alloc needs to grow
  • spirv: Fix stride calculation when lowering Workgroup to offsets

Chia-I Wu (2):

  • anv: fix VkExternalBufferProperties for unsupported handles
  • anv: fix VkExternalBufferProperties for host allocation

Connor Abbott (1):

  • nir: Add a helper to determine if an intrinsic can be reordered

Dave Airlie (1):

  • radv: fix crash in shader tracing.

Eric Anholt (1):

  • freedreno: Fix assertion failures in context setup in shader-db mode.

Gert Wollny (1):

  • softpipe: Remove unused static function

Ian Romanick (4):

  • intel/vec4: Reswizzle VF immediates too
  • nir: Add unit tests for nir_opt_comparison_pre
  • nir: Use nir_src_bit_size instead of alu1->dest.dest.ssa.bit_size
  • mesa: Set minimum possible GLSL version

Jason Ekstrand (13):

  • nir/instr_set: Expose nir_instrs_equal()
  • nir/loop_analyze: Fix phi-of-identical-alu detection
  • nir: Add more helpers for working with const values
  • nir/loop_analyze: Handle bit sizes correctly in calculate_iterations
  • nir/loop_analyze: Bail if we encounter swizzles
  • anv: Set Stateless Data Port Access MOCS
  • nir/opt_if: Clean up single-src phis in opt_if_loop_terminator
  • nir,intel: Add support for lowering 64-bit nir_opt_extract_*
  • anv: Account for dynamic stencil write disables in the PMA fix
  • nir/regs_to_ssa: Handle regs in phi sources properly
  • nir/loop_analyze: Refactor detection of limit vars
  • nir: Add some helpers for chasing SSA values properly
  • nir/loop_analyze: Properly handle swizzles in loop conditions

Juan A. Suarez Romero (2):

  • docs: add sha256 checksums for 19.1.2
  • Update version to 19.1.3

Lepton Wu (1):

  • virgl: Set meta data for textures from handle.

Lionel Landwerlin (6):

  • vulkan/overlay: fix command buffer stats
  • vulkan/overlay: fix crash on freeing NULL command buffer
  • anv: fix crash in vkCmdClearAttachments with unused attachment
  • vulkan/wsi: update swapchain status on vkQueuePresent
  • anv: report timestampComputeAndGraphics true
  • anv: fix format mapping for depth/stencil formats

Marek Olšák (1):

  • radeonsi: don't set READ_ONLY for const_uploader to fix bindless texture hangs

Samuel Iglesias Gonsálvez (1):

  • anv: fix alphaToCoverage when there is no color attachment

Samuel Pitoiset (1):

  • radv: fix VGT_GS_MODE if VS uses the primitive ID

Sergii Romantsov (1):

  • meta: memory leak of CopyPixels usage

Timothy Arceri (1):

  • mesa: save/restore SSO flag when using ARB_get_program_binary

Vinson Lee (1):

  • meson: Add dep_thread dependency.

Yevhenii Kolesnikov (1):

  • meta: leaking of BO with DrawPixels

diff --git a/docs/relnotes/19.1.4.html b/docs/relnotes/19.1.4.html
new file mode 100644
index 00000000000..690b49e7ee0
--- /dev/null
+++ b/docs/relnotes/19.1.4.html
@@ -0,0 +1,227 @@
The Mesa 3D Graphics Library

Mesa 19.1.4 Release Notes / August 7, 2019

Mesa 19.1.4 is a bug fix release which fixes bugs found since the 19.1.3 release.

Mesa 19.1.4 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is only available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.

SHA256 checksums

a6d268a7d9edcfd92b6da80f2e34e6e0a7baaa442efbeba2fc66c404943c6bfb  mesa-19.1.4.tar.xz

New features

None

Bug fixes

  • Bug 109203 - [cfl dxvk] GPU Crash Launching Monopoly Plus (Iris Plus 655 / Wine + DXVK)
  • Bug 109524 - "Invalid glsl version in shading_language_version()" when trying to run directX games using wine
  • Bug 110309 - [icl][bisected] regression on piglit arb_gpu_shader_int 64.execution.fs-ishl-then-* tests
  • Bug 110663 - threads_posix.h:96: undefined reference to `pthread_once'
  • Bug 110955 - Mesa 18.2.8 implementation error: Invalid GLSL version in shading_language_version()
  • Bug 111010 - Cemu Shader Cache Corruption Displaying Solid Color After commit 11e16ca7ce0
  • Bug 111071 - SPIR-V shader processing fails with message about "extra dangling SSA sources"
  • Bug 111075 - Processing of SPIR-V shader causes device hang, sometimes leading to system reboot
  • Bug 111097 - Can not detect VK_ERROR_OUT_OF_DATE_KHR or VK_SUBOPTIMAL_KHR when window resizing

Changes

Andres Rodriguez (1):

  • radv: fix queries with WAIT_BIT returning VK_NOT_READY

Andrii Simiklit (2):

  • intel/compiler: don't use a keyword struct for a class fs_reg
  • meson: add a warning for meson < 0.46.0

Arcady Goldmints-Orlov (1):

  • anv: report HOST_ALLOCATION as supported for images

Bas Nieuwenhuizen (3):

  • radv: Set correct metadata size for GFX9+.
  • radv: Take variable descriptor counts into account for buffer entries.
  • radv: Fix descriptor set allocation failure.

Boyuan Zhang (4):

  • radeon/uvd: fix poc for hevc encode
  • radeon/vcn: fix poc for hevc encode
  • radeon/uvd: enable rate control for hevc encoding
  • radeon/vcn: enable rate control for hevc encoding

Caio Marcelo de Oliveira Filho (1):

  • anv: Remove special allocation for anv_push_constants

Connor Abbott (1):

  • nir: Allow qualifiers on copy_deref and image instructions

Daniel Schürmann (1):

  • spirv: Fix order of barriers in SpvOpControlBarrier

Dave Airlie (1):

  • st/nir: fix arb fragment stage conversion

Dylan Baker (1):

  • meson: allow building all glx without any drivers

Emil Velikov (1):

  • egl/drm: ensure the backing gbm is set before using it

Eric Anholt (1):

  • freedreno: Fix data races with allocating/freeing struct ir3.

Eric Engestrom (5):

  • nir: don't return void
  • util: fix no-op macro (bad number of arguments)
  • gallium+mesa: fix tgsi_semantic array type
  • scons+meson: suppress spammy build warning on MacOS
  • nir: remove explicit nir_intrinsic_index_flag values

Francisco Jerez (1):

  • intel/ir: Fix CFG corruption in opt_predicated_break().

Ilia Mirkin (4):

  • gallium/vl: fix compute tgsi shaders to not process undefined components
  • nv50,nvc0: update sampler/view bind functions to accept NULL array
  • nvc0: allow a non-user buffer to be bound at position 0
  • nv50/ir: handle insn not being there for definition of CVT arg

Jason Ekstrand (6):

  • intel/fs: Stop stack allocating large arrays
  • anv: Disable transform feedback on gen7
  • isl/formats: R8G8B8_UNORM_SRGB isn't supported on HSW
  • anv: Don't claim support for 24 and 48-bit formats on IVB
  • intel/fs: Use ALIGN16 instructions for all derivatives on gen <= 7
  • intel/fs: Implement quad_swap_horizontal with a swizzle on gen7

Juan A. Suarez Romero (2):

  • docs: add sha256 checksums for 19.1.3
  • Update version to 19.1.4

Kenneth Graunke (4):

  • mesa: Fix ReadBuffers with pbuffers
  • egl: Quiet warning about front buffer rendering for pixmaps/pbuffers
  • egl: Make the 565 pbuffer-only config single buffered.
  • egl: Only expose 565 pbuffer configs if X can export them as DRI3 images

Lionel Landwerlin (5):

  • anv: fix use of comma operator
  • nir: add access to image_deref intrinsics
  • spirv: wrap push ssa/pointer values
  • spirv: propagate access qualifiers through ssa & pointer
  • spirv: don't discard access set by vtn_pointer_dereference

Mark Menzynski (1):

  • nvc0/ir: Fix assert accessing null pointer

Nataraj Deshpande (1):

  • egl/android: Update color_buffers querying for buffer age

Nicolas Dufresne (1):

  • egl: Also query modifiers when exporting DMABuf

Rhys Perry (1):

  • ac/nir: fix txf_ms with an offset

Samuel Pitoiset (1):

  • radv: fix crash in vkCmdClearAttachments with unused attachment

Tapani Pälli (1):

  • mesa: add glsl_type ref to one_time_init and decref to atexit

Yevhenii Kolesnikov (1):

  • main: Fix memleaks in mesa_use_program

diff --git a/docs/relnotes/19.1.5.html b/docs/relnotes/19.1.5.html
new file mode 100644
index 00000000000..f83440e7df5
--- /dev/null
+++ b/docs/relnotes/19.1.5.html
@@ -0,0 +1,119 @@
The Mesa 3D Graphics Library

Mesa 19.1.5 Release Notes / August 23, 2019

Mesa 19.1.5 is a bug fix release which fixes bugs found since the 19.1.4 release.

Mesa 19.1.5 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is only available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.

SHA256 checksums

7b54e14e35c7251b171b4cf9d84cbc1d760eafe00132117db193454999cd6eb4  mesa-19.1.5.tar.xz

New features

None

Bug fixes

  • Bug 109630 - vkQuake flickering geometry under Intel
  • Bug 110395 - Shadows are flickering in SuperTuxKart
  • Bug 111113 - ANGLE BlitFramebufferTest.MultisampleDepthClear/ES3_OpenGL fails on Intel Ubuntu19.04
  • Bug 111267 - [CM246] Flickering with multiple draw calls within the same graphics pipeline if a compute pipeline is present

Changes

Bas Nieuwenhuizen (4):

  • radv: Do non-uniform lowering before bool lowering.
  • ac/nir: Use correct cast for readfirstlane and ptrs.
  • radv: Avoid binning RAVEN hangs.
  • radv: Avoid VEGA/RAVEN scissor bug in binning.

Danylo Piliaiev (1):

  • i965: Emit a dummy MEDIA_VFE_STATE before switching from GPGPU to 3D

Eric Engestrom (1):

  • util: fix mem leak of program path

Erik Faye-Lund (2):

  • gallium/dump: add missing query-type to short-list
  • gallium/dump: add missing query-type to short-list

Greg V (2):

  • anv: remove unused Linux-specific include
  • intel/perf: use MAJOR_IN_SYSMACROS/MAJOR_IN_MKDEV

Jason Ekstrand (1):

  • anv: Emit a dummy MEDIA_VFE_STATE before switching from GPGPU to 3D

Juan A. Suarez Romero (3):

  • docs: add sha256 checksums for 19.1.4
  • cherry-ignore: panfrost: Make ctx->job useful
  • Update version to 19.1.5

Marek Olšák (2):

  • radeonsi: disable SDMA image copies on dGPUs to fix corruption in games
  • radeonsi: fix an assertion failure: assert(!res->b.is_shared)

Matt Turner (1):

  • meson: Test for program_invocation_name

Sergii Romantsov (1):

  • i965/clear: clear_value better precision

diff --git a/docs/relnotes/19.1.6.html b/docs/relnotes/19.1.6.html
new file mode 100644
index 00000000000..bf86b5a56e1
--- /dev/null
+++ b/docs/relnotes/19.1.6.html
@@ -0,0 +1,132 @@
The Mesa 3D Graphics Library

Mesa 19.1.6 Release Notes / September 3, 2019

Mesa 19.1.6 is a bug fix release which fixes bugs found since the 19.1.5 release.

Mesa 19.1.6 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is only available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.

SHA256 checksums

2a369b7b48545c6486e7e44913ad022daca097c8bd937bf30dcf3f17a94d3496  mesa-19.1.6.tar.xz

New features

None

Bug fixes

  • Bug 104395 - [CTS] GTF-GL46.gtf32.GL3Tests.packed_pixels.packed_pixels tests fail on 32bit Mesa
  • Bug 111213 - VA-API nouveau SIGSEGV and asserts
  • Bug 111241 - Shadertoy shader causing hang
  • Bug 111411 - SPIR-V shader leads to GPU hang, sometimes making machine unstable

Changes

Andres Rodriguez (1):

  • radv: additional query fixes

Daniel Schürmann (1):

  • nir/lcssa: handle deref instructions properly

Danylo Piliaiev (1):

  • nir/loop_unroll: Prepare loop for unrolling in wrapper_unroll

Ian Romanick (2):

  • nir/algrbraic: Don't optimize open-coded bitfield reverse when lowering is enabled
  • intel/compiler: Request bitfield_reverse lowering on pre-Gen7 hardware

Ilia Mirkin (1):

  • gallium/vl: use compute preference for all multimedia, not just blit

Jonas Ådahl (1):

  • wayland/egl: Ensure correct buffer size when allocating

Juan A. Suarez Romero (6):

  • docs: add sha256 checksums for 19.1.5
  • cherry-ignore: add explicit 19.2 only nominations
  • cherry-ignore: iris: Replace devinfo->gen with GEN_GEN
  • cherry-ignore: iris: Update fast clear colors on Gen9 with direct immediate writes.
  • cherry-ignore: iris: Avoid unnecessary resolves on transfer maps
  • Update version to 19.1.6

Kenneth Graunke (6):

  • iris: Fix broken aux.possible/sampler_usages bitmask handling
  • iris: Drop copy format hacks from copy region based transfer path.
  • iris: Fix large timeout handling in rel2abs()
  • util: Add a _mesa_i64roundevenf() helper.
  • mesa: Fix _mesa_float_to_unorm() on 32-bit systems.
  • intel/compiler: Fix src0/desc setter ordering

Marek Olšák (1):

  • radeonsi: fix scratch buffer WAVESIZE setting leading to corruption

Paulo Zanoni (1):

  • intel/fs: grab fail_msg from v32 instead of v16 when v32->run_cs fails

Pierre-Eric Pelloux-Prayer (1):

  • glsl: replace 'x + (-x)' with constant 0

Tapani Pälli (1):

  • egl: reset blob cache set/get functions on terminate
+ + +
+ + diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h index af0ee9c5667..194b21eb2dd 100644 --- a/include/GL/internal/dri_interface.h +++ b/include/GL/internal/dri_interface.h @@ -766,7 +766,11 @@ struct __DRIuseInvalidateExtensionRec { #define __DRI_ATTRIB_YINVERTED 47 #define __DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE 48 #define __DRI_ATTRIB_MUTABLE_RENDER_BUFFER 49 /* EGL_MUTABLE_RENDER_BUFFER_BIT_KHR */ -#define __DRI_ATTRIB_MAX 50 +#define __DRI_ATTRIB_RED_SHIFT 50 +#define __DRI_ATTRIB_GREEN_SHIFT 51 +#define __DRI_ATTRIB_BLUE_SHIFT 52 +#define __DRI_ATTRIB_ALPHA_SHIFT 53 +#define __DRI_ATTRIB_MAX 54 /* __DRI_ATTRIB_RENDER_TYPE */ #define __DRI_ATTRIB_RGBA_BIT 0x01 @@ -1053,6 +1057,7 @@ enum dri_loader_cap { * only BGRA ordering can be exposed. */ DRI_LOADER_CAP_RGBA_ORDERING, + DRI_LOADER_CAP_FP16, }; struct __DRIdri2LoaderExtensionRec { @@ -1293,6 +1298,8 @@ struct __DRIdri2ExtensionRec { #define __DRI_IMAGE_FORMAT_ABGR2101010 0x1011 #define __DRI_IMAGE_FORMAT_SABGR8 0x1012 #define __DRI_IMAGE_FORMAT_UYVY 0x1013 +#define __DRI_IMAGE_FORMAT_XBGR16161616F 0x1014 +#define __DRI_IMAGE_FORMAT_ABGR16161616F 0x1015 #define __DRI_IMAGE_USE_SHARE 0x0001 #define __DRI_IMAGE_USE_SCANOUT 0x0002 @@ -1338,6 +1345,8 @@ struct __DRIdri2ExtensionRec { #define __DRI_IMAGE_FOURCC_RGBX1010102 0x30335852 #define __DRI_IMAGE_FOURCC_BGRA1010102 0x30334142 #define __DRI_IMAGE_FOURCC_BGRX1010102 0x30335842 +#define __DRI_IMAGE_FOURCC_ABGR16161616F 0x48344241 +#define __DRI_IMAGE_FOURCC_XBGR16161616F 0x48344258 #define __DRI_IMAGE_FOURCC_YUV410 0x39565559 #define __DRI_IMAGE_FOURCC_YUV411 0x31315559 #define __DRI_IMAGE_FOURCC_YUV420 0x32315559 diff --git a/include/drm-uapi/drm_fourcc.h b/include/drm-uapi/drm_fourcc.h index bab20298f42..08b81ac6ebd 100644 --- a/include/drm-uapi/drm_fourcc.h +++ b/include/drm-uapi/drm_fourcc.h @@ -144,6 +144,17 @@ extern "C" { #define DRM_FORMAT_RGBA1010102 fourcc_code('R', 'A', '3', '0') /* [31:0] R:G:B:A 10:10:10:2 little endian */ #define DRM_FORMAT_BGRA1010102 fourcc_code('B', 'A', '3', '0') /* [31:0] B:G:R:A 10:10:10:2 little endian */ +/* + * Floating point 64bpp RGB + * IEEE 754-2008 binary16 half-precision float + * [15:0] sign:exponent:mantissa 1:5:10 + */ +#define DRM_FORMAT_XRGB16161616F fourcc_code('X', 'R', '4', 'H') /* [63:0] x:R:G:B 16:16:16:16 little endian */ +#define DRM_FORMAT_XBGR16161616F fourcc_code('X', 'B', '4', 'H') /* [63:0] x:B:G:R 16:16:16:16 little endian */ + +#define DRM_FORMAT_ARGB16161616F fourcc_code('A', 'R', '4', 'H') /* [63:0] A:R:G:B 16:16:16:16 little endian */ +#define DRM_FORMAT_ABGR16161616F fourcc_code('A', 'B', '4', 'H') /* [63:0] A:B:G:R 16:16:16:16 little endian */ + /* packed YCbCr */ #define DRM_FORMAT_YUYV fourcc_code('Y', 'U', 'Y', 'V') /* [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */ #define DRM_FORMAT_YVYU fourcc_code('Y', 'V', 'Y', 'U') /* [31:0] Cb0:Y1:Cr0:Y0 8:8:8:8 little endian */ diff --git a/meson.build b/meson.build index f72bdc946bf..09a1d386b59 100644 --- a/meson.build +++ b/meson.build @@ -107,7 +107,7 @@ with_any_opengl = with_opengl or with_gles1 or with_gles2 # Only build shared_glapi if at least one OpenGL API is enabled with_shared_glapi = get_option('shared-glapi') and with_any_opengl -system_has_kms_drm = ['openbsd', 'netbsd', 'freebsd', 'dragonfly', 'linux'].contains(host_machine.system()) +system_has_kms_drm = ['openbsd', 'netbsd', 'freebsd', 'gnu/kfreebsd', 'dragonfly', 'linux'].contains(host_machine.system()) dri_drivers = get_option('dri-drivers') if 
dri_drivers.contains('auto') @@ -190,6 +190,12 @@ if cc.get_id() == 'intel' endif endif +#This message is needed until we bump meson version to 0.46 because of known 0.45.0 and 0.45.1 issue +#https://bugs.freedesktop.org/show_bug.cgi?id=109791 +if meson.version().version_compare('< 0.46.0') + warning('''Meson < 0.46 doesn't automatically define `NDEBUG`; please update meson to at least 0.46.''') +endif + with_gallium = gallium_drivers.length() != 0 and gallium_drivers != [''] if with_gallium and system_has_kms_drm @@ -244,6 +250,7 @@ endif if host_machine.system() == 'darwin' with_dri_platform = 'apple' + pre_args += '-DBUILDING_MESA' elif ['windows', 'cygwin'].contains(host_machine.system()) with_dri_platform = 'windows' elif system_has_kms_drm @@ -312,7 +319,7 @@ if with_glx == 'dri' endif endif -if not (with_dri or with_gallium or with_glx == 'xlib' or with_glx == 'gallium-xlib') +if not (with_dri or with_gallium or with_glx != 'disabled') with_gles1 = false with_gles2 = false with_opengl = false @@ -353,12 +360,12 @@ else with_egl = false endif -if with_egl and not (with_platform_drm or with_platform_surfaceless) +if with_egl and not (with_platform_drm or with_platform_surfaceless or with_platform_android) if with_gallium_radeonsi - error('RadeonSI requires drm or surfaceless platform when using EGL') + error('RadeonSI requires the drm, surfaceless or android platform when using EGL') endif if with_gallium_virgl - error('Virgl requires drm or surfaceless platform when using EGL') + error('Virgl requires the drm, surfaceless or android platform when using EGL') endif endif @@ -379,9 +386,7 @@ if with_glx != 'disabled' error('xlib conflicts with any dri driver') endif elif with_glx == 'dri' - if not with_dri - error('dri based GLX requires at least one DRI driver') - elif not with_shared_glapi + if not with_shared_glapi error('dri based GLX requires shared-glapi') endif endif @@ -754,7 +759,11 @@ if with_platform_haiku pre_args += '-DHAVE_HAIKU_PLATFORM' endif -prog_python = import('python3').find_python() +if meson.version().version_compare('>=0.50') + prog_python = import('python').find_installation('python3') +else + prog_python = import('python3').find_python() +endif has_mako = run_command( prog_python, '-c', ''' @@ -836,7 +845,7 @@ if cc.compiles('int foo(void) __attribute__((__noreturn__));', endif # TODO: this is very incomplete -if ['linux', 'cygwin', 'gnu'].contains(host_machine.system()) +if ['linux', 'cygwin', 'gnu', 'gnu/kfreebsd'].contains(host_machine.system()) pre_args += '-D_GNU_SOURCE' endif @@ -1040,7 +1049,7 @@ elif cc.has_header_symbol('sys/mkdev.h', 'major') pre_args += '-DMAJOR_IN_MKDEV' endif -foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h', 'endian.h', 'dlfcn.h'] +foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h', 'endian.h', 'dlfcn.h', 'execinfo.h'] if cc.compiles('#include <@0@>'.format(h), name : '@0@'.format(h)) pre_args += '-DHAVE_@0@'.format(h.to_upper().underscorify()) endif @@ -1052,6 +1061,13 @@ foreach f : ['strtof', 'mkostemp', 'posix_memalign', 'timespec_get', 'memfd_crea endif endforeach +if cc.has_header_symbol('errno.h', 'program_invocation_name', + args : '-D_GNU_SOURCE') + pre_args += '-DHAVE_PROGRAM_INVOCATION_NAME' +elif with_tools.contains('intel') + error('Intel tools require the program_invocation_name variable') +endif + # strtod locale support if cc.links(''' #define _GNU_SOURCE @@ -1163,7 +1179,7 @@ _drm_radeon_ver = '2.4.71' _drm_nouveau_ver = '2.4.66' _drm_etnaviv_ver = '2.4.89' _drm_intel_ver = '2.4.75' -_drm_ver = 
'2.4.75' +_drm_ver = '2.4.81' _libdrm_checks = [ ['intel', with_dri_i915 or with_gallium_i915], @@ -1258,6 +1274,7 @@ if _llvm != 'false' with_gallium_opencl or _llvm == 'true' ), static : not _shared_llvm, + method : 'config-tool', ) with_llvm = dep_llvm.found() endif diff --git a/scons/gallium.py b/scons/gallium.py index 61bbeb2399f..7dae036e5ad 100755 --- a/scons/gallium.py +++ b/scons/gallium.py @@ -352,6 +352,7 @@ def generate(env): '_DARWIN_C_SOURCE', 'GLX_USE_APPLEGL', 'GLX_DIRECT_RENDERING', + 'BUILDING_MESA', ] else: cppdefines += [ diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 22b771db774..d0f69f5176d 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -3438,6 +3438,8 @@ ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef la LLVMConstInt(ctx->i32, i, 0), ""); } } + if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind) + return LLVMBuildIntToPtr(ctx->builder, ret, src_type, ""); return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); } @@ -4016,7 +4018,7 @@ ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) /* ws->result_reduce is already the correct value */ if (ws->enable_inclusive) - ws->result_inclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->src, ws->op); + ws->result_inclusive = ac_build_alu_op(ctx, ws->result_inclusive, ws->src, ws->op); if (ws->enable_exclusive) ws->result_exclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->extra, ws->op); } diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index 69446863b95..6063411310b 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -151,13 +151,14 @@ static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, LLVMTargetRef target = ac_get_llvm_target(triple); snprintf(features, sizeof(features), - "+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s", + "+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s%s", HAVE_LLVM >= 0x0800 ? "" : ",+vgpr-spilling", tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "", tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "", tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "", - tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : ""); - + tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "", + tm_options & AC_TM_NO_LOAD_STORE_OPT ? 
",-load-store-opt" : ""); + LLVMTargetMachineRef tm = LLVMCreateTargetMachine( target, triple, diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h index 6d961c06f8a..ca00540da80 100644 --- a/src/amd/common/ac_llvm_util.h +++ b/src/amd/common/ac_llvm_util.h @@ -65,6 +65,7 @@ enum ac_target_machine_options { AC_TM_CHECK_IR = (1 << 5), AC_TM_ENABLE_GLOBAL_ISEL = (1 << 6), AC_TM_CREATE_LOW_OPT = (1 << 7), + AC_TM_NO_LOAD_STORE_OPT = (1 << 8), }; enum ac_float_mode { diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 151e0d0f961..bbc2a522cef 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -38,6 +38,7 @@ struct ac_nir_context { struct ac_shader_abi *abi; gl_shader_stage stage; + shader_info *info; LLVMValueRef *ssa_defs; @@ -1395,6 +1396,22 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, } args->attributes = AC_FUNC_ATTR_READNONE; + bool cs_derivs = ctx->stage == MESA_SHADER_COMPUTE && + ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE; + if (ctx->stage == MESA_SHADER_FRAGMENT || cs_derivs) { + /* Prevent texture instructions with implicit derivatives from being + * sinked into branches. */ + switch (instr->op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_lod: + args->attributes |= AC_FUNC_ATTR_CONVERGENT; + break; + default: + break; + } + } + return ac_build_image_opcode(&ctx->ac, args); } @@ -3730,7 +3747,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) goto write_result; } - if (args.offset && instr->op != nir_texop_txf) { + if (args.offset && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) { LLVMValueRef offset[3], pack; for (unsigned chan = 0; chan < 3; ++chan) offset[chan] = ctx->ac.i32_0; @@ -3864,7 +3881,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) args.coords[sample_chan], fmask_ptr); } - if (args.offset && instr->op == nir_texop_txf) { + if (args.offset && (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)) { int num_offsets = instr->src[offset_src].src.ssa->num_components; num_offsets = MIN2(num_offsets, instr->coord_components); for (unsigned i = 0; i < num_offsets; ++i) { @@ -4351,6 +4368,7 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, ctx.abi = abi; ctx.stage = nir->info.stage; + ctx.info = &nir->info; ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder)); diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build index 532267343d1..2dd2a7f246e 100644 --- a/src/amd/vulkan/meson.build +++ b/src/amd/vulkan/meson.build @@ -129,21 +129,27 @@ if with_xlib_lease radv_flags += '-DVK_USE_PLATFORM_XLIB_XRANDR_EXT' endif +if with_platform_android + radv_flags += [ + '-DVK_USE_PLATFORM_ANDROID_KHR' + ] + libradv_files += files('radv_android.c') +endif + libvulkan_radeon = shared_library( 'vulkan_radeon', [libradv_files, radv_entrypoints, radv_extensions_c, amd_vk_format_table_c, sha1_h, xmlpool_options_h], include_directories : [ - inc_common, inc_amd, inc_amd_common, inc_compiler, inc_util, inc_vulkan_util, - inc_vulkan_wsi, + inc_common, inc_amd, inc_amd_common, inc_compiler, inc_util, inc_vulkan_wsi, ], link_with : [ - libamd_common, libamdgpu_addrlib, libvulkan_util, libvulkan_wsi, + libamd_common, libamdgpu_addrlib, libvulkan_wsi, libmesa_util, libxmlconfig ], dependencies : [ dep_llvm, dep_libdrm_amdgpu, dep_thread, dep_elf, dep_dl, dep_m, dep_valgrind, radv_deps, - idep_nir, + idep_nir, 
idep_vulkan_util, ], c_args : [c_vis_args, no_override_init_args, radv_flags], cpp_args : [cpp_vis_args, radv_flags], diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index b04c998fac2..90c6153e875 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -301,7 +301,6 @@ radv_cmd_buffer_destroy(struct radv_cmd_buffer *cmd_buffer) static VkResult radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) { - cmd_buffer->device->ws->cs_reset(cmd_buffer->cs); list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, @@ -326,6 +325,8 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) cmd_buffer->record_result = VK_SUCCESS; + memset(cmd_buffer->vertex_bindings, 0, sizeof(cmd_buffer->vertex_bindings)); + for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) { cmd_buffer->descriptors[i].dirty = 0; cmd_buffer->descriptors[i].valid = 0; @@ -565,8 +566,8 @@ radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, for_each_bit(i, descriptors_state->valid) { struct radv_descriptor_set *set = descriptors_state->sets[i]; - data[i * 2] = (uintptr_t)set; - data[i * 2 + 1] = (uintptr_t)set >> 32; + data[i * 2] = (uint64_t)(uintptr_t)set; + data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32; } radv_emit_write_data_packet(cmd_buffer, va, MAX_SETS * 2, data); @@ -4663,6 +4664,9 @@ static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, assert(src_family == cmd_buffer->queue_family_index || dst_family == cmd_buffer->queue_family_index); + if (src_family == VK_QUEUE_FAMILY_EXTERNAL) + return; + if (cmd_buffer->queue_family_index == RADV_QUEUE_TRANSFER) return; @@ -4824,7 +4828,7 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer, radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo); - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 18); + MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 21); /* Flags that only require a top-of-pipe event. */ VkPipelineStageFlags top_of_pipe_flags = diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h index 17a2f3370c0..652a3b677d2 100644 --- a/src/amd/vulkan/radv_debug.h +++ b/src/amd/vulkan/radv_debug.h @@ -51,6 +51,7 @@ enum { RADV_DEBUG_CHECKIR = 0x200000, RADV_DEBUG_NOTHREADLLVM = 0x400000, RADV_DEBUG_NOBINNING = 0x800000, + RADV_DEBUG_NO_LOAD_STORE_OPT = 0x1000000, }; enum { diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c index 4e9c73c94d0..33615af9784 100644 --- a/src/amd/vulkan/radv_descriptor_set.c +++ b/src/amd/vulkan/radv_descriptor_set.c @@ -200,7 +200,7 @@ VkResult radv_CreateDescriptorSetLayout( break; case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: /* main descriptor + fmask descriptor + sampler */ - set_layout->binding[b].size = 32 + 32 * max_sampled_image_descriptors; + set_layout->binding[b].size = 96; binding_buffer_count = 1; alignment = 32; break; @@ -247,7 +247,8 @@ VkResult radv_CreateDescriptorSetLayout( /* Don't reserve space for the samplers if they're not accessed. 
*/ if (set_layout->binding[b].immutable_samplers_equal) { - if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER && + max_sampled_image_descriptors <= 2) set_layout->binding[b].size -= 32; else if (binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) set_layout->binding[b].size -= 16; @@ -476,8 +477,17 @@ radv_descriptor_set_create(struct radv_device *device, struct radv_descriptor_set **out_set) { struct radv_descriptor_set *set; + uint32_t buffer_count = layout->buffer_count; + if (variable_count) { + unsigned stride = 1; + if (layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_SAMPLER || + layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) + stride = 0; + buffer_count = layout->binding[layout->binding_count - 1].buffer_offset + + *variable_count * stride; + } unsigned range_offset = sizeof(struct radv_descriptor_set) + - sizeof(struct radeon_winsys_bo *) * layout->buffer_count; + sizeof(struct radeon_winsys_bo *) * buffer_count; unsigned mem_size = range_offset + sizeof(struct radv_descriptor_range) * layout->dynamic_offset_count; @@ -502,7 +512,17 @@ radv_descriptor_set_create(struct radv_device *device, } set->layout = layout; - uint32_t layout_size = align_u32(layout->size, 32); + uint32_t layout_size = layout->size; + if (variable_count) { + assert(layout->has_variable_descriptors); + uint32_t stride = layout->binding[layout->binding_count - 1].size; + if (layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) + stride = 1; + + layout_size = layout->binding[layout->binding_count - 1].offset + + *variable_count * stride; + } + layout_size = align_u32(layout_size, 32); if (layout_size) { set->size = layout_size; @@ -776,9 +796,13 @@ VkResult radv_AllocateDescriptorSets( pDescriptorSets[i] = radv_descriptor_set_to_handle(set); } - if (result != VK_SUCCESS) + if (result != VK_SUCCESS) { radv_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool, i, pDescriptorSets); + for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { + pDescriptorSets[i] = VK_NULL_HANDLE; + } + } return result; } diff --git a/src/amd/vulkan/radv_descriptor_set.h b/src/amd/vulkan/radv_descriptor_set.h index 5fd19d94482..89be6e69068 100644 --- a/src/amd/vulkan/radv_descriptor_set.h +++ b/src/amd/vulkan/radv_descriptor_set.h @@ -104,7 +104,7 @@ radv_immutable_samplers(const struct radv_descriptor_set_layout *set, static inline unsigned radv_combined_image_descriptor_sampler_offset(const struct radv_descriptor_set_binding_layout *binding) { - return binding->size - ((!binding->immutable_samplers_equal) ? 32 : 0); + return binding->size - ((!binding->immutable_samplers_equal) ? 
16 : 0); } static inline const struct radv_sampler_ycbcr_conversion * diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 10956ded66f..809675c44a0 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -464,6 +464,7 @@ static const struct debug_control radv_debug_options[] = { {"checkir", RADV_DEBUG_CHECKIR}, {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM}, {"nobinning", RADV_DEBUG_NOBINNING}, + {"noloadstoreopt", RADV_DEBUG_NO_LOAD_STORE_OPT}, {NULL, 0} }; @@ -510,6 +511,13 @@ radv_handle_per_app_options(struct radv_instance *instance, } else if (!strcmp(name, "DOOM_VFR")) { /* Work around a Doom VFR game bug */ instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS; + } else if (!strcmp(name, "MonsterHunterWorld.exe")) { + /* Workaround for a WaW hazard when LLVM moves/merges + * load/store memory operations. + * See https://reviews.llvm.org/D61313 + */ + if (HAVE_LLVM < 0x900) + instance->debug_flags |= RADV_DEBUG_NO_LOAD_STORE_OPT; } } @@ -1477,40 +1485,46 @@ radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice, * Note that the application heap usages are not really accurate (eg. * in presence of shared buffers). */ - if (vram_size) { - heap_usage = device->ws->query_value(device->ws, - RADEON_ALLOCATED_VRAM); - - heap_budget = vram_size - - device->ws->query_value(device->ws, RADEON_VRAM_USAGE) + - heap_usage; + for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) { + uint32_t heap_index = device->memory_properties.memoryTypes[i].heapIndex; - memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM] = heap_budget; - memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM] = heap_usage; - } + switch (device->mem_type_indices[i]) { + case RADV_MEM_TYPE_VRAM: + heap_usage = device->ws->query_value(device->ws, + RADEON_ALLOCATED_VRAM); - if (visible_vram_size) { - heap_usage = device->ws->query_value(device->ws, - RADEON_ALLOCATED_VRAM_VIS); + heap_budget = vram_size - + device->ws->query_value(device->ws, RADEON_VRAM_USAGE) + + heap_usage; - heap_budget = visible_vram_size - - device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) + - heap_usage; + memoryBudget->heapBudget[heap_index] = heap_budget; + memoryBudget->heapUsage[heap_index] = heap_usage; + break; + case RADV_MEM_TYPE_VRAM_CPU_ACCESS: + heap_usage = device->ws->query_value(device->ws, + RADEON_ALLOCATED_VRAM_VIS); - memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = heap_budget; - memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = heap_usage; - } + heap_budget = visible_vram_size - + device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) + + heap_usage; - if (gtt_size) { - heap_usage = device->ws->query_value(device->ws, - RADEON_ALLOCATED_GTT); + memoryBudget->heapBudget[heap_index] = heap_budget; + memoryBudget->heapUsage[heap_index] = heap_usage; + break; + case RADV_MEM_TYPE_GTT_WRITE_COMBINE: + heap_usage = device->ws->query_value(device->ws, + RADEON_ALLOCATED_GTT); - heap_budget = gtt_size - - device->ws->query_value(device->ws, RADEON_GTT_USAGE) + - heap_usage; + heap_budget = gtt_size - + device->ws->query_value(device->ws, RADEON_GTT_USAGE) + + heap_usage; - memoryBudget->heapBudget[RADV_MEM_HEAP_GTT] = heap_budget; - memoryBudget->heapUsage[RADV_MEM_HEAP_GTT] = heap_usage; + memoryBudget->heapBudget[heap_index] = heap_budget; + memoryBudget->heapUsage[heap_index] = heap_usage; + break; + default: + break; + } } /* The heapBudget and heapUsage values must be zero for array elements diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py index 23e76bfcc11..b1921f53ada 100644 --- a/src/amd/vulkan/radv_extensions.py +++ b/src/amd/vulkan/radv_extensions.py @@ -127,8 +127,8 @@ def __init__(self, name, ext_version, enable): Extension('VK_EXT_ycbcr_image_arrays', 1, True), Extension('VK_AMD_draw_indirect_count', 1, True), Extension('VK_AMD_gcn_shader', 1, True), - Extension('VK_AMD_gpu_shader_half_float', 1, 'device->rad_info.chip_class >= VI && HAVE_LLVM >= 0x0800'), - Extension('VK_AMD_gpu_shader_int16', 1, 'device->rad_info.chip_class >= VI'), + Extension('VK_AMD_gpu_shader_half_float', 1, 'device->rad_info.chip_class >= GFX9 && HAVE_LLVM >= 0x0800'), + Extension('VK_AMD_gpu_shader_int16', 1, 'device->rad_info.chip_class >= GFX9'), Extension('VK_AMD_rasterization_order', 1, 'device->has_out_of_order_rast'), Extension('VK_AMD_shader_core_properties', 1, True), Extension('VK_AMD_shader_info', 1, True), diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c index 5af172c8e7f..d6a5872d2cc 100644 --- a/src/amd/vulkan/radv_formats.c +++ b/src/amd/vulkan/radv_formats.c @@ -547,7 +547,7 @@ static bool radv_is_storage_image_format_supported(struct radv_physical_device * } } -static bool radv_is_buffer_format_supported(VkFormat format, bool *scaled) +bool radv_is_buffer_format_supported(VkFormat format, bool *scaled) { const struct vk_format_description *desc = vk_format_description(format); unsigned data_format, num_format; @@ -559,7 +559,8 @@ static bool radv_is_buffer_format_supported(VkFormat format, bool *scaled) num_format = radv_translate_buffer_numformat(desc, vk_format_get_first_non_void_channel(format)); - *scaled = (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) || (num_format == V_008F0C_BUF_NUM_FORMAT_USCALED); + if (scaled) + *scaled = (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) || (num_format == V_008F0C_BUF_NUM_FORMAT_USCALED); return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID && num_format != ~0; } @@ -635,7 +636,8 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical const struct vk_format_description *desc = vk_format_description(format); bool blendable; bool scaled = false; - if (!desc) { + /* TODO: implement some software emulation of SUBSAMPLED formats. */ + if (!desc || desc->layout == VK_FORMAT_LAYOUT_SUBSAMPLED) { out_properties->linearTilingFeatures = linear; out_properties->optimalTilingFeatures = tiled; out_properties->bufferFeatures = buffer; @@ -655,6 +657,7 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical uint32_t tiling = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT | VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT | VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT; /* The subsampled formats have no support for linear filters. 
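radv_is_buffer_format_supported() is exported in this hunk and its scaled result becomes optional, so callers that only need a yes/no answer can pass NULL, as the copy paths later in this patch do. A minimal sketch of the NULL-tolerant out-parameter pattern (hypothetical format_supported helper, not the real RADV format tables):

#include <stdbool.h>
#include <stddef.h>

/* Hypothetical query: report support and, optionally, extra detail.
 * Writing through the pointer only when it is non-NULL lets callers
 * pass NULL when they do not care about the detail. */
static bool format_supported(int format, bool *scaled)
{
   bool is_scaled = (format == 2);   /* stand-in classification */
   if (scaled)
      *scaled = is_scaled;
   return format != 0;
}

int main(void)
{
   bool scaled;
   bool a = format_supported(2, &scaled); /* caller wants the detail */
   bool b = format_supported(2, NULL);    /* caller only wants yes/no */
   return (a && b && scaled) ? 0 : 1;
}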
*/ diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 92409d147f1..a5a7aa3767f 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -729,7 +729,8 @@ radv_query_opaque_metadata(struct radv_device *device, for (i = 0; i <= image->info.levels - 1; i++) md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8; md->size_metadata = (11 + image->info.levels - 1) * 4; - } + } else + md->size_metadata = 10 * 4; } void @@ -860,6 +861,11 @@ radv_image_alloc_cmask(struct radv_device *device, uint32_t clear_value_size = 0; radv_image_get_cmask_info(device, image, &image->cmask); + if (!image->cmask.size) + return; + + assert(image->cmask.alignment); + image->cmask.offset = align64(image->size, image->cmask.alignment); /* + 8 for storing the clear values */ if (!image->clear_value_offset) { diff --git a/src/amd/vulkan/radv_meta.c b/src/amd/vulkan/radv_meta.c index ec4fc4a6d4b..0606d49392f 100644 --- a/src/amd/vulkan/radv_meta.c +++ b/src/amd/vulkan/radv_meta.c @@ -81,7 +81,7 @@ radv_meta_save(struct radv_meta_saved_state *state, if (state->flags & RADV_META_SAVE_DESCRIPTORS) { state->old_descriptor_set0 = descriptors_state->sets[0]; - if (!state->old_descriptor_set0) + if (!(descriptors_state->valid & 1) || !state->old_descriptor_set0) state->flags &= ~RADV_META_SAVE_DESCRIPTORS; } diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c index 40ecfe001d1..15eac4793ba 100644 --- a/src/amd/vulkan/radv_meta_clear.c +++ b/src/amd/vulkan/radv_meta_clear.c @@ -650,6 +650,7 @@ static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer, if (radv_image_has_htile(iview->image) && iview->base_mip == 0 && iview->base_layer == 0 && + iview->layer_count == iview->image->info.array_size && radv_layout_is_htile_compressed(iview->image, layout, queue_mask) && radv_image_extent_compare(iview->image, &iview->extent)) return true; @@ -1575,6 +1576,9 @@ emit_clear(struct radv_cmd_buffer *cmd_buffer, emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask); } } else { + if (!subpass->depth_stencil_attachment) + return; + const uint32_t pass_att = subpass->depth_stencil_attachment->attachment; if (pass_att == VK_ATTACHMENT_UNUSED) return; diff --git a/src/amd/vulkan/radv_meta_copy.c b/src/amd/vulkan/radv_meta_copy.c index 8081057d9df..9b92f64dc89 100644 --- a/src/amd/vulkan/radv_meta_copy.c +++ b/src/amd/vulkan/radv_meta_copy.c @@ -187,6 +187,24 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, &pRegions[r].imageSubresource, pRegions[r].imageSubresource.aspectMask); + if (!radv_is_buffer_format_supported(img_bsurf.format, NULL)) { + uint32_t queue_mask = radv_image_queue_family_mask(image, + cmd_buffer->queue_family_index, + cmd_buffer->queue_family_index); + MAYBE_UNUSED bool compressed = radv_layout_dcc_compressed(image, layout, queue_mask); + if (compressed) { + radv_decompress_dcc(cmd_buffer, image, &(VkImageSubresourceRange) { + .aspectMask = pRegions[r].imageSubresource.aspectMask, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer, + .layerCount = pRegions[r].imageSubresource.layerCount, + }); + } + img_bsurf.format = vk_format_for_size(vk_format_get_blocksize(img_bsurf.format)); + img_bsurf.current_layout = VK_IMAGE_LAYOUT_GENERAL; + } + struct radv_meta_blit2d_buffer buf_bsurf = { .bs = img_bsurf.bs, .format = img_bsurf.format, @@ -313,6 +331,24 @@ meta_copy_image_to_buffer(struct radv_cmd_buffer 
*cmd_buffer, &pRegions[r].imageSubresource, pRegions[r].imageSubresource.aspectMask); + if (!radv_is_buffer_format_supported(img_info.format, NULL)) { + uint32_t queue_mask = radv_image_queue_family_mask(image, + cmd_buffer->queue_family_index, + cmd_buffer->queue_family_index); + MAYBE_UNUSED bool compressed = radv_layout_dcc_compressed(image, layout, queue_mask); + if (compressed) { + radv_decompress_dcc(cmd_buffer, image, &(VkImageSubresourceRange) { + .aspectMask = pRegions[r].imageSubresource.aspectMask, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer, + .layerCount = pRegions[r].imageSubresource.layerCount, + }); + } + img_info.format = vk_format_for_size(vk_format_get_blocksize(img_info.format)); + img_info.current_layout = VK_IMAGE_LAYOUT_GENERAL; + } + struct radv_meta_blit2d_buffer buf_info = { .bs = img_info.bs, .format = img_info.format, diff --git a/src/amd/vulkan/radv_meta_fmask_expand.c b/src/amd/vulkan/radv_meta_fmask_expand.c index f4b55328929..425f473fe7f 100644 --- a/src/amd/vulkan/radv_meta_fmask_expand.c +++ b/src/amd/vulkan/radv_meta_fmask_expand.c @@ -24,6 +24,7 @@ #include "radv_meta.h" #include "radv_private.h" +#include "vk_format.h" static nir_shader * build_fmask_expand_compute_shader(struct radv_device *device, int samples) @@ -132,7 +133,7 @@ radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = radv_image_to_handle(image), .viewType = radv_meta_get_view_type(image), - .format = image->vk_format, + .format = vk_format_no_srgb(image->vk_format), .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, diff --git a/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c b/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c index 77f2e6ac66e..5d771c2fc2e 100644 --- a/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c +++ b/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c @@ -156,6 +156,73 @@ convert_ycbcr(struct ycbcr_state *state, converted_channels[2], nir_imm_float(b, 1.0f)); } +static nir_ssa_def * +get_texture_size(struct ycbcr_state *state, nir_deref_instr *texture) +{ + nir_builder *b = state->builder; + const struct glsl_type *type = texture->type; + nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1); + + tex->op = nir_texop_txs; + tex->sampler_dim = glsl_get_sampler_dim(type); + tex->is_array = glsl_sampler_type_is_array(type); + tex->is_shadow = glsl_sampler_type_is_shadow(type); + tex->dest_type = nir_type_int; + + tex->src[0].src_type = nir_tex_src_texture_deref; + tex->src[0].src = nir_src_for_ssa(&texture->dest.ssa); + + nir_ssa_dest_init(&tex->instr, &tex->dest, + nir_tex_instr_dest_size(tex), 32, NULL); + nir_builder_instr_insert(b, &tex->instr); + + return nir_i2f32(b, &tex->dest.ssa); +} + +static nir_ssa_def * +implicit_downsampled_coord(nir_builder *b, + nir_ssa_def *value, + nir_ssa_def *max_value, + int div_scale) +{ + return nir_fadd(b, + value, + nir_fdiv(b, + nir_imm_float(b, 1.0f), + nir_fmul(b, + nir_imm_float(b, div_scale), + max_value))); +} + +static nir_ssa_def * +implicit_downsampled_coords(struct ycbcr_state *state, + nir_ssa_def *old_coords) +{ + nir_builder *b = state->builder; + const struct radv_sampler_ycbcr_conversion *conversion = state->conversion; + nir_ssa_def *image_size = NULL; + nir_ssa_def *comp[4] = { NULL, }; + const struct vk_format_description *fmt_desc = vk_format_description(state->conversion->format); + const unsigned divisors[2] = 
{fmt_desc->width_divisor, fmt_desc->height_divisor}; + + for (int c = 0; c < old_coords->num_components; c++) { + if (c < ARRAY_SIZE(divisors) && divisors[c] > 1 && + conversion->chroma_offsets[c] == VK_CHROMA_LOCATION_COSITED_EVEN) { + if (!image_size) + image_size = get_texture_size(state, state->tex_deref); + + comp[c] = implicit_downsampled_coord(b, + nir_channel(b, old_coords, c), + nir_channel(b, image_size, c), + divisors[c]); + } else { + comp[c] = nir_channel(b, old_coords, c); + } + } + + return nir_vec(b, comp, old_coords->num_components); +} + static nir_ssa_def * create_plane_tex_instr_implicit(struct ycbcr_state *state, uint32_t plane) @@ -163,10 +230,23 @@ create_plane_tex_instr_implicit(struct ycbcr_state *state, nir_builder *b = state->builder; nir_tex_instr *old_tex = state->origin_tex; nir_tex_instr *tex = nir_tex_instr_create(b->shader, old_tex->num_srcs+ 1); - for (uint32_t i = 0; i < old_tex->num_srcs; i++) { tex->src[i].src_type = old_tex->src[i].src_type; - nir_src_copy(&tex->src[i].src, &old_tex->src[i].src, tex); + + switch (old_tex->src[i].src_type) { + case nir_tex_src_coord: + if (plane && true/*state->conversion->chroma_reconstruction*/) { + assert(old_tex->src[i].src.is_ssa); + tex->src[i].src = + nir_src_for_ssa(implicit_downsampled_coords(state, + old_tex->src[i].src.ssa)); + break; + } + /* fall through */ + default: + nir_src_copy(&tex->src[i].src, &old_tex->src[i].src, tex); + break; + } } tex->src[tex->num_srcs - 1].src = nir_src_for_ssa(nir_imm_int(b, plane)); diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index d83f0bd547f..5201f46b3a8 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -2019,16 +2019,34 @@ static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi, assert(stride % type_size == 0); - if (!index) - index = ctx->ac.i32_0; + LLVMValueRef adjusted_index = index; + if (!adjusted_index) + adjusted_index = ctx->ac.i32_0; - index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), ""); + adjusted_index = LLVMBuildMul(builder, adjusted_index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), ""); list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0)); list = LLVMBuildPointerCast(builder, list, ac_array_in_const32_addr_space(type), ""); - return ac_build_load_to_sgpr(&ctx->ac, list, index); + LLVMValueRef descriptor = ac_build_load_to_sgpr(&ctx->ac, list, adjusted_index); + + /* 3 plane formats always have same size and format for plane 1 & 2, so + * use the tail from plane 1 so that we can store only the first 16 bytes + * of the last plane. */ + if (desc_type == AC_DESC_PLANE_2) { + LLVMValueRef descriptor2 = radv_get_sampler_desc(abi, descriptor_set, base_index, constant_index, index, AC_DESC_PLANE_1,image, write, bindless); + + LLVMValueRef components[8]; + for (unsigned i = 0; i < 4; ++i) + components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor, i); + + for (unsigned i = 4; i < 8; ++i) + components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i); + descriptor = ac_build_gather_values(&ctx->ac, components, 8); + } + + return descriptor; } /* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW. 
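The radv_get_sampler_desc change in this hunk builds the plane-2 descriptor of a 3-plane format from the first four dwords of plane 2 and the last four dwords of plane 1, since planes 1 and 2 share size and format and only the first 16 bytes of the last plane are stored. A plain-C sketch of that merge (the real code assembles LLVM values; this only shows the dword layout):

#include <stdint.h>
#include <string.h>

/* Combine descriptors: dwords 0-3 from plane 2, dwords 4-7 from plane 1. */
static void merge_plane2_descriptor(const uint32_t plane2[4],
                                    const uint32_t plane1[8],
                                    uint32_t out[8])
{
   memcpy(&out[0], plane2, 4 * sizeof(uint32_t));
   memcpy(&out[4], &plane1[4], 4 * sizeof(uint32_t));
}

int main(void)
{
   const uint32_t p2[4] = { 1, 2, 3, 4 };
   const uint32_t p1[8] = { 9, 9, 9, 9, 5, 6, 7, 8 };
   uint32_t desc[8];
   merge_plane2_descriptor(p2, p1, desc);
   return (desc[0] == 1 && desc[4] == 5) ? 0 : 1;
}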
@@ -3592,9 +3610,10 @@ ac_setup_rings(struct radv_shader_context *ctx) unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class, + gl_shader_stage stage, const struct nir_shader *nir) { - switch (nir->info.stage) { + switch (stage) { case MESA_SHADER_TESS_CTRL: return chip_class >= CIK ? 128 : 64; case MESA_SHADER_GEOMETRY: @@ -3605,6 +3624,8 @@ radv_nir_get_max_workgroup_size(enum chip_class chip_class, return 0; } + if (!nir) + return chip_class >= GFX9 ? 128 : 64; unsigned max_workgroup_size = nir->info.cs.local_size[0] * nir->info.cs.local_size[1] * nir->info.cs.local_size[2]; @@ -3671,7 +3692,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, for (int i = 0; i < shader_count; ++i) { ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size, radv_nir_get_max_workgroup_size(ctx.options->chip_class, - shaders[i])); + shaders[i]->info.stage, + shaders[i])); } create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2, diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 5383f00e754..cfa374cd437 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -524,7 +524,7 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline, col_format |= cf << (4 * i); } - if (!col_format && blend->need_src_alpha & (1 << 0)) { + if (!(col_format & 0xf) && blend->need_src_alpha & (1 << 0)) { /* When a subpass doesn't have any color attachments, write the * alpha channel of MRT0 when alpha coverage is enabled because * the depth attachment needs it. @@ -542,10 +542,13 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline, } } - blend->cb_shader_mask = ac_get_cb_shader_mask(col_format); - + /* The output for dual source blending should have the same format as + * the first output. 
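The SPI color-format change in this hunk makes the dual-source output reuse MRT0's format by copying the low 4-bit field into the next slot before cb_shader_mask is derived, so the mask sees the duplicated entry. A small sketch of that bit manipulation (the format value is made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint32_t col_format = 0x0000000a;  /* pretend MRT0 uses format 0xa */
   int mrt0_is_dual_src = 1;

   if (mrt0_is_dual_src)
      col_format |= (col_format & 0xf) << 4;  /* MRT1 slot mirrors MRT0 */

   printf("col_format = 0x%08x\n", (unsigned)col_format); /* 0x000000aa */
   return 0;
}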
+ */ if (blend->mrt0_is_dual_src) col_format |= (col_format & 0xf) << 4; + + blend->cb_shader_mask = ac_get_cb_shader_mask(col_format); blend->spi_shader_col_format = col_format; } @@ -1417,11 +1420,13 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline, const VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT); - if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) { + if (needed_states & RADV_DYNAMIC_DISCARD_RECTANGLE) { dynamic->discard_rectangle.count = discard_rectangle_info->discardRectangleCount; - typed_memcpy(dynamic->discard_rectangle.rectangles, - discard_rectangle_info->pDiscardRectangles, - discard_rectangle_info->discardRectangleCount); + if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) { + typed_memcpy(dynamic->discard_rectangle.rectangles, + discard_rectangle_info->pDiscardRectangles, + discard_rectangle_info->discardRectangleCount); + } } pipeline->dynamic_state.mask = states; @@ -2177,12 +2182,12 @@ void radv_create_shaders(struct radv_pipeline *pipeline, for (int i = 0; i < MESA_SHADER_STAGES; ++i) { if (nir[i]) { - NIR_PASS_V(nir[i], nir_lower_bool_to_int32); NIR_PASS_V(nir[i], nir_lower_non_uniform_access, nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_ssbo_access | nir_lower_non_uniform_texture_access | nir_lower_non_uniform_image_access); + NIR_PASS_V(nir[i], nir_lower_bool_to_int32); } if (radv_can_dump_shader(device, modules[i], false)) @@ -2668,8 +2673,10 @@ radv_pipeline_generate_binning_state(struct radeon_cmdbuf *ctx_cs, break; case CHIP_RAVEN: case CHIP_RAVEN2: - context_states_per_bin = 6; - persistent_states_per_bin = 32; + /* The context states are affected by the scissor bug. */ + context_states_per_bin = pipeline->device->physical_device->has_scissor_bug ? 1 : 6; + /* 32 causes hangs for RAVEN. */ + persistent_states_per_bin = 16; fpovs_per_batch = 63; break; default: @@ -2706,7 +2713,6 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs, const VkPipelineDepthStencilStateCreateInfo *vkds = pCreateInfo->pDepthStencilState; RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass; - struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; struct radv_render_pass_attachment *attachment = NULL; uint32_t db_depth_control = 0, db_stencil_control = 0; uint32_t db_render_control = 0, db_render_override2 = 0; @@ -2755,8 +2761,7 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs, db_render_override |= S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); - if (!pCreateInfo->pRasterizationState->depthClampEnable && - ps->info.info.ps.writes_z) { + if (!pCreateInfo->pRasterizationState->depthClampEnable) { /* From VK_EXT_depth_range_unrestricted spec: * * "The behavior described in Primitive Clipping still applies. @@ -2927,8 +2932,11 @@ radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline) { const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline); - uint32_t vgt_primitiveid_en = false; + const struct radv_shader_variant *vs = + pipeline->shaders[MESA_SHADER_TESS_EVAL] ? 
+ pipeline->shaders[MESA_SHADER_TESS_EVAL] : + pipeline->shaders[MESA_SHADER_VERTEX]; uint32_t vgt_gs_mode = 0; if (radv_pipeline_has_gs(pipeline)) { @@ -2937,7 +2945,7 @@ radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *ctx_cs, vgt_gs_mode = ac_vgt_gs_mode(gs->info.gs.vertices_out, pipeline->device->physical_device->rad_info.chip_class); - } else if (outinfo->export_prim_id) { + } else if (outinfo->export_prim_id || vs->info.info.uses_prim_id) { vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A); vgt_primitiveid_en = true; } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index df85d0cf889..31c829d345b 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1456,6 +1456,7 @@ uint32_t radv_translate_buffer_dataformat(const struct vk_format_description *de int first_non_void); uint32_t radv_translate_buffer_numformat(const struct vk_format_description *desc, int first_non_void); +bool radv_is_buffer_format_supported(VkFormat format, bool *scaled); uint32_t radv_translate_colorformat(VkFormat format); uint32_t radv_translate_color_numformat(VkFormat format, const struct vk_format_description *desc, @@ -1993,6 +1994,7 @@ void radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, const struct radv_nir_compiler_options *options); unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class, + gl_shader_stage stage, const struct nir_shader *nir); /* radv_shader_info.h */ diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 63a2ab773a8..08314e09a9f 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -40,18 +40,6 @@ static const int pipelinestat_block_size = 11 * 8; static const unsigned pipeline_statistics_indices[] = {7, 6, 3, 4, 5, 2, 1, 0, 8, 9, 10}; -static unsigned get_max_db(struct radv_device *device) -{ - unsigned num_db = device->physical_device->rad_info.num_render_backends; - MAYBE_UNUSED unsigned rb_mask = device->physical_device->rad_info.enabled_rb_mask; - - /* Otherwise we need to change the query reset procedure */ - assert(rb_mask == ((1ull << num_db) - 1)); - - return num_db; -} - - static nir_ssa_def *nir_test_flag(nir_builder *b, nir_ssa_def *flags, uint32_t flag) { return nir_i2b(b, nir_iand(b, flags, nir_imm_int(b, flag))); @@ -108,12 +96,14 @@ build_occlusion_query_shader(struct radv_device *device) { * uint64_t dst_offset = dst_stride * global_id.x; * bool available = true; * for (int i = 0; i < db_count; ++i) { - * uint64_t start = src_buf[src_offset + 16 * i]; - * uint64_t end = src_buf[src_offset + 16 * i + 8]; - * if ((start & (1ull << 63)) && (end & (1ull << 63))) - * result += end - start; - * else - * available = false; + * if (enabled_rb_mask & (1 << i)) { + * uint64_t start = src_buf[src_offset + 16 * i]; + * uint64_t end = src_buf[src_offset + 16 * i + 8]; + * if ((start & (1ull << 63)) && (end & (1ull << 63))) + * result += end - start; + * else + * available = false; + * } * } * uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 
8 : 4; * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) { @@ -139,7 +129,8 @@ build_occlusion_query_shader(struct radv_device *device) { nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start"); nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end"); nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available"); - unsigned db_count = get_max_db(device); + unsigned enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask; + unsigned db_count = device->physical_device->rad_info.num_render_backends; nir_ssa_def *flags = radv_load_push_int(&b, 0, "flags"); @@ -187,6 +178,16 @@ build_occlusion_query_shader(struct radv_device *device) { nir_ssa_def *current_outer_count = nir_load_var(&b, outer_counter); radv_break_on_count(&b, outer_counter, nir_imm_int(&b, db_count)); + nir_ssa_def *enabled_cond = + nir_iand(&b, nir_imm_int(&b, enabled_rb_mask), + nir_ishl(&b, nir_imm_int(&b, 1), current_outer_count)); + + nir_if *enabled_if = nir_if_create(b.shader); + enabled_if->condition = nir_src_for_ssa(nir_i2b(&b, enabled_cond)); + nir_cf_node_insert(b.cursor, &enabled_if->cf_node); + + b.cursor = nir_after_cf_list(&enabled_if->then_list); + nir_ssa_def *load_offset = nir_imul(&b, current_outer_count, nir_imm_int(&b, 16)); load_offset = nir_iadd(&b, input_base, load_offset); @@ -1044,7 +1045,7 @@ VkResult radv_CreateQueryPool( switch(pCreateInfo->queryType) { case VK_QUERY_TYPE_OCCLUSION: - pool->stride = 16 * get_max_db(device); + pool->stride = 16 * device->physical_device->rad_info.num_render_backends; break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: pool->stride = pipelinestat_block_size * 2; @@ -1128,17 +1129,18 @@ VkResult radv_GetQueryPoolResults( if (flags & VK_QUERY_RESULT_WAIT_BIT) while(!*(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query)) ; - available = *(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query); + available = *(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query); } switch (pool->type) { case VK_QUERY_TYPE_TIMESTAMP: { - available = *(uint64_t *)src != TIMESTAMP_NOT_READY; + volatile uint64_t const *src64 = (volatile uint64_t const *)src; + available = *src64 != TIMESTAMP_NOT_READY; if (flags & VK_QUERY_RESULT_WAIT_BIT) { - while (*(volatile uint64_t *)src == TIMESTAMP_NOT_READY) + while (*src64 == TIMESTAMP_NOT_READY) ; - available = *(uint64_t *)src != TIMESTAMP_NOT_READY; + available = true; } if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) @@ -1146,23 +1148,28 @@ VkResult radv_GetQueryPoolResults( if (flags & VK_QUERY_RESULT_64_BIT) { if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT)) - *(uint64_t*)dest = *(uint64_t*)src; + *(uint64_t*)dest = *src64; dest += 8; } else { if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT)) - *(uint32_t*)dest = *(uint32_t*)src; + *(uint32_t*)dest = *(volatile uint32_t*)src; dest += 4; } break; } case VK_QUERY_TYPE_OCCLUSION: { volatile uint64_t const *src64 = (volatile uint64_t const *)src; + uint32_t db_count = device->physical_device->rad_info.num_render_backends; + uint32_t enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask; uint64_t sample_count = 0; - int db_count = get_max_db(device); available = 1; for (int i = 0; i < db_count; ++i) { uint64_t start, end; + + if (!(enabled_rb_mask & (1 << i))) + continue; + do { start = src64[2 * i]; end = src64[2 * i + 1]; @@ -1193,8 +1200,8 @@ VkResult radv_GetQueryPoolResults( if (!available && !(flags & 
VK_QUERY_RESULT_PARTIAL_BIT)) result = VK_NOT_READY; - const uint64_t *start = (uint64_t*)src; - const uint64_t *stop = (uint64_t*)(src + pipelinestat_block_size); + const volatile uint64_t *start = (uint64_t*)src; + const volatile uint64_t *stop = (uint64_t*)(src + pipelinestat_block_size); if (flags & VK_QUERY_RESULT_64_BIT) { uint64_t *dst = (uint64_t*)dest; dest += util_bitcount(pool->pipeline_stats_mask) * 8; diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 648fb6586f7..1f9fa487688 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -624,6 +624,8 @@ shader_variant_create(struct radv_device *device, tm_options |= AC_TM_SISCHED; if (options->check_ir) tm_options |= AC_TM_CHECK_IR; + if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT) + tm_options |= AC_TM_NO_LOAD_STORE_OPT; thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM); radv_init_llvm_once(); @@ -763,7 +765,7 @@ generate_shader_stats(struct radv_device *device, lds_increment); } else if (stage == MESA_SHADER_COMPUTE) { unsigned max_workgroup_size = - radv_nir_get_max_workgroup_size(chip_class, variant->nir); + radv_nir_get_max_workgroup_size(chip_class, stage, variant->nir); lds_per_wave = (conf->lds_size * lds_increment) / DIV_ROUND_UP(max_workgroup_size, 64); } diff --git a/src/broadcom/compiler/vir_opt_redundant_flags.c b/src/broadcom/compiler/vir_opt_redundant_flags.c index 61ebf5dfa24..8749f3cd647 100644 --- a/src/broadcom/compiler/vir_opt_redundant_flags.c +++ b/src/broadcom/compiler/vir_opt_redundant_flags.c @@ -102,7 +102,7 @@ vir_opt_redundant_flags_block(struct v3d_compile *c, struct qblock *block) vir_for_each_inst(inst, block) { if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU || inst->qpu.flags.auf != V3D_QPU_UF_NONE || - inst->qpu.flags.auf != V3D_QPU_UF_NONE) { + inst->qpu.flags.muf != V3D_QPU_UF_NONE) { last_flags = NULL; continue; } diff --git a/src/compiler/Android.glsl.gen.mk b/src/compiler/Android.glsl.gen.mk index 3b94ea7bd2f..1308de2db97 100644 --- a/src/compiler/Android.glsl.gen.mk +++ b/src/compiler/Android.glsl.gen.mk @@ -90,8 +90,6 @@ $(intermediates)/glsl/glcpp/glcpp-lex.c: $(LOCAL_PATH)/glsl/glcpp/glcpp-lex.l $(intermediates)/glsl/glcpp/glcpp-parse.c: $(LOCAL_PATH)/glsl/glcpp/glcpp-parse.y $(call glsl_local-y-to-c-and-h) -$(LOCAL_PATH)/glsl/ir.h: $(intermediates)/glsl/ir_expression_operation.h - $(intermediates)/glsl/ir_expression_operation.h: $(LOCAL_PATH)/glsl/ir_expression_operation.py @mkdir -p $(dir $@) $(hide) $(MESA_PYTHON2) $< enum > $@ diff --git a/src/compiler/Android.glsl.mk b/src/compiler/Android.glsl.mk index 0aabafa2673..37b3cb80251 100644 --- a/src/compiler/Android.glsl.mk +++ b/src/compiler/Android.glsl.mk @@ -48,7 +48,7 @@ LOCAL_STATIC_LIBRARIES := \ libmesa_nir LOCAL_MODULE := libmesa_glsl - +LOCAL_CFLAGS += -Wno-error include $(LOCAL_PATH)/Android.glsl.gen.mk include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) diff --git a/src/compiler/Android.nir.gen.mk b/src/compiler/Android.nir.gen.mk index 894fb12c4be..26115f446a3 100644 --- a/src/compiler/Android.nir.gen.mk +++ b/src/compiler/Android.nir.gen.mk @@ -76,8 +76,6 @@ $(intermediates)/nir/nir_opcodes.h: $(nir_opcodes_h_deps) @mkdir -p $(dir $@) $(hide) $(MESA_PYTHON2) $(nir_opcodes_h_gen) $< > $@ -$(LOCAL_PATH)/nir/nir.h: $(intermediates)/nir/nir_opcodes.h - nir_opcodes_c_gen := $(LOCAL_PATH)/nir/nir_opcodes_c.py nir_opcodes_c_deps := \ $(LOCAL_PATH)/nir/nir_opcodes.py \ diff --git a/src/compiler/Android.nir.mk 
b/src/compiler/Android.nir.mk index 75a247a245d..59da5dbdc1c 100644 --- a/src/compiler/Android.nir.mk +++ b/src/compiler/Android.nir.mk @@ -41,6 +41,9 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/gallium/include \ $(MESA_TOP)/src/gallium/auxiliary +LOCAL_CFLAGS := \ + -Wno-missing-braces + LOCAL_STATIC_LIBRARIES := libmesa_compiler LOCAL_MODULE := libmesa_nir diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index 9bebc3d8867..005256725ff 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -244,6 +244,7 @@ NIR_FILES = \ nir/nir_lower_constant_initializers.c \ nir/nir_lower_double_ops.c \ nir/nir_lower_drawpixels.c \ + nir/nir_lower_fb_read.c \ nir/nir_lower_fragcoord_wtrans.c \ nir/nir_lower_frexp.c \ nir/nir_lower_global_vars_to_local.c \ diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl index 415dde3907c..c92577c4e4c 100644 --- a/src/compiler/glsl/float64.glsl +++ b/src/compiler/glsl/float64.glsl @@ -1681,17 +1681,22 @@ __fround64(uint64_t __a) if (unbiasedExp < 20) { if (unbiasedExp < 0) { + if ((aHi & 0x80000000u) != 0u && aLo == 0u) { + return 0; + } aHi &= 0x80000000u; - if (unbiasedExp == -1 && aLo != 0u) - aHi |= (1023u << 20); + if ((a.y & 0x000FFFFFu) == 0u && a.x == 0u) { + aLo = 0u; + return packUint2x32(uvec2(aLo, aHi)); + } + aHi = mix(aHi, (aHi | 0x3FF00000u), unbiasedExp == -1); aLo = 0u; } else { uint maskExp = 0x000FFFFFu >> unbiasedExp; - /* a is an integral value */ - if (((aHi & maskExp) == 0u) && (aLo == 0u)) - return __a; - + uint lastBit = maskExp + 1; aHi += 0x00080000u >> unbiasedExp; + if ((aHi & maskExp) == 0u) + aHi &= ~lastBit; aHi &= ~maskExp; aLo = 0u; } @@ -1708,9 +1713,7 @@ __fround64(uint64_t __a) aLo &= ~maskExp; } - a.x = aLo; - a.y = aHi; - return packUint2x32(a); + return packUint2x32(uvec2(aLo, aHi)); } uint64_t diff --git a/src/compiler/glsl/gl_nir_lower_buffers.c b/src/compiler/glsl/gl_nir_lower_buffers.c index b9195329f4a..595eb6d9bdf 100644 --- a/src/compiler/glsl/gl_nir_lower_buffers.c +++ b/src/compiler/glsl/gl_nir_lower_buffers.c @@ -48,7 +48,6 @@ get_block_array_index(nir_builder *b, nir_deref_instr *deref, if (nir_src_is_const(deref->arr.index)) { unsigned arr_index = nir_src_as_uint(deref->arr.index); - arr_index = MIN2(arr_index, arr_size - 1); /* We're walking the deref from the tail so prepend the array index */ block_name = ralloc_asprintf(b->shader, "[%u]%s", arr_index, diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index 1c095cb66f9..c951d9526ac 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -224,10 +224,12 @@ expanded_line: glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro); _glcpp_parser_skip_stack_change_if (parser, & @1, "elif", $2.value); } -| LINE_EXPANDED integer_constant NEWLINE { +| LINE_EXPANDED expression NEWLINE { + if (parser->is_gles && $2.undefined_macro) + glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro); parser->has_new_line_number = 1; - parser->new_line_number = $2; - _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", $2); + parser->new_line_number = $2.value; + _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", $2.value); } | LINE_EXPANDED integer_constant integer_constant NEWLINE { parser->has_new_line_number = 1; @@ -238,6 +240,17 @@ expanded_line: "#line %" PRIiMAX " %" PRIiMAX "\n", $2, $3); } +| LINE_EXPANDED 
'(' expression ')' '(' expression ')' NEWLINE { + if (parser->is_gles && $3.undefined_macro) + glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $3.undefined_macro); + if (parser->is_gles && $6.undefined_macro) + glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $6.undefined_macro); + parser->has_new_line_number = 1; + parser->new_line_number = $3.value; + parser->has_new_source_number = 1; + parser->new_source_number = $6.value; + _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX " %" PRIiMAX "\n", $3.value, $6.value); + } ; define: diff --git a/src/compiler/glsl/link_uniform_block_active_visitor.cpp b/src/compiler/glsl/link_uniform_block_active_visitor.cpp index 368981852c0..5bf0a8bc5a7 100644 --- a/src/compiler/glsl/link_uniform_block_active_visitor.cpp +++ b/src/compiler/glsl/link_uniform_block_active_visitor.cpp @@ -103,6 +103,8 @@ process_arrays(void *mem_ctx, ir_dereference_array *ir, if (*ub_array_ptr == NULL) { *ub_array_ptr = rzalloc(mem_ctx, struct uniform_block_array_elements); (*ub_array_ptr)->ir = ir; + (*ub_array_ptr)->total_num_array_elements = + ir->array->type->arrays_of_arrays_size(); } struct uniform_block_array_elements *ub_array = *ub_array_ptr; @@ -199,6 +201,7 @@ link_uniform_block_active_visitor::visit(ir_variable *var) (*ub_array)->array_elements, unsigned, (*ub_array)->num_array_elements); + (*ub_array)->total_num_array_elements = type->arrays_of_arrays_size(); for (unsigned i = 0; i < (*ub_array)->num_array_elements; i++) { (*ub_array)->array_elements[i] = i; diff --git a/src/compiler/glsl/link_uniform_block_active_visitor.h b/src/compiler/glsl/link_uniform_block_active_visitor.h index fbac65d5b67..462a2efdb9b 100644 --- a/src/compiler/glsl/link_uniform_block_active_visitor.h +++ b/src/compiler/glsl/link_uniform_block_active_visitor.h @@ -30,6 +30,15 @@ struct uniform_block_array_elements { unsigned *array_elements; unsigned num_array_elements; + /** + * Size of the array before array-trimming optimizations. + * + * Locations are only assigned to active array elements, but the location + * values are calculated as if all elements are active. The total number + * of elements in an array including the elements in arrays of arrays before + * inactive elements are removed is needed to be perform that calculation. 
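The comment above is the reason total_num_array_elements is recorded: block bindings are laid out as if every array element were active, so the offset of blk[i][j] is computed from the untrimmed inner array size even when some elements were optimized away. A tiny worked example of that linearization (hypothetical blk[4][3]):

#include <stdio.h>

/* blk[4][3]: binding offset computed from the untrimmed inner size. */
static unsigned binding_offset(unsigned i, unsigned j,
                               unsigned inner_total /* 3 */)
{
   return i * inner_total + j;
}

int main(void)
{
   /* Even if blk[1][*] is unused, blk[2][0] still lands at offset 6. */
   printf("blk[2][0] -> %u\n", binding_offset(2, 0, 3));
   return 0;
}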
+ */ + unsigned total_num_array_elements; ir_dereference_array *ir; diff --git a/src/compiler/glsl/link_uniform_blocks.cpp b/src/compiler/glsl/link_uniform_blocks.cpp index 45f1c0fe98d..85a246cb7c4 100644 --- a/src/compiler/glsl/link_uniform_blocks.cpp +++ b/src/compiler/glsl/link_uniform_blocks.cpp @@ -222,7 +222,7 @@ static void process_block_array_leaf(const char *name, gl_uniform_block *blocks, gl_uniform_buffer_variable *variables, const struct link_uniform_block_active *const b, unsigned *block_index, - unsigned *binding_offset, + unsigned binding_offset, unsigned linearized_index, struct gl_context *ctx, struct gl_shader_program *prog); @@ -237,25 +237,28 @@ process_block_array(struct uniform_block_array_elements *ub_array, char **name, size_t name_length, gl_uniform_block *blocks, ubo_visitor *parcel, gl_uniform_buffer_variable *variables, const struct link_uniform_block_active *const b, - unsigned *block_index, unsigned *binding_offset, + unsigned *block_index, unsigned binding_offset, struct gl_context *ctx, struct gl_shader_program *prog, unsigned first_index) { for (unsigned j = 0; j < ub_array->num_array_elements; j++) { size_t new_length = name_length; + unsigned int element_idx = ub_array->array_elements[j]; /* Append the subscript to the current variable name */ - ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", - ub_array->array_elements[j]); + ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", element_idx); if (ub_array->array) { + unsigned boffset = binding_offset + (element_idx * + ub_array->array->total_num_array_elements); process_block_array(ub_array->array, name, new_length, blocks, parcel, variables, b, block_index, - binding_offset, ctx, prog, first_index); + boffset, ctx, prog, first_index); } else { + unsigned boffset = binding_offset + element_idx; process_block_array_leaf(*name, blocks, parcel, variables, b, block_index, - binding_offset, *block_index - first_index, + boffset, *block_index - first_index, ctx, prog); } } @@ -266,7 +269,7 @@ process_block_array_leaf(const char *name, gl_uniform_block *blocks, ubo_visitor *parcel, gl_uniform_buffer_variable *variables, const struct link_uniform_block_active *const b, - unsigned *block_index, unsigned *binding_offset, + unsigned *block_index, unsigned binding_offset, unsigned linearized_index, struct gl_context *ctx, struct gl_shader_program *prog) { @@ -283,7 +286,7 @@ process_block_array_leaf(const char *name, * block binding and each subsequent element takes the next consecutive * uniform block binding point. */ - blocks[i].Binding = (b->has_binding) ? b->binding + *binding_offset : 0; + blocks[i].Binding = (b->has_binding) ? 
b->binding + binding_offset : 0; blocks[i].UniformBufferSize = 0; blocks[i]._Packing = glsl_interface_packing(type->interface_packing); @@ -307,7 +310,6 @@ process_block_array_leaf(const char *name, (unsigned)(ptrdiff_t)(&variables[parcel->index] - blocks[i].Uniforms); *block_index = *block_index + 1; - *binding_offset = *binding_offset + 1; } /* This function resizes the array types of the block so that later we can use @@ -370,7 +372,6 @@ create_buffer_blocks(void *mem_ctx, struct gl_context *ctx, if ((create_ubo_blocks && !b->is_shader_storage) || (!create_ubo_blocks && b->is_shader_storage)) { - unsigned binding_offset = 0; if (b->array != NULL) { char *name = ralloc_strdup(NULL, block_type->without_array()->name); @@ -378,12 +379,12 @@ create_buffer_blocks(void *mem_ctx, struct gl_context *ctx, assert(b->has_instance_name); process_block_array(b->array, &name, name_length, blocks, &parcel, - variables, b, &i, &binding_offset, ctx, prog, + variables, b, &i, 0, ctx, prog, i); ralloc_free(name); } else { process_block_array_leaf(block_type->name, blocks, &parcel, - variables, b, &i, &binding_offset, + variables, b, &i, 0, 0, ctx, prog); } } @@ -440,6 +441,7 @@ link_uniform_blocks(void *mem_ctx, GLSL_INTERFACE_PACKING_PACKED)) { b->type = resize_block_array(b->type, b->array); b->var->type = b->type; + b->var->data.max_array_access = b->type->length - 1; } block_size.num_active_uniforms = 0; diff --git a/src/compiler/glsl/loop_unroll.cpp b/src/compiler/glsl/loop_unroll.cpp index 874f4185681..7e97c3cddf1 100644 --- a/src/compiler/glsl/loop_unroll.cpp +++ b/src/compiler/glsl/loop_unroll.cpp @@ -180,6 +180,11 @@ loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations) void *const mem_ctx = ralloc_parent(ir); loop_variable_state *const ls = this->state->get(ir); + /* If there are no terminators, then the loop iteration count must be 1. + * This is the 'do { } while (false);' case. + */ + assert(!ls->terminators.is_empty() || iterations == 1); + ir_instruction *first_ir = (ir_instruction *) ir->body_instructions.get_head(); @@ -221,7 +226,8 @@ loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations) * the loop, or it the exit branch contains instructions. This ensures we * execute any instructions before the terminator or in its exit branch. */ - if (limit_if != first_ir->as_if() || exit_branch_has_instructions) + if (!ls->terminators.is_empty() && + (limit_if != first_ir->as_if() || exit_branch_has_instructions)) iterations++; for (int i = 0; i < iterations; i++) { diff --git a/src/compiler/glsl/opt_algebraic.cpp b/src/compiler/glsl/opt_algebraic.cpp index ff4be269578..3147d25aea8 100644 --- a/src/compiler/glsl/opt_algebraic.cpp +++ b/src/compiler/glsl/opt_algebraic.cpp @@ -507,6 +507,18 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) if (is_vec_zero(op_const[1])) return ir->operands[0]; + /* Replace (x + (-x)) with constant 0 */ + for (int i = 0; i < 2; i++) { + if (op_expr[i]) { + if (op_expr[i]->operation == ir_unop_neg) { + ir_rvalue *other = ir->operands[(i + 1) % 2]; + if (other && op_expr[i]->operands[0]->equals(other)) { + return ir_constant::zero(ir, ir->type); + } + } + } + } + /* Reassociate addition of constants so that we can do constant * folding. 
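The opt_algebraic change in this hunk folds (x + (-x)) down to a zero constant. A toy expression-tree sketch of the same pattern match (a simplified model in which integer ids stand in for the IR's equals() check):

#include <stdio.h>

typedef enum { OP_VALUE, OP_NEG, OP_ADD } op_t;

typedef struct expr {
   op_t op;
   int id;                 /* identifies a value leaf */
   struct expr *src[2];
} expr;

/* Returns 1 when the add is x + (-x), in either operand order. */
static int folds_to_zero(const expr *add)
{
   if (add->op != OP_ADD)
      return 0;
   for (int i = 0; i < 2; i++) {
      const expr *a = add->src[i], *b = add->src[1 - i];
      if (a->op == OP_NEG && a->src[0]->op == OP_VALUE &&
          b->op == OP_VALUE && a->src[0]->id == b->id)
         return 1;
   }
   return 0;
}

int main(void)
{
   expr x = { OP_VALUE, 7, { 0, 0 } };
   expr n = { OP_NEG, 0, { &x, 0 } };
   expr a = { OP_ADD, 0, { &x, &n } };
   printf("%d\n", folds_to_zero(&a)); /* prints 1 */
   return 0;
}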
*/ diff --git a/src/compiler/glsl/shader_cache.cpp b/src/compiler/glsl/shader_cache.cpp index 581098b88f0..97049043345 100644 --- a/src/compiler/glsl/shader_cache.cpp +++ b/src/compiler/glsl/shader_cache.cpp @@ -165,9 +165,8 @@ shader_cache_read_program_metadata(struct gl_context *ctx, prog->FragDataIndexBindings->iterate(create_binding_str, &buf); ralloc_asprintf_append(&buf, "tf: %d ", prog->TransformFeedback.BufferMode); for (unsigned int i = 0; i < prog->TransformFeedback.NumVarying; i++) { - ralloc_asprintf_append(&buf, "%s:%d ", - prog->TransformFeedback.VaryingNames[i], - prog->TransformFeedback.BufferStride[i]); + ralloc_asprintf_append(&buf, "%s ", + prog->TransformFeedback.VaryingNames[i]); } /* SSO has an effect on the linked program so include this when generating diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp index 9938b3df450..8e5087e2e1a 100644 --- a/src/compiler/glsl_types.cpp +++ b/src/compiler/glsl_types.cpp @@ -50,7 +50,7 @@ glsl_type::glsl_type(GLenum gl_type, gl_type(gl_type), base_type(base_type), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - interface_packing(0), interface_row_major(row_major), + interface_packing(0), interface_row_major(row_major), packed(0), vector_elements(vector_elements), matrix_columns(matrix_columns), length(0), explicit_stride(explicit_stride) { @@ -85,7 +85,7 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type, base_type(base_type), sampled_type(type), sampler_dimensionality(dim), sampler_shadow(shadow), sampler_array(array), interface_packing(0), - interface_row_major(0), + interface_row_major(0), packed(0), length(0), explicit_stride(0) { this->mem_ctx = ralloc_context(NULL); @@ -134,7 +134,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, base_type(GLSL_TYPE_INTERFACE), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), interface_packing((unsigned) packing), - interface_row_major((unsigned) row_major), + interface_row_major((unsigned) row_major), packed(0), vector_elements(0), matrix_columns(0), length(num_fields), explicit_stride(0) { @@ -159,7 +159,7 @@ glsl_type::glsl_type(const glsl_type *return_type, gl_type(0), base_type(GLSL_TYPE_FUNCTION), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - interface_packing(0), interface_row_major(0), + interface_packing(0), interface_row_major(0), packed(0), vector_elements(0), matrix_columns(0), length(num_params), explicit_stride(0) { @@ -188,7 +188,7 @@ glsl_type::glsl_type(const char *subroutine_name) : gl_type(0), base_type(GLSL_TYPE_SUBROUTINE), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - interface_packing(0), interface_row_major(0), + interface_packing(0), interface_row_major(0), packed(0), vector_elements(1), matrix_columns(1), length(0), explicit_stride(0) { @@ -534,7 +534,7 @@ glsl_type::glsl_type(const glsl_type *array, unsigned length, unsigned explicit_stride) : base_type(GLSL_TYPE_ARRAY), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - interface_packing(0), interface_row_major(0), + interface_packing(0), interface_row_major(0), packed(0), vector_elements(0), matrix_columns(0), length(length), name(NULL), explicit_stride(explicit_stride) { diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index a8faeb9c018..18aa44ab9c2 100644 --- a/src/compiler/nir/meson.build 
+++ b/src/compiler/nir/meson.build @@ -299,4 +299,16 @@ if with_tests link_with : libmesa_util, ) ) + + test( + 'comparison_pre', + executable( + 'comparison_pre', + files('tests/comparison_pre_tests.cpp'), + c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args], + include_directories : [inc_common], + dependencies : [dep_thread, idep_gtest, idep_nir], + link_with : libmesa_util, + ) + ) endif diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 5b75585498e..87a66d35b62 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -1204,6 +1204,41 @@ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state) return nir_foreach_dest(instr, visit_dest_indirect, &dest_state); } +nir_const_value +nir_const_value_for_float(double f, unsigned bit_size) +{ + nir_const_value v; + memset(&v, 0, sizeof(v)); + + switch (bit_size) { + case 16: + v.u16 = _mesa_float_to_half(f); + break; + case 32: + v.f32 = f; + break; + case 64: + v.f64 = f; + break; + default: + unreachable("Invalid bit size"); + } + + return v; +} + +double +nir_const_value_as_float(nir_const_value value, unsigned bit_size) +{ + switch (bit_size) { + case 16: return _mesa_half_to_float(value.u16); + case 32: return value.f32; + case 64: return value.f64; + default: + unreachable("Invalid bit size"); + } +} + int64_t nir_src_comp_as_int(nir_src src, unsigned comp) { @@ -1997,6 +2032,8 @@ void nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src, bool bindless) { + enum gl_access_qualifier access = nir_intrinsic_access(intrin); + switch (intrin->intrinsic) { #define CASE(op) \ case nir_intrinsic_image_deref_##op: \ @@ -2028,7 +2065,7 @@ nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src, nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type)); nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type)); - nir_intrinsic_set_access(intrin, var->data.image.access); + nir_intrinsic_set_access(intrin, access | var->data.image.access); nir_intrinsic_set_format(intrin, var->data.image.format); nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 37161e83e4d..2c5abe47220 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -140,6 +140,106 @@ typedef union { arr[i] = c[i].m; \ } while (false) +static inline nir_const_value +nir_const_value_for_raw_uint(uint64_t x, unsigned bit_size) +{ + nir_const_value v; + memset(&v, 0, sizeof(v)); + + switch (bit_size) { + case 1: v.b = x; break; + case 8: v.u8 = x; break; + case 16: v.u16 = x; break; + case 32: v.u32 = x; break; + case 64: v.u64 = x; break; + default: + unreachable("Invalid bit size"); + } + + return v; +} + +static inline nir_const_value +nir_const_value_for_int(int64_t i, unsigned bit_size) +{ + nir_const_value v; + memset(&v, 0, sizeof(v)); + + assert(bit_size <= 64); + if (bit_size < 64) { + assert(i >= (-(1ll << (bit_size - 1)))); + assert(i < (1ll << (bit_size - 1))); + } + + return nir_const_value_for_raw_uint(i, bit_size); +} + +static inline nir_const_value +nir_const_value_for_uint(uint64_t u, unsigned bit_size) +{ + nir_const_value v; + memset(&v, 0, sizeof(v)); + + assert(bit_size <= 64); + if (bit_size < 64) + assert(u < (1ull << bit_size)); + + return nir_const_value_for_raw_uint(u, bit_size); +} + +static inline nir_const_value +nir_const_value_for_bool(bool b, unsigned bit_size) +{ + /* Booleans use a 0/-1 convention */ + return nir_const_value_for_int(-(int)b, bit_size); 
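The nir_const_value helpers added in this hunk use the 0/-1 boolean convention, so true is stored as an all-ones pattern at the requested bit size. A standalone sketch of that encoding and the matching decode (plain C, outside of NIR):

#include <stdint.h>
#include <stdio.h>

/* true -> all-ones pattern for the given bit size, false -> 0. */
static uint64_t encode_bool(int b, unsigned bit_size)
{
   uint64_t ones = (bit_size == 64) ? ~0ull : ((1ull << bit_size) - 1);
   return b ? ones : 0;
}

static int decode_bool(uint64_t v)
{
   return v != 0;   /* only 0 and all-ones are legal encodings */
}

int main(void)
{
   printf("0x%llx\n", (unsigned long long)encode_bool(1, 8));  /* 0xff */
   printf("0x%llx\n", (unsigned long long)encode_bool(1, 32)); /* 0xffffffff */
   printf("%d\n", decode_bool(encode_bool(0, 16)));            /* 0 */
   return 0;
}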
+} + +/* This one isn't inline because it requires half-float conversion */ +nir_const_value nir_const_value_for_float(double b, unsigned bit_size); + +static inline int64_t +nir_const_value_as_int(nir_const_value value, unsigned bit_size) +{ + switch (bit_size) { + /* int1_t uses 0/-1 convention */ + case 1: return -(int)value.b; + case 8: return value.i8; + case 16: return value.i16; + case 32: return value.i32; + case 64: return value.i64; + default: + unreachable("Invalid bit size"); + } +} + +static inline int64_t +nir_const_value_as_uint(nir_const_value value, unsigned bit_size) +{ + switch (bit_size) { + case 1: return value.b; + case 8: return value.u8; + case 16: return value.u16; + case 32: return value.u32; + case 64: return value.u64; + default: + unreachable("Invalid bit size"); + } +} + +static inline bool +nir_const_value_as_bool(nir_const_value value, unsigned bit_size) +{ + int64_t i = nir_const_value_as_int(value, bit_size); + + /* Booleans of any size use 0/-1 convention */ + assert(i == 0 || i == -1); + + return i; +} + +/* This one isn't inline because it requires half-float conversion */ +double nir_const_value_as_float(nir_const_value value, unsigned bit_size); + typedef struct nir_constant { /** * Value of the constant. @@ -1281,6 +1381,10 @@ typedef enum { */ NIR_INTRINSIC_DESC_TYPE = 19, + /* Separate source/dest access flags for copies */ + NIR_INTRINSIC_SRC_ACCESS, + NIR_INTRINSIC_DST_ACCESS, + NIR_INTRINSIC_NUM_INDEX_FLAGS, } nir_intrinsic_index_flag; @@ -1381,6 +1485,8 @@ INTRINSIC_IDX_ACCESSORS(param_idx, PARAM_IDX, unsigned) INTRINSIC_IDX_ACCESSORS(image_dim, IMAGE_DIM, enum glsl_sampler_dim) INTRINSIC_IDX_ACCESSORS(image_array, IMAGE_ARRAY, bool) INTRINSIC_IDX_ACCESSORS(access, ACCESS, enum gl_access_qualifier) +INTRINSIC_IDX_ACCESSORS(src_access, SRC_ACCESS, enum gl_access_qualifier) +INTRINSIC_IDX_ACCESSORS(dst_access, DST_ACCESS, enum gl_access_qualifier) INTRINSIC_IDX_ACCESSORS(format, FORMAT, unsigned) INTRINSIC_IDX_ACCESSORS(align_mul, ALIGN_MUL, unsigned) INTRINSIC_IDX_ACCESSORS(align_offset, ALIGN_OFFSET, unsigned) @@ -1416,6 +1522,16 @@ nir_intrinsic_align(const nir_intrinsic_instr *intrin) void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr, nir_ssa_def *handle, bool bindless); +/* Determine if an intrinsic can be arbitrarily reordered and eliminated. 
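nir_intrinsic_can_reorder(), introduced in this nir.h hunk, admits an intrinsic only when its info flags carry both CAN_ELIMINATE and CAN_REORDER. A reduced model of that test (toy flag table, not the real nir_intrinsic_infos):

#include <stdio.h>

#define CAN_ELIMINATE (1u << 0)
#define CAN_REORDER   (1u << 1)

struct intrinsic_info { const char *name; unsigned flags; };

/* Stand-in table: a pure load can move freely, a store cannot. */
static const struct intrinsic_info infos[] = {
   { "load_uniform", CAN_ELIMINATE | CAN_REORDER },
   { "store_ssbo",   0 },
};

static int can_reorder(const struct intrinsic_info *info)
{
   return (info->flags & CAN_ELIMINATE) && (info->flags & CAN_REORDER);
}

int main(void)
{
   for (unsigned i = 0; i < 2; i++)
      printf("%s: %d\n", infos[i].name, can_reorder(&infos[i]));
   return 0;
}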
*/ +static inline bool +nir_intrinsic_can_reorder(nir_intrinsic_instr *instr) +{ + const nir_intrinsic_info *info = + &nir_intrinsic_infos[instr->intrinsic]; + return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && + (info->flags & NIR_INTRINSIC_CAN_REORDER); +} + /** * \group texture information * @@ -1815,6 +1931,85 @@ NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr, nir_parallel_copy_instr, instr, type, nir_instr_type_parallel_copy) +typedef struct { + nir_ssa_def *def; + unsigned comp; +} nir_ssa_scalar; + +static inline bool +nir_ssa_scalar_is_const(nir_ssa_scalar s) +{ + return s.def->parent_instr->type == nir_instr_type_load_const; +} + +static inline nir_const_value +nir_ssa_scalar_as_const_value(nir_ssa_scalar s) +{ + assert(s.comp < s.def->num_components); + nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr); + return load->value[s.comp]; +} + +#define NIR_DEFINE_SCALAR_AS_CONST(type, suffix) \ +static inline type \ +nir_ssa_scalar_as_##suffix(nir_ssa_scalar s) \ +{ \ + return nir_const_value_as_##suffix( \ + nir_ssa_scalar_as_const_value(s), s.def->bit_size); \ +} + +NIR_DEFINE_SCALAR_AS_CONST(int64_t, int) +NIR_DEFINE_SCALAR_AS_CONST(uint64_t, uint) +NIR_DEFINE_SCALAR_AS_CONST(bool, bool) +NIR_DEFINE_SCALAR_AS_CONST(double, float) + +#undef NIR_DEFINE_SCALAR_AS_CONST + +static inline bool +nir_ssa_scalar_is_alu(nir_ssa_scalar s) +{ + return s.def->parent_instr->type == nir_instr_type_alu; +} + +static inline nir_op +nir_ssa_scalar_alu_op(nir_ssa_scalar s) +{ + return nir_instr_as_alu(s.def->parent_instr)->op; +} + +static inline nir_ssa_scalar +nir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx) +{ + nir_ssa_scalar out = { NULL, 0 }; + + nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr); + assert(alu_src_idx < nir_op_infos[alu->op].num_inputs); + + /* Our component must be written */ + assert(s.comp < s.def->num_components); + assert(alu->dest.write_mask & (1u << s.comp)); + + assert(alu->src[alu_src_idx].src.is_ssa); + out.def = alu->src[alu_src_idx].src.ssa; + + if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) { + /* The ALU src is unsized so the source component follows the + * destination component. + */ + out.comp = alu->src[alu_src_idx].swizzle[s.comp]; + } else { + /* This is a sized source so all source components work together to + * produce all the destination components. Since we need to return a + * scalar, this only works if the source is a scalar. 
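nir_ssa_scalar_chase_alu_src() in this hunk maps a destination component back through the source swizzle: an unsized (per-component) source uses swizzle[comp], while a sized source must itself be scalar and so uses swizzle[0]. A self-contained sketch of that lookup (plain arrays stand in for the NIR structures):

#include <assert.h>
#include <stdio.h>

/* input_size == 0 means "per-component" (unsized) in this toy model. */
static unsigned chase_src_component(const unsigned swizzle[4],
                                    unsigned input_size,
                                    unsigned dest_comp)
{
   if (input_size == 0)
      return swizzle[dest_comp];
   assert(input_size == 1);   /* sized sources must be scalar here */
   return swizzle[0];
}

int main(void)
{
   const unsigned swz[4] = { 2, 0, 1, 3 };
   printf("%u\n", chase_src_component(swz, 0, 1)); /* unsized: swz[1] = 0 */
   printf("%u\n", chase_src_component(swz, 1, 1)); /* sized:   swz[0] = 2 */
   return 0;
}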
+ */ + assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1); + out.comp = alu->src[alu_src_idx].swizzle[0]; + } + assert(out.comp < out.def->num_components); + + return out; +} + /* * Control flow * @@ -2196,6 +2391,7 @@ typedef enum { nir_lower_minmax64 = (1 << 10), nir_lower_shift64 = (1 << 11), nir_lower_imul_2x32_64 = (1 << 12), + nir_lower_extract64 = (1 << 13), } nir_lower_int64_options; typedef enum { @@ -2785,6 +2981,7 @@ NIR_SRC_AS_(deref, nir_deref_instr, nir_instr_type_deref, nir_instr_as_deref) bool nir_src_is_dynamically_uniform(nir_src src); bool nir_srcs_equal(nir_src src1, nir_src src2); +bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2); void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src); void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src); void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src); @@ -3487,6 +3684,9 @@ bool nir_lower_phis_to_regs_block(nir_block *block); bool nir_lower_ssa_defs_to_regs_block(nir_block *block); bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl); +/* This is here for unit tests. */ +bool nir_opt_comparison_pre_impl(nir_function_impl *impl); + bool nir_opt_comparison_pre(nir_shader *shader); bool nir_opt_algebraic(nir_shader *shader); @@ -3535,6 +3735,7 @@ bool nir_opt_peephole_select(nir_shader *shader, unsigned limit, bool indirect_load_ok, bool expensive_alu_ok); bool nir_opt_remove_phis(nir_shader *shader); +bool nir_opt_remove_phis_block(nir_block *block); bool nir_opt_shrink_load(nir_shader *shader); diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index ced009a66c7..f56e8beff28 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -1124,15 +1124,28 @@ nir_store_deref(nir_builder *build, nir_deref_instr *deref, } static inline void -nir_copy_deref(nir_builder *build, nir_deref_instr *dest, nir_deref_instr *src) +nir_copy_deref_with_access(nir_builder *build, nir_deref_instr *dest, + nir_deref_instr *src, + enum gl_access_qualifier dest_access, + enum gl_access_qualifier src_access) { nir_intrinsic_instr *copy = nir_intrinsic_instr_create(build->shader, nir_intrinsic_copy_deref); copy->src[0] = nir_src_for_ssa(&dest->dest.ssa); copy->src[1] = nir_src_for_ssa(&src->dest.ssa); + nir_intrinsic_set_dst_access(copy, dest_access); + nir_intrinsic_set_src_access(copy, src_access); nir_builder_instr_insert(build, &copy->instr); } +static inline void +nir_copy_deref(nir_builder *build, nir_deref_instr *dest, nir_deref_instr *src) +{ + nir_copy_deref_with_access(build, dest, src, + (enum gl_access_qualifier) 0, + (enum gl_access_qualifier) 0); +} + static inline nir_ssa_def * nir_load_var(nir_builder *build, nir_variable *var) { diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c index eec10a1b847..f4000321575 100644 --- a/src/compiler/nir/nir_clone.c +++ b/src/compiler/nir/nir_clone.c @@ -151,9 +151,11 @@ nir_variable_clone(const nir_variable *var, nir_shader *shader) nvar->name = ralloc_strdup(nvar, var->name); nvar->data = var->data; nvar->num_state_slots = var->num_state_slots; - nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots); - memcpy(nvar->state_slots, var->state_slots, - var->num_state_slots * sizeof(nir_state_slot)); + if (var->num_state_slots) { + nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots); + memcpy(nvar->state_slots, var->state_slots, + var->num_state_slots * sizeof(nir_state_slot)); + } if
(var->constant_initializer) { nvar->constant_initializer = nir_constant_clone(var->constant_initializer, nvar); diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py index f26fd0a3ea2..ed420c300f2 100644 --- a/src/compiler/nir/nir_constant_expressions.py +++ b/src/compiler/nir/nir_constant_expressions.py @@ -414,7 +414,8 @@ def get_const_field(type_): switch (op) { % for name in sorted(opcodes.keys()): case nir_op_${name}: - return evaluate_${name}(dest, num_components, bit_width, src); + evaluate_${name}(dest, num_components, bit_width, src); + return; % endfor default: unreachable("shouldn't get here"); diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c index f1e6eee7745..835c39cff93 100644 --- a/src/compiler/nir/nir_deref.c +++ b/src/compiler/nir/nir_deref.c @@ -124,17 +124,15 @@ nir_deref_instr_has_indirect(nir_deref_instr *instr) unsigned nir_deref_instr_ptr_as_array_stride(nir_deref_instr *deref) { - assert(deref->deref_type == nir_deref_type_ptr_as_array); - nir_deref_instr *parent = nir_deref_instr_parent(deref); - switch (parent->deref_type) { + switch (deref->deref_type) { case nir_deref_type_array: - return glsl_get_explicit_stride(nir_deref_instr_parent(parent)->type); + return glsl_get_explicit_stride(nir_deref_instr_parent(deref)->type); case nir_deref_type_ptr_as_array: - return nir_deref_instr_ptr_as_array_stride(parent); + return nir_deref_instr_ptr_as_array_stride(nir_deref_instr_parent(deref)); case nir_deref_type_cast: - return parent->cast.ptr_stride; + return deref->cast.ptr_stride; default: - unreachable("Invalid parent for ptr_as_array deref"); + return 0; } } diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c index bd62bc974ed..e2a0b32cab0 100644 --- a/src/compiler/nir/nir_instr_set.c +++ b/src/compiler/nir/nir_instr_set.c @@ -25,6 +25,64 @@ #include "nir_vla.h" #include "util/half_float.h" +static bool +src_is_ssa(nir_src *src, void *data) +{ + (void) data; + return src->is_ssa; +} + +static bool +dest_is_ssa(nir_dest *dest, void *data) +{ + (void) data; + return dest->is_ssa; +} + +static inline bool +instr_each_src_and_dest_is_ssa(const nir_instr *instr) +{ + if (!nir_foreach_dest((nir_instr *)instr, dest_is_ssa, NULL) || + !nir_foreach_src((nir_instr *)instr, src_is_ssa, NULL)) + return false; + + return true; +} + +/* This function determines if uses of an instruction can safely be rewritten + * to use another identical instruction instead. Note that this function must + * be kept in sync with hash_instr() and nir_instrs_equal() -- only + * instructions that pass this test will be handed on to those functions, and + * conversely they must handle everything that this function returns true for. + */ +static bool +instr_can_rewrite(const nir_instr *instr) +{ + /* We only handle SSA. 
*/ + assert(instr_each_src_and_dest_is_ssa(instr)); + + switch (instr->type) { + case nir_instr_type_alu: + case nir_instr_type_deref: + case nir_instr_type_tex: + case nir_instr_type_load_const: + case nir_instr_type_phi: + return true; + case nir_instr_type_intrinsic: + return nir_intrinsic_can_reorder(nir_instr_as_intrinsic(instr)); + case nir_instr_type_call: + case nir_instr_type_jump: + case nir_instr_type_ssa_undef: + return false; + case nir_instr_type_parallel_copy: + default: + unreachable("Invalid instruction type"); + } + + return false; +} + + #define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data)) static uint32_t @@ -430,12 +488,16 @@ nir_alu_srcs_negative_equal(const nir_alu_instr *alu1, if (const2 == NULL) return false; + if (nir_src_bit_size(alu1->src[src1].src) != + nir_src_bit_size(alu2->src[src2].src)) + return false; + /* FINISHME: Apply the swizzle? */ return nir_const_value_negative_equal(const1, const2, nir_ssa_alu_instr_src_components(alu1, src1), nir_op_infos[alu1->op].input_types[src1], - alu1->dest.dest.ssa.bit_size); + nir_src_bit_size(alu1->src[src1].src)); } uint8_t alu1_swizzle[4] = {0}; @@ -503,9 +565,11 @@ nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2, * the same hash for (ignoring collisions, of course). */ -static bool +bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) { + assert(instr_can_rewrite(instr1) && instr_can_rewrite(instr2)); + if (instr1->type != instr2->type) return false; @@ -701,68 +765,6 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) unreachable("All cases in the above switch should return"); } -static bool -src_is_ssa(nir_src *src, void *data) -{ - (void) data; - return src->is_ssa; -} - -static bool -dest_is_ssa(nir_dest *dest, void *data) -{ - (void) data; - return dest->is_ssa; -} - -static inline bool -instr_each_src_and_dest_is_ssa(nir_instr *instr) -{ - if (!nir_foreach_dest(instr, dest_is_ssa, NULL) || - !nir_foreach_src(instr, src_is_ssa, NULL)) - return false; - - return true; -} - -/* This function determines if uses of an instruction can safely be rewritten - * to use another identical instruction instead. Note that this function must - * be kept in sync with hash_instr() and nir_instrs_equal() -- only - * instructions that pass this test will be handed on to those functions, and - * conversely they must handle everything that this function returns true for. - */ - -static bool -instr_can_rewrite(nir_instr *instr) -{ - /* We only handle SSA. 
*/ - assert(instr_each_src_and_dest_is_ssa(instr)); - - switch (instr->type) { - case nir_instr_type_alu: - case nir_instr_type_deref: - case nir_instr_type_tex: - case nir_instr_type_load_const: - case nir_instr_type_phi: - return true; - case nir_instr_type_intrinsic: { - const nir_intrinsic_info *info = - &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic]; - return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && - (info->flags & NIR_INTRINSIC_CAN_REORDER); - } - case nir_instr_type_call: - case nir_instr_type_jump: - case nir_instr_type_ssa_undef: - return false; - case nir_instr_type_parallel_copy: - default: - unreachable("Invalid instruction type"); - } - - return false; -} - static nir_ssa_def * nir_instr_get_dest_ssa_def(nir_instr *instr) { diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 3a0470c2ca1..a0c115ff84d 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -111,6 +111,8 @@ def __init__(self, name, src_components, dest_components, IMAGE_ARRAY = "NIR_INTRINSIC_IMAGE_ARRAY" # Access qualifiers for image and memory access intrinsics ACCESS = "NIR_INTRINSIC_ACCESS" +DST_ACCESS = "NIR_INTRINSIC_DST_ACCESS" +SRC_ACCESS = "NIR_INTRINSIC_SRC_ACCESS" # Image format for image intrinsics FORMAT = "NIR_INTRINSIC_FORMAT" # Offset or address alignment @@ -152,7 +154,7 @@ def intrinsic(name, src_comp=[], dest_comp=-1, indices=[], intrinsic("load_deref", dest_comp=0, src_comp=[-1], indices=[ACCESS], flags=[CAN_ELIMINATE]) intrinsic("store_deref", src_comp=[-1, 0], indices=[WRMASK, ACCESS]) -intrinsic("copy_deref", src_comp=[-1, -1]) +intrinsic("copy_deref", src_comp=[-1, -1], indices=[DST_ACCESS, SRC_ACCESS]) # Interpolation of input. The interp_deref_at* intrinsics are similar to the # load_var intrinsic acting on a shader input except that they interpolate the @@ -333,7 +335,8 @@ def atomic3(name): # either one or two additional scalar arguments with the same meaning as in # the ARB_shader_image_load_store specification. 
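The copy_deref change above gives that intrinsic separate DST_ACCESS and SRC_ACCESS indices, and the image() helper defined next adds an ACCESS index to the image_deref_* variants. A minimal sketch of how a pass might use the new nir_copy_deref_with_access() builder wrapper; the helper name is invented for illustration, and the qualifier value is assumed to be the ACCESS_NON_WRITEABLE bit from gl_access_qualifier in shader_enums.h.

#include "nir_builder.h"

/* Hypothetical helper: emit a deref copy whose source is known to be
 * read-only, so that nir_lower_var_copies can tag the loads it generates
 * with the same qualifier. */
static void
emit_readonly_copy(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
{
   nir_copy_deref_with_access(b, dst, src,
                              (enum gl_access_qualifier) 0, /* dst: no qualifiers */
                              ACCESS_NON_WRITEABLE);        /* src: read-only */
}

The plain nir_copy_deref() wrapper keeps its old signature by passing zero for both qualifiers, so existing callers are unaffected.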
def image(name, src_comp=[], **kwargs): - intrinsic("image_deref_" + name, src_comp=[1] + src_comp, **kwargs) + intrinsic("image_deref_" + name, src_comp=[1] + src_comp, + indices=[ACCESS], **kwargs) intrinsic("image_" + name, src_comp=[1] + src_comp, indices=[IMAGE_DIM, IMAGE_ARRAY, FORMAT, ACCESS], **kwargs) intrinsic("bindless_image_" + name, src_comp=[1] + src_comp, diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c index 0ae9533e007..d484c1439e3 100644 --- a/src/compiler/nir/nir_loop_analyze.c +++ b/src/compiler/nir/nir_loop_analyze.c @@ -32,7 +32,10 @@ typedef enum { basic_induction } nir_loop_variable_type; -struct nir_basic_induction_var; +typedef struct nir_basic_induction_var { + nir_alu_instr *alu; /* The def of the alu-operation */ + nir_ssa_def *def_outside_loop; /* The phi-src outside the loop */ +} nir_basic_induction_var; typedef struct { /* A link for the work list */ @@ -57,13 +60,6 @@ typedef struct { } nir_loop_variable; -typedef struct nir_basic_induction_var { - nir_op alu_op; /* The type of alu-operation */ - nir_loop_variable *alu_def; /* The def of the alu-operation */ - nir_loop_variable *invariant; /* The invariant alu-operand */ - nir_loop_variable *def_outside_loop; /* The phi-src outside the loop */ -} nir_basic_induction_var; - typedef struct { /* The loop we store information for */ nir_loop *loop; @@ -274,6 +270,44 @@ compute_invariance_information(loop_info_state *state) } } +/* If all of the instruction sources point to identical ALU instructions (as + * per nir_instrs_equal), return one of the ALU instructions. Otherwise, + * return NULL. + */ +static nir_alu_instr * +phi_instr_as_alu(nir_phi_instr *phi) +{ + nir_alu_instr *first = NULL; + nir_foreach_phi_src(src, phi) { + assert(src->src.is_ssa); + if (src->src.ssa->parent_instr->type != nir_instr_type_alu) + return NULL; + + nir_alu_instr *alu = nir_instr_as_alu(src->src.ssa->parent_instr); + if (first == NULL) { + first = alu; + } else { + if (!nir_instrs_equal(&first->instr, &alu->instr)) + return NULL; + } + } + + return first; +} + +static bool +alu_src_has_identity_swizzle(nir_alu_instr *alu, unsigned src_idx) +{ + assert(nir_op_infos[alu->op].input_sizes[src_idx] == 0); + assert(alu->dest.dest.is_ssa); + for (unsigned i = 0; i < alu->dest.dest.ssa.num_components; i++) { + if (alu->src[src_idx].swizzle[i] != i) + return false; + } + + return true; +} + static bool compute_induction_information(loop_info_state *state) { @@ -298,6 +332,7 @@ compute_induction_information(loop_info_state *state) nir_phi_instr *phi = nir_instr_as_phi(var->def->parent_instr); nir_basic_induction_var *biv = rzalloc(state, nir_basic_induction_var); + nir_loop_variable *alu_src_var = NULL; nir_foreach_phi_src(src, phi) { nir_loop_variable *src_var = get_loop_var(src->src.ssa, state); @@ -313,60 +348,44 @@ compute_induction_information(loop_info_state *state) if (is_var_phi(src_var)) { nir_phi_instr *src_phi = nir_instr_as_phi(src_var->def->parent_instr); - - nir_op alu_op = nir_num_opcodes; /* avoid uninitialized warning */ - nir_ssa_def *alu_srcs[2] = {0}; - nir_foreach_phi_src(src2, src_phi) { - nir_loop_variable *src_var2 = - get_loop_var(src2->src.ssa, state); - - if (!src_var2->in_if_branch || !is_var_alu(src_var2)) + nir_alu_instr *src_phi_alu = phi_instr_as_alu(src_phi); + if (src_phi_alu) { + src_var = get_loop_var(&src_phi_alu->dest.dest.ssa, state); + if (!src_var->in_if_branch) break; - - nir_alu_instr *alu = - nir_instr_as_alu(src_var2->def->parent_instr); - if 
(nir_op_infos[alu->op].num_inputs != 2) - break; - - if (alu->src[0].src.ssa == alu_srcs[0] && - alu->src[1].src.ssa == alu_srcs[1] && - alu->op == alu_op) { - /* Both branches perform the same calculation so we can use - * one of them to find the induction variable. - */ - src_var = src_var2; - } else { - alu_srcs[0] = alu->src[0].src.ssa; - alu_srcs[1] = alu->src[1].src.ssa; - alu_op = alu->op; - } } } - if (!src_var->in_loop) { - biv->def_outside_loop = src_var; - } else if (is_var_alu(src_var)) { + if (!src_var->in_loop && !biv->def_outside_loop) { + biv->def_outside_loop = src_var->def; + } else if (is_var_alu(src_var) && !biv->alu) { + alu_src_var = src_var; nir_alu_instr *alu = nir_instr_as_alu(src_var->def->parent_instr); if (nir_op_infos[alu->op].num_inputs == 2) { - biv->alu_def = src_var; - biv->alu_op = alu->op; - for (unsigned i = 0; i < 2; i++) { - /* Is one of the operands const, and the other the phi */ - if (alu->src[i].src.ssa->parent_instr->type == nir_instr_type_load_const && - alu->src[1-i].src.ssa == &phi->dest.ssa) - biv->invariant = get_loop_var(alu->src[i].src.ssa, state); + /* Is one of the operands const, and the other the phi. The + * phi source can't be swizzled in any way. + */ + if (nir_src_is_const(alu->src[i].src) && + alu->src[1-i].src.ssa == &phi->dest.ssa && + alu_src_has_identity_swizzle(alu, 1 - i)) + biv->alu = alu; } } + + if (!biv->alu) + break; + } else { + biv->alu = NULL; + break; } } - if (biv->alu_def && biv->def_outside_loop && biv->invariant && - is_var_constant(biv->def_outside_loop)) { - assert(is_var_constant(biv->invariant)); - biv->alu_def->type = basic_induction; - biv->alu_def->ind = biv; + if (biv->alu && biv->def_outside_loop && + biv->def_outside_loop->parent_instr->type == nir_instr_type_load_const) { + alu_src_var->type = basic_induction; + alu_src_var->ind = biv; var->type = basic_induction; var->ind = biv; @@ -493,7 +512,7 @@ find_array_access_via_induction(loop_info_state *state, static bool guess_loop_limit(loop_info_state *state, nir_const_value *limit_val, - nir_loop_variable *basic_ind) + nir_ssa_scalar basic_ind) { unsigned min_array_size = 0; @@ -514,8 +533,10 @@ guess_loop_limit(loop_info_state *state, nir_const_value *limit_val, find_array_access_via_induction(state, nir_src_as_deref(intrin->src[0]), &array_idx); - if (basic_ind == array_idx && + if (array_idx && basic_ind.def == array_idx->def && (min_array_size == 0 || min_array_size > array_size)) { + /* Array indices are scalars */ + assert(basic_ind.def->num_components == 1); min_array_size = array_size; } @@ -526,8 +547,10 @@ guess_loop_limit(loop_info_state *state, nir_const_value *limit_val, find_array_access_via_induction(state, nir_src_as_deref(intrin->src[1]), &array_idx); - if (basic_ind == array_idx && + if (array_idx && basic_ind.def == array_idx->def && (min_array_size == 0 || min_array_size > array_size)) { + /* Array indices are scalars */ + assert(basic_ind.def->num_components == 1); min_array_size = array_size; } } @@ -535,7 +558,8 @@ guess_loop_limit(loop_info_state *state, nir_const_value *limit_val, } if (min_array_size) { - limit_val->i32 = min_array_size; + *limit_val = nir_const_value_for_uint(min_array_size, + basic_ind.def->bit_size); return true; } @@ -543,71 +567,84 @@ guess_loop_limit(loop_info_state *state, nir_const_value *limit_val, } static bool -try_find_limit_of_alu(nir_loop_variable *limit, nir_const_value *limit_val, +try_find_limit_of_alu(nir_ssa_scalar limit, nir_const_value *limit_val, nir_loop_terminator *terminator, 
loop_info_state *state) { - if(!is_var_alu(limit)) + if (!nir_ssa_scalar_is_alu(limit)) return false; - nir_alu_instr *limit_alu = nir_instr_as_alu(limit->def->parent_instr); - - if (limit_alu->op == nir_op_imin || - limit_alu->op == nir_op_fmin) { - limit = get_loop_var(limit_alu->src[0].src.ssa, state); - - if (!is_var_constant(limit)) - limit = get_loop_var(limit_alu->src[1].src.ssa, state); - - if (!is_var_constant(limit)) - return false; - - *limit_val = nir_instr_as_load_const(limit->def->parent_instr)->value[0]; - - terminator->exact_trip_count_unknown = true; - - return true; + nir_op limit_op = nir_ssa_scalar_alu_op(limit); + if (limit_op == nir_op_imin || limit_op == nir_op_fmin) { + for (unsigned i = 0; i < 2; i++) { + nir_ssa_scalar src = nir_ssa_scalar_chase_alu_src(limit, i); + if (nir_ssa_scalar_is_const(src)) { + *limit_val = nir_ssa_scalar_as_const_value(src); + terminator->exact_trip_count_unknown = true; + return true; + } + } } return false; } +static nir_const_value +eval_const_unop(nir_op op, unsigned bit_size, nir_const_value src0) +{ + assert(nir_op_infos[op].num_inputs == 1); + nir_const_value dest; + nir_const_value *src[1] = { &src0 }; + nir_eval_const_opcode(op, &dest, 1, bit_size, src); + return dest; +} + +static nir_const_value +eval_const_binop(nir_op op, unsigned bit_size, + nir_const_value src0, nir_const_value src1) +{ + assert(nir_op_infos[op].num_inputs == 2); + nir_const_value dest; + nir_const_value *src[2] = { &src0, &src1 }; + nir_eval_const_opcode(op, &dest, 1, bit_size, src); + return dest; +} + static int32_t -get_iteration(nir_op cond_op, nir_const_value *initial, nir_const_value *step, - nir_const_value *limit) +get_iteration(nir_op cond_op, nir_const_value initial, nir_const_value step, + nir_const_value limit, unsigned bit_size) { - int32_t iter; + nir_const_value span, iter; switch (cond_op) { case nir_op_ige: case nir_op_ilt: case nir_op_ieq: - case nir_op_ine: { - int32_t initial_val = initial->i32; - int32_t span = limit->i32 - initial_val; - iter = span / step->i32; + case nir_op_ine: + span = eval_const_binop(nir_op_isub, bit_size, limit, initial); + iter = eval_const_binop(nir_op_idiv, bit_size, span, step); break; - } + case nir_op_uge: - case nir_op_ult: { - uint32_t initial_val = initial->u32; - uint32_t span = limit->u32 - initial_val; - iter = span / step->u32; + case nir_op_ult: + span = eval_const_binop(nir_op_isub, bit_size, limit, initial); + iter = eval_const_binop(nir_op_udiv, bit_size, span, step); break; - } + case nir_op_fge: case nir_op_flt: case nir_op_feq: - case nir_op_fne: { - float initial_val = initial->f32; - float span = limit->f32 - initial_val; - iter = span / step->f32; + case nir_op_fne: + span = eval_const_binop(nir_op_fsub, bit_size, limit, initial); + iter = eval_const_binop(nir_op_fdiv, bit_size, span, step); + iter = eval_const_unop(nir_op_f2i64, bit_size, iter); break; - } + default: return -1; } - return iter; + uint64_t iter_u64 = nir_const_value_as_uint(iter, bit_size); + return iter_u64 > INT_MAX ? 
-1 : (int)iter_u64; } static bool @@ -618,18 +655,18 @@ test_iterations(int32_t iter_int, nir_const_value *step, { assert(nir_op_infos[cond_op].num_inputs == 2); - nir_const_value iter_src = {0, }; + nir_const_value iter_src; nir_op mul_op; nir_op add_op; switch (induction_base_type) { case nir_type_float: - iter_src.f32 = (float) iter_int; + iter_src = nir_const_value_for_float(iter_int, bit_size); mul_op = nir_op_fmul; add_op = nir_op_fadd; break; case nir_type_int: case nir_type_uint: - iter_src.i32 = iter_int; + iter_src = nir_const_value_for_int(iter_int, bit_size); mul_op = nir_op_imul; add_op = nir_op_iadd; break; @@ -662,14 +699,12 @@ test_iterations(int32_t iter_int, nir_const_value *step, static int calculate_iterations(nir_const_value *initial, nir_const_value *step, - nir_const_value *limit, nir_loop_variable *alu_def, - nir_alu_instr *cond_alu, nir_op alu_op, bool limit_rhs, + nir_const_value *limit, nir_alu_instr *alu, + nir_ssa_scalar cond, nir_op alu_op, bool limit_rhs, bool invert_cond) { assert(initial != NULL && step != NULL && limit != NULL); - nir_alu_instr *alu = nir_instr_as_alu(alu_def->def->parent_instr); - /* nir_op_isub should have been lowered away by this point */ assert(alu->op != nir_op_isub); @@ -701,12 +736,16 @@ calculate_iterations(nir_const_value *initial, nir_const_value *step, * condition and if so we assume we need to step the initial value. */ unsigned trip_offset = 0; - if (cond_alu->src[0].src.ssa == alu_def->def || - cond_alu->src[1].src.ssa == alu_def->def) { + nir_alu_instr *cond_alu = nir_instr_as_alu(cond.def->parent_instr); + if (cond_alu->src[0].src.ssa == &alu->dest.dest.ssa || + cond_alu->src[1].src.ssa == &alu->dest.dest.ssa) { trip_offset = 1; } - int iter_int = get_iteration(alu_op, initial, step, limit); + assert(nir_src_bit_size(alu->src[0].src) == + nir_src_bit_size(alu->src[1].src)); + unsigned bit_size = nir_src_bit_size(alu->src[0].src); + int iter_int = get_iteration(alu_op, *initial, *step, *limit, bit_size); /* If iter_int is negative the loop is ill-formed or is the conditional is * unsigned with a huge iteration count so don't bother going any further. 
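The reworked get_iteration() above folds the (limit - initial) / step arithmetic through nir_eval_const_opcode() at the induction variable's actual bit size instead of reading hard-coded 32-bit union fields. As a rough standalone model of the integer path, with made-up values and the trip_offset handling ignored:

#include <assert.h>
#include <stdint.h>

/* Simplified model of the signed-integer case in get_iteration(): the
 * candidate iteration count is (limit - initial) / step, using truncating
 * division just like nir_op_idiv. */
static int64_t
model_iteration_count(int64_t initial, int64_t step, int64_t limit)
{
   int64_t span = limit - initial;
   return span / step;
}

int
main(void)
{
   /* for (i = 0; i < 12; i += 3)  ->  4 iterations */
   assert(model_iteration_count(0, 3, 12) == 4);

   /* for (i = 1; i < 12; i += 3)  ->  (12 - 1) / 3 = 3; the -1/0/+1 bias loop
    * in calculate_iterations() then checks candidates against the exit
    * condition and settles on the true count of 4. */
   assert(model_iteration_count(1, 3, 12) == 3);
   return 0;
}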
@@ -723,9 +762,6 @@ calculate_iterations(nir_const_value *initial, nir_const_value *step, * * for (float x = 0.0; x != 0.9; x += 0.2); */ - assert(nir_src_bit_size(alu->src[0].src) == - nir_src_bit_size(alu->src[1].src)); - unsigned bit_size = nir_src_bit_size(alu->src[0].src); for (int bias = -1; bias <= 1; bias++) { const int iter_bias = iter_int + bias; @@ -740,9 +776,9 @@ calculate_iterations(nir_const_value *initial, nir_const_value *step, } static nir_op -inverse_comparison(nir_alu_instr *alu) +inverse_comparison(nir_op alu_op) { - switch (alu->op) { + switch (alu_op) { case nir_op_fge: return nir_op_flt; case nir_op_ige: @@ -769,95 +805,97 @@ inverse_comparison(nir_alu_instr *alu) } static bool -is_supported_terminator_condition(nir_alu_instr *alu) +is_supported_terminator_condition(nir_ssa_scalar cond) { + if (!nir_ssa_scalar_is_alu(cond)) + return false; + + nir_alu_instr *alu = nir_instr_as_alu(cond.def->parent_instr); return nir_alu_instr_is_comparison(alu) && nir_op_infos[alu->op].num_inputs == 2; } static bool -get_induction_and_limit_vars(nir_alu_instr *alu, nir_loop_variable **ind, - nir_loop_variable **limit, +get_induction_and_limit_vars(nir_ssa_scalar cond, + nir_ssa_scalar *ind, + nir_ssa_scalar *limit, + bool *limit_rhs, loop_info_state *state) { - bool limit_rhs = true; - - /* We assume that the limit is the "right" operand */ - *ind = get_loop_var(alu->src[0].src.ssa, state); - *limit = get_loop_var(alu->src[1].src.ssa, state); - - if ((*ind)->type != basic_induction) { - /* We had it the wrong way, flip things around */ - *ind = get_loop_var(alu->src[1].src.ssa, state); - *limit = get_loop_var(alu->src[0].src.ssa, state); - limit_rhs = false; + nir_ssa_scalar rhs, lhs; + lhs = nir_ssa_scalar_chase_alu_src(cond, 0); + rhs = nir_ssa_scalar_chase_alu_src(cond, 1); + + if (get_loop_var(lhs.def, state)->type == basic_induction) { + *ind = lhs; + *limit = rhs; + *limit_rhs = true; + return true; + } else if (get_loop_var(rhs.def, state)->type == basic_induction) { + *ind = rhs; + *limit = lhs; + *limit_rhs = false; + return true; + } else { + return false; } - - return limit_rhs; } -static void -try_find_trip_count_vars_in_iand(nir_alu_instr **alu, - nir_loop_variable **ind, - nir_loop_variable **limit, +static bool +try_find_trip_count_vars_in_iand(nir_ssa_scalar *cond, + nir_ssa_scalar *ind, + nir_ssa_scalar *limit, bool *limit_rhs, loop_info_state *state) { - assert((*alu)->op == nir_op_ieq || (*alu)->op == nir_op_inot); - - nir_ssa_def *iand_def = (*alu)->src[0].src.ssa; + const nir_op alu_op = nir_ssa_scalar_alu_op(*cond); + assert(alu_op == nir_op_ieq || alu_op == nir_op_inot); - if ((*alu)->op == nir_op_ieq) { - nir_ssa_def *zero_def = (*alu)->src[1].src.ssa; + nir_ssa_scalar iand = nir_ssa_scalar_chase_alu_src(*cond, 0); - if (iand_def->parent_instr->type != nir_instr_type_alu || - zero_def->parent_instr->type != nir_instr_type_load_const) { + if (alu_op == nir_op_ieq) { + nir_ssa_scalar zero = nir_ssa_scalar_chase_alu_src(*cond, 1); + if (!nir_ssa_scalar_is_alu(iand) || !nir_ssa_scalar_is_const(zero)) { /* Maybe we had it the wrong way, flip things around */ - iand_def = (*alu)->src[1].src.ssa; - zero_def = (*alu)->src[0].src.ssa; + nir_ssa_scalar tmp = zero; + zero = iand; + iand = tmp; /* If we still didn't find what we need then return */ - if (zero_def->parent_instr->type != nir_instr_type_load_const) - return; + if (!nir_ssa_scalar_is_const(zero)) + return false; } /* If the loop is not breaking on (x && y) == 0 then return */ - nir_const_value *zero = - 
nir_instr_as_load_const(zero_def->parent_instr)->value; - if (zero[0].i32 != 0) - return; + if (nir_ssa_scalar_as_uint(zero) != 0) + return false; } - if (iand_def->parent_instr->type != nir_instr_type_alu) - return; + if (!nir_ssa_scalar_is_alu(iand)) + return false; - nir_alu_instr *iand = nir_instr_as_alu(iand_def->parent_instr); - if (iand->op != nir_op_iand) - return; + if (nir_ssa_scalar_alu_op(iand) != nir_op_iand) + return false; /* Check if iand src is a terminator condition and try get induction var * and trip limit var. */ - nir_ssa_def *src = iand->src[0].src.ssa; - if (src->parent_instr->type == nir_instr_type_alu) { - *alu = nir_instr_as_alu(src->parent_instr); - if (is_supported_terminator_condition(*alu)) - *limit_rhs = get_induction_and_limit_vars(*alu, ind, limit, state); - } + bool found_induction_var = false; + for (unsigned i = 0; i < 2; i++) { + nir_ssa_scalar src = nir_ssa_scalar_chase_alu_src(iand, i); + if (is_supported_terminator_condition(src) && + get_induction_and_limit_vars(src, ind, limit, limit_rhs, state)) { + *cond = src; + found_induction_var = true; - /* Try the other iand src if needed */ - if (*ind == NULL || (*ind && (*ind)->type != basic_induction) || - !is_var_constant(*limit)) { - src = iand->src[1].src.ssa; - if (src->parent_instr->type == nir_instr_type_alu) { - nir_alu_instr *tmp_alu = nir_instr_as_alu(src->parent_instr); - if (is_supported_terminator_condition(tmp_alu)) { - *alu = tmp_alu; - *limit_rhs = get_induction_and_limit_vars(*alu, ind, limit, state); - } + /* If we've found one with a constant limit, stop. */ + if (nir_ssa_scalar_is_const(*limit)) + return true; } } + + return found_induction_var; } /* Run through each of the terminators of the loop and try to infer a possible @@ -877,8 +915,10 @@ find_trip_count(loop_info_state *state) list_for_each_entry(nir_loop_terminator, terminator, &state->loop->info->loop_terminator_list, loop_terminator_link) { + assert(terminator->nif->condition.is_ssa); + nir_ssa_scalar cond = { terminator->nif->condition.ssa, 0 }; - if (terminator->conditional_instr->type != nir_instr_type_alu) { + if (!nir_ssa_scalar_is_alu(cond)) { /* If we get here the loop is dead and will get cleaned up by the * nir_opt_dead_cf pass. */ @@ -886,43 +926,35 @@ find_trip_count(loop_info_state *state) continue; } - nir_alu_instr *alu = nir_instr_as_alu(terminator->conditional_instr); - nir_op alu_op = alu->op; + nir_op alu_op = nir_ssa_scalar_alu_op(cond); bool limit_rhs; - nir_loop_variable *basic_ind = NULL; - nir_loop_variable *limit; - if (alu->op == nir_op_inot || alu->op == nir_op_ieq) { - nir_alu_instr *new_alu = alu; - try_find_trip_count_vars_in_iand(&new_alu, &basic_ind, &limit, - &limit_rhs, state); + nir_ssa_scalar basic_ind = { NULL, 0 }; + nir_ssa_scalar limit; + if ((alu_op == nir_op_inot || alu_op == nir_op_ieq) && + try_find_trip_count_vars_in_iand(&cond, &basic_ind, &limit, + &limit_rhs, state)) { /* The loop is exiting on (x && y) == 0 so we need to get the * inverse of x or y (i.e. which ever contained the induction var) in * order to compute the trip count. 
*/ - if (basic_ind && basic_ind->type == basic_induction) { - alu = new_alu; - alu_op = inverse_comparison(alu); - trip_count_known = false; - terminator->exact_trip_count_unknown = true; - } + alu_op = inverse_comparison(nir_ssa_scalar_alu_op(cond)); + trip_count_known = false; + terminator->exact_trip_count_unknown = true; } - if (!basic_ind) { - if (!is_supported_terminator_condition(alu)) { - trip_count_known = false; - continue; + if (!basic_ind.def) { + if (is_supported_terminator_condition(cond)) { + get_induction_and_limit_vars(cond, &basic_ind, + &limit, &limit_rhs, state); } - - limit_rhs = get_induction_and_limit_vars(alu, &basic_ind, &limit, - state); } /* The comparison has to have a basic induction variable for us to be * able to find trip counts. */ - if (basic_ind->type != basic_induction) { + if (!basic_ind.def) { trip_count_known = false; continue; } @@ -931,9 +963,8 @@ find_trip_count(loop_info_state *state) /* Attempt to find a constant limit for the loop */ nir_const_value limit_val; - if (is_var_constant(limit)) { - limit_val = - nir_instr_as_load_const(limit->def->parent_instr)->value[0]; + if (nir_ssa_scalar_is_const(limit)) { + limit_val = nir_ssa_scalar_as_const_value(limit); } else { trip_count_known = false; @@ -955,17 +986,38 @@ find_trip_count(loop_info_state *state) * Thats all thats needed to calculate the trip-count */ - nir_const_value *initial_val = - nir_instr_as_load_const(basic_ind->ind->def_outside_loop-> - def->parent_instr)->value; + nir_basic_induction_var *ind_var = + get_loop_var(basic_ind.def, state)->ind; + + /* The basic induction var might be a vector but, because we guarantee + * earlier that the phi source has a scalar swizzle, we can take the + * component from basic_ind. + */ + nir_ssa_scalar initial_s = { ind_var->def_outside_loop, basic_ind.comp }; + nir_ssa_scalar alu_s = { &ind_var->alu->dest.dest.ssa, basic_ind.comp }; + + nir_const_value initial_val = nir_ssa_scalar_as_const_value(initial_s); - nir_const_value *step_val = - nir_instr_as_load_const(basic_ind->ind->invariant->def-> - parent_instr)->value; + /* We are guaranteed by earlier code that at least one of these sources + * is a constant but we don't know which. 
+ */ + nir_const_value step_val; + memset(&step_val, 0, sizeof(step_val)); + UNUSED bool found_step_value = false; + assert(nir_op_infos[ind_var->alu->op].num_inputs == 2); + for (unsigned i = 0; i < 2; i++) { + nir_ssa_scalar alu_src = nir_ssa_scalar_chase_alu_src(alu_s, i); + if (nir_ssa_scalar_is_const(alu_src)) { + found_step_value = true; + step_val = nir_ssa_scalar_as_const_value(alu_src); + break; + } + } + assert(found_step_value); - int iterations = calculate_iterations(initial_val, step_val, + int iterations = calculate_iterations(&initial_val, &step_val, &limit_val, - basic_ind->ind->alu_def, alu, + ind_var->alu, cond, alu_op, limit_rhs, terminator->continue_from_then); diff --git a/src/compiler/nir/nir_lower_int64.c b/src/compiler/nir/nir_lower_int64.c index b3b78c6649a..84ec2a77f1e 100644 --- a/src/compiler/nir/nir_lower_int64.c +++ b/src/compiler/nir/nir_lower_int64.c @@ -629,6 +629,34 @@ lower_irem64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d) return nir_bcsel(b, n_is_neg, nir_ineg(b, r), r); } +static nir_ssa_def * +lower_extract(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *c) +{ + assert(op == nir_op_extract_u8 || op == nir_op_extract_i8 || + op == nir_op_extract_u16 || op == nir_op_extract_i16); + + const int chunk = nir_src_as_uint(nir_src_for_ssa(c)); + const int chunk_bits = + (op == nir_op_extract_u8 || op == nir_op_extract_i8) ? 8 : 16; + const int num_chunks_in_32 = 32 / chunk_bits; + + nir_ssa_def *extract32; + if (chunk < num_chunks_in_32) { + extract32 = nir_build_alu(b, op, nir_unpack_64_2x32_split_x(b, x), + nir_imm_int(b, chunk), + NULL, NULL); + } else { + extract32 = nir_build_alu(b, op, nir_unpack_64_2x32_split_y(b, x), + nir_imm_int(b, chunk - num_chunks_in_32), + NULL, NULL); + } + + if (op == nir_op_extract_i8 || op == nir_op_extract_i16) + return lower_i2i64(b, extract32); + else + return lower_u2u64(b, extract32); +} + nir_lower_int64_options nir_lower_int64_op_to_options_mask(nir_op opcode) { @@ -685,6 +713,11 @@ nir_lower_int64_op_to_options_mask(nir_op opcode) case nir_op_ishr: case nir_op_ushr: return nir_lower_shift64; + case nir_op_extract_u8: + case nir_op_extract_i8: + case nir_op_extract_u16: + case nir_op_extract_i16: + return nir_lower_extract64; default: return 0; } @@ -779,6 +812,11 @@ lower_int64_alu_instr(nir_builder *b, nir_alu_instr *alu) return lower_ishr64(b, src[0], src[1]); case nir_op_ushr: return lower_ushr64(b, src[0], src[1]); + case nir_op_extract_u8: + case nir_op_extract_i8: + case nir_op_extract_u16: + case nir_op_extract_i16: + return lower_extract(b, alu->op, src[0], src[1]); default: unreachable("Invalid ALU opcode to lower"); } diff --git a/src/compiler/nir/nir_lower_non_uniform_access.c b/src/compiler/nir/nir_lower_non_uniform_access.c index 6aa11f9bebe..0ab32100ef2 100644 --- a/src/compiler/nir/nir_lower_non_uniform_access.c +++ b/src/compiler/nir/nir_lower_non_uniform_access.c @@ -34,6 +34,7 @@ read_first_invocation(nir_builder *b, nir_ssa_def *x) first->src[0] = nir_src_for_ssa(x); nir_ssa_dest_init(&first->instr, &first->dest, x->num_components, x->bit_size, NULL); + nir_builder_instr_insert(b, &first->instr); return &first->dest.ssa; } @@ -128,8 +129,8 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl, nir_builder b; nir_builder_init(&b, impl); - nir_foreach_block(block, impl) { - nir_foreach_instr(instr, block) { + nir_foreach_block_safe(block, impl) { + nir_foreach_instr_safe(instr, block) { switch (instr->type) { case nir_instr_type_tex: { nir_tex_instr *tex = nir_instr_as_tex(instr); 
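lower_extract() above splits a 64-bit extract_u8/i8/u16/i16 into a 32-bit extract on either the low or the high word of the source, then widens the result back to 64 bits (zero-extending for the unsigned forms, sign-extending for the signed ones). A standalone model of the chunk re-indexing for the 16-bit case, with made-up values:

#include <assert.h>
#include <stdint.h>

/* Reference semantics of extract_u16 on a 64-bit value: chunk N is bit range
 * [16*N, 16*N + 15]. */
static uint64_t
extract_u16_ref(uint64_t x, unsigned chunk)
{
   return (x >> (chunk * 16)) & 0xffff;
}

/* Model of the lowering: pick the low or high 32-bit word (the x or y
 * component of unpack_64_2x32), then extract a re-indexed 16-bit chunk. */
static uint64_t
extract_u16_lowered(uint64_t x, unsigned chunk)
{
   uint32_t word = chunk < 2 ? (uint32_t)x : (uint32_t)(x >> 32);
   unsigned sub_chunk = chunk < 2 ? chunk : chunk - 2;
   return (word >> (sub_chunk * 16)) & 0xffff;
}

int
main(void)
{
   uint64_t x = 0x1122334455667788ull;
   for (unsigned chunk = 0; chunk < 4; chunk++)
      assert(extract_u16_ref(x, chunk) == extract_u16_lowered(x, chunk));
   return 0;
}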
diff --git a/src/compiler/nir/nir_lower_regs_to_ssa.c b/src/compiler/nir/nir_lower_regs_to_ssa.c index 0db11ff1d1c..76ed1287379 100644 --- a/src/compiler/nir/nir_lower_regs_to_ssa.c +++ b/src/compiler/nir/nir_lower_regs_to_ssa.c @@ -251,9 +251,17 @@ nir_lower_regs_to_ssa_impl(nir_function_impl *impl) nir_foreach_block(block, impl) { nir_foreach_instr(instr, block) { - if (instr->type == nir_instr_type_alu) { + switch (instr->type) { + case nir_instr_type_alu: rewrite_alu_instr(nir_instr_as_alu(instr), &state); - } else { + break; + + case nir_instr_type_phi: + /* We rewrite sources as a separate pass */ + nir_foreach_dest(instr, rewrite_dest, &state); + break; + + default: nir_foreach_src(instr, rewrite_src, &state); nir_foreach_dest(instr, rewrite_dest, &state); } @@ -262,6 +270,28 @@ nir_lower_regs_to_ssa_impl(nir_function_impl *impl) nir_if *following_if = nir_block_get_following_if(block); if (following_if) rewrite_if_condition(following_if, &state); + + /* Handle phi sources that source from this block. We have to do this + * as a separate pass because the phi builder assumes that uses and + * defs are processed in an order that respects dominance. When we have + * loops, a phi source may be a back-edge so we have to handle it as if + * it were one of the last instructions in the predecessor block. + */ + for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) { + if (block->successors[i] == NULL) + continue; + + nir_foreach_instr(instr, block->successors[i]) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_foreach_phi_src(phi_src, phi) { + if (phi_src->pred == block) + rewrite_src(&phi_src->src, &state); + } + } + } } nir_phi_builder_finish(phi_build); diff --git a/src/compiler/nir/nir_lower_var_copies.c b/src/compiler/nir/nir_lower_var_copies.c index 0ba398698f0..e6ade733eba 100644 --- a/src/compiler/nir/nir_lower_var_copies.c +++ b/src/compiler/nir/nir_lower_var_copies.c @@ -56,7 +56,9 @@ emit_deref_copy_load_store(nir_builder *b, nir_deref_instr *dst_deref, nir_deref_instr **dst_deref_arr, nir_deref_instr *src_deref, - nir_deref_instr **src_deref_arr) + nir_deref_instr **src_deref_arr, + enum gl_access_qualifier dst_access, + enum gl_access_qualifier src_access) { if (dst_deref_arr || src_deref_arr) { assert(dst_deref_arr && src_deref_arr); @@ -79,14 +81,16 @@ emit_deref_copy_load_store(nir_builder *b, nir_build_deref_array_imm(b, dst_deref, i), dst_deref_arr + 1, nir_build_deref_array_imm(b, src_deref, i), - src_deref_arr + 1); + src_deref_arr + 1, dst_access, src_access); } } else { assert(glsl_get_bare_type(dst_deref->type) == glsl_get_bare_type(src_deref->type)); assert(glsl_type_is_vector_or_scalar(dst_deref->type)); - nir_store_deref(b, dst_deref, nir_load_deref(b, src_deref), ~0); + nir_store_deref_with_access(b, dst_deref, + nir_load_deref_with_access(b, src_deref, src_access), + ~0, src_access); } } @@ -106,7 +110,9 @@ nir_lower_deref_copy_instr(nir_builder *b, nir_intrinsic_instr *copy) b->cursor = nir_before_instr(&copy->instr); emit_deref_copy_load_store(b, dst_path.path[0], &dst_path.path[1], - src_path.path[0], &src_path.path[1]); + src_path.path[0], &src_path.path[1], + nir_intrinsic_dst_access(copy), + nir_intrinsic_src_access(copy)); nir_deref_path_finish(&dst_path); nir_deref_path_finish(&src_path); diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 55e46b04466..ce1298ccab1 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ 
b/src/compiler/nir/nir_opt_algebraic.py @@ -985,7 +985,7 @@ def bitfield_reverse(u): return step5 -optimizations += [(bitfield_reverse('x@32'), ('bitfield_reverse', 'x'))] +optimizations += [(bitfield_reverse('x@32'), ('bitfield_reverse', 'x'), '!options->lower_bitfield_reverse')] # For any float comparison operation, "cmp", if you have "a == a && a cmp b" # then the "a == a" is redundant because it's equivalent to "a is not NaN" @@ -1086,9 +1086,6 @@ def bitfield_reverse(u): (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'), (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'), - (('b2f(is_used_more_than_once)', ('inot', 'a@1')), ('bcsel', a, 0.0, 1.0)), - (('fneg(is_used_more_than_once)', ('b2f', ('inot', 'a@1'))), ('bcsel', a, -0.0, -1.0)), - # we do these late so that we don't get in the way of creating ffmas (('fmin', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmin', a, b))), (('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmax', a, b))), diff --git a/src/compiler/nir/nir_opt_comparison_pre.c b/src/compiler/nir/nir_opt_comparison_pre.c index eee496251a7..a7a227ce371 100644 --- a/src/compiler/nir/nir_opt_comparison_pre.c +++ b/src/compiler/nir/nir_opt_comparison_pre.c @@ -346,7 +346,7 @@ comparison_pre_block(nir_block *block, struct block_queue *bq, nir_builder *bld) return progress; } -static bool +bool nir_opt_comparison_pre_impl(nir_function_impl *impl) { struct block_queue bq; diff --git a/src/compiler/nir/nir_opt_dead_cf.c b/src/compiler/nir/nir_opt_dead_cf.c index b0e9723d36c..33f3565c564 100644 --- a/src/compiler/nir/nir_opt_dead_cf.c +++ b/src/compiler/nir/nir_opt_dead_cf.c @@ -216,7 +216,7 @@ node_is_dead(nir_cf_node *node) nir_foreach_instr(instr, block) { if (instr->type == nir_instr_type_call) - return true; + return false; /* Return instructions can cause us to skip over other side-effecting * instructions after the loop, so consider them to have side effects diff --git a/src/compiler/nir/nir_opt_gcm.c b/src/compiler/nir/nir_opt_gcm.c index e7d3f8ec424..aeae2ad6401 100644 --- a/src/compiler/nir/nir_opt_gcm.c +++ b/src/compiler/nir/nir_opt_gcm.c @@ -152,11 +152,7 @@ gcm_pin_instructions_block(nir_block *block, struct gcm_state *state) break; case nir_instr_type_intrinsic: { - const nir_intrinsic_info *info = - &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic]; - - if ((info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && - (info->flags & NIR_INTRINSIC_CAN_REORDER)) { + if (nir_intrinsic_can_reorder(nir_instr_as_intrinsic(instr))) { instr->pass_flags = 0; } else { instr->pass_flags = GCM_INSTR_PINNED; diff --git a/src/compiler/nir/nir_opt_idiv_const.c b/src/compiler/nir/nir_opt_idiv_const.c index 8f70821ca2f..688186779e6 100644 --- a/src/compiler/nir/nir_opt_idiv_const.c +++ b/src/compiler/nir/nir_opt_idiv_const.c @@ -65,15 +65,17 @@ build_umod(nir_builder *b, nir_ssa_def *n, uint64_t d) static nir_ssa_def * build_idiv(nir_builder *b, nir_ssa_def *n, int64_t d) { + uint64_t abs_d = d < 0 ? -d : d; + if (d == 0) { return nir_imm_intN_t(b, 0, n->bit_size); } else if (d == 1) { return n; } else if (d == -1) { return nir_ineg(b, n); - } else if (util_is_power_of_two_or_zero64(d)) { - uint64_t abs_d = d < 0 ? 
-d : d; - nir_ssa_def *uq = nir_ishr(b, n, nir_imm_int(b, util_logbase2_64(abs_d))); + } else if (util_is_power_of_two_or_zero64(abs_d)) { + nir_ssa_def *uq = nir_ushr(b, nir_iabs(b, n), + nir_imm_int(b, util_logbase2_64(abs_d))); nir_ssa_def *n_neg = nir_ilt(b, n, nir_imm_intN_t(b, 0, n->bit_size)); nir_ssa_def *neg = d < 0 ? nir_inot(b, n_neg) : n_neg; return nir_bcsel(b, neg, nir_ineg(b, uq), uq); diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c index f674185f1e2..912580be840 100644 --- a/src/compiler/nir/nir_opt_if.c +++ b/src/compiler/nir/nir_opt_if.c @@ -1040,6 +1040,13 @@ opt_if_loop_terminator(nir_if *nif) if (!nir_is_trivial_loop_if(nif, break_blk)) return false; + /* Even though this if statement has a jump on one side, we may still have + * phis afterwards. Single-source phis can be produced by loop unrolling + * or dead control-flow passes and are perfectly legal. Run a quick phi + * removal on the block after the if to clean up any such phis. + */ + nir_opt_remove_phis_block(nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node))); + /* Finally, move the continue from branch after the if-statement. */ nir_cf_list tmp; nir_cf_extract(&tmp, nir_before_block(first_continue_from_blk), diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c index c0198390749..9e697fc4b45 100644 --- a/src/compiler/nir/nir_opt_loop_unroll.c +++ b/src/compiler/nir/nir_opt_loop_unroll.c @@ -560,31 +560,7 @@ wrapper_unroll(nir_loop *loop) nir_after_block(nir_if_last_else_block(terminator->nif)); } } else { - nir_block *blk_after_loop = - nir_cursor_current_block(nir_after_cf_node(&loop->cf_node)); - - /* There may still be some single src phis following the loop that - * have not yet been cleaned up by another pass. Tidy those up - * before unrolling the loop. - */ - nir_foreach_instr_safe(instr, blk_after_loop) { - if (instr->type != nir_instr_type_phi) - break; - - nir_phi_instr *phi = nir_instr_as_phi(instr); - assert(exec_list_length(&phi->srcs) == 1); - - nir_phi_src *phi_src = - exec_node_data(nir_phi_src, exec_list_get_head(&phi->srcs), node); - - nir_ssa_def_rewrite_uses(&phi->dest.ssa, phi_src->src); - nir_instr_remove(instr); - } - - /* Remove break at end of the loop */ - nir_block *last_loop_blk = nir_loop_last_block(loop); - nir_instr *break_instr = nir_block_last_instr(last_loop_blk); - nir_instr_remove(break_instr); + loop_prepare_for_unroll(loop); } /* Pluck out the loop body. 
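The nir_opt_idiv_const change above tests the power-of-two case on |d|, so negative divisors are handled, and shifts |n| with ushr instead of arithmetically shifting the signed numerator, so the lowered quotient truncates toward zero the way C division does. A standalone model of the new sequence, with made-up values:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Model of the lowered power-of-two path:
 *   uq  = |n| >> log2(|d|)
 *   neg = (d < 0) ? !(n < 0) : (n < 0)
 *   q   = neg ? -uq : uq
 */
static int64_t
model_idiv_pot(int64_t n, int64_t d, unsigned log2_abs_d)
{
   uint64_t abs_n = n < 0 ? -(uint64_t)n : (uint64_t)n;
   uint64_t uq = abs_n >> log2_abs_d;
   bool n_neg = n < 0;
   bool neg = d < 0 ? !n_neg : n_neg;
   return neg ? -(int64_t)uq : (int64_t)uq;
}

int
main(void)
{
   /* Truncation toward zero, matching C: -7 / 4 == -1 and 7 / -4 == -1. */
   assert(model_idiv_pot(-7, 4, 2) == -7 / 4);
   assert(model_idiv_pot( 7, -4, 2) == 7 / -4);
   assert(model_idiv_pot(-8, 4, 2) == -8 / 4);
   return 0;
}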
*/ diff --git a/src/compiler/nir/nir_opt_move_load_ubo.c b/src/compiler/nir/nir_opt_move_load_ubo.c index a32f1704427..f36a62a5308 100644 --- a/src/compiler/nir/nir_opt_move_load_ubo.c +++ b/src/compiler/nir/nir_opt_move_load_ubo.c @@ -91,7 +91,7 @@ move_load_ubo(nir_block *block) } } - return false; + return progress; } bool diff --git a/src/compiler/nir/nir_opt_remove_phis.c b/src/compiler/nir/nir_opt_remove_phis.c index 9efbf422624..b03a0ab41b3 100644 --- a/src/compiler/nir/nir_opt_remove_phis.c +++ b/src/compiler/nir/nir_opt_remove_phis.c @@ -139,6 +139,14 @@ remove_phis_block(nir_block *block, nir_builder *b) return progress; } +bool +nir_opt_remove_phis_block(nir_block *block) +{ + nir_builder b; + nir_builder_init(&b, nir_cf_node_get_function(&block->cf_node)); + return remove_phis_block(block, &b); +} + static bool nir_opt_remove_phis_impl(nir_function_impl *impl) { diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 4056bd77455..92707d72990 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -771,6 +771,8 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) [NIR_INTRINSIC_IMAGE_DIM] = "image_dim", [NIR_INTRINSIC_IMAGE_ARRAY] = "image_array", [NIR_INTRINSIC_ACCESS] = "access", + [NIR_INTRINSIC_SRC_ACCESS] = "src-access", + [NIR_INTRINSIC_DST_ACCESS] = "dst-access", [NIR_INTRINSIC_FORMAT] = "format", [NIR_INTRINSIC_ALIGN_MUL] = "align_mul", [NIR_INTRINSIC_ALIGN_OFFSET] = "align_offset", diff --git a/src/compiler/nir/nir_propagate_invariant.c b/src/compiler/nir/nir_propagate_invariant.c index 103b2422b83..a0cfde67891 100644 --- a/src/compiler/nir/nir_propagate_invariant.c +++ b/src/compiler/nir/nir_propagate_invariant.c @@ -65,12 +65,21 @@ add_cf_node(nir_cf_node *cf, struct set *invariants) static void add_var(nir_variable *var, struct set *invariants) { - _mesa_set_add(invariants, var); + /* Because we pass the result of nir_intrinsic_get_var directly to this + * function, it's possible for var to be NULL if, for instance, there's a + * cast somewhere in the chain. + */ + if (var != NULL) + _mesa_set_add(invariants, var); } static bool var_is_invariant(nir_variable *var, struct set * invariants) { + /* Because we pass the result of nir_intrinsic_get_var directly to this + * function, it's possible for var to be NULL if, for instance, there's a + * cast somewhere in the chain. 
+ */ return var && (var->data.invariant || _mesa_set_search(invariants, var)); } diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h index 1624508993d..e3fdc08c4d7 100644 --- a/src/compiler/nir/nir_search_helpers.h +++ b/src/compiler/nir/nir_search_helpers.h @@ -143,22 +143,6 @@ is_not_const(nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components, return !nir_src_is_const(instr->src[src].src); } -static inline bool -is_used_more_than_once(nir_alu_instr *instr) -{ - bool zero_if_use = list_empty(&instr->dest.dest.ssa.if_uses); - bool zero_use = list_empty(&instr->dest.dest.ssa.uses); - - if (zero_use && zero_if_use) - return false; - else if (zero_use && list_is_singular(&instr->dest.dest.ssa.if_uses)) - return false; - else if (zero_if_use && list_is_singular(&instr->dest.dest.ssa.uses)) - return false; - - return true; -} - static inline bool is_used_once(nir_alu_instr *instr) { diff --git a/src/compiler/nir/nir_split_var_copies.c b/src/compiler/nir/nir_split_var_copies.c index 355a4e56d01..10b71c16c17 100644 --- a/src/compiler/nir/nir_split_var_copies.c +++ b/src/compiler/nir/nir_split_var_copies.c @@ -64,21 +64,25 @@ static void split_deref_copy_instr(nir_builder *b, - nir_deref_instr *dst, nir_deref_instr *src) + nir_deref_instr *dst, nir_deref_instr *src, + enum gl_access_qualifier dst_access, + enum gl_access_qualifier src_access) { assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type)); if (glsl_type_is_vector_or_scalar(src->type)) { - nir_copy_deref(b, dst, src); + nir_copy_deref_with_access(b, dst, src, dst_access, src_access); } else if (glsl_type_is_struct_or_ifc(src->type)) { for (unsigned i = 0; i < glsl_get_length(src->type); i++) { split_deref_copy_instr(b, nir_build_deref_struct(b, dst, i), - nir_build_deref_struct(b, src, i)); + nir_build_deref_struct(b, src, i), + dst_access, src_access); } } else { assert(glsl_type_is_matrix(src->type) || glsl_type_is_array(src->type)); split_deref_copy_instr(b, nir_build_deref_array_wildcard(b, dst), - nir_build_deref_array_wildcard(b, src)); + nir_build_deref_array_wildcard(b, src), + dst_access, src_access); } } @@ -105,7 +109,9 @@ split_var_copies_impl(nir_function_impl *impl) nir_instr_as_deref(copy->src[0].ssa->parent_instr); nir_deref_instr *src = nir_instr_as_deref(copy->src[1].ssa->parent_instr); - split_deref_copy_instr(&b, dst, src); + split_deref_copy_instr(&b, dst, src, + nir_intrinsic_dst_access(copy), + nir_intrinsic_src_access(copy)); progress = true; } diff --git a/src/compiler/nir/nir_to_lcssa.c b/src/compiler/nir/nir_to_lcssa.c index 7948b117927..6057c66586d 100644 --- a/src/compiler/nir/nir_to_lcssa.c +++ b/src/compiler/nir/nir_to_lcssa.c @@ -111,9 +111,6 @@ convert_loop_exit_for_ssa(nir_ssa_def *def, void *void_state) if (all_uses_inside_loop) return true; - /* We don't want derefs ending up in phi sources */ - assert(def->parent_instr->type != nir_instr_type_deref); - /* Initialize a phi-instruction */ nir_phi_instr *phi = nir_phi_instr_create(state->shader); nir_ssa_dest_init(&phi->instr, &phi->dest, @@ -131,6 +128,25 @@ convert_loop_exit_for_ssa(nir_ssa_def *def, void *void_state) } nir_instr_insert_before_block(block_after_loop, &phi->instr); + nir_ssa_def *dest = &phi->dest.ssa; + + /* deref instructions need a cast after the phi */ + if (def->parent_instr->type == nir_instr_type_deref) { + nir_deref_instr *cast = + nir_deref_instr_create(state->shader, nir_deref_type_cast); + + nir_deref_instr *instr = nir_instr_as_deref(def->parent_instr); + 
cast->mode = instr->mode; + cast->type = instr->type; + cast->parent = nir_src_for_ssa(&phi->dest.ssa); + cast->cast.ptr_stride = nir_deref_instr_ptr_as_array_stride(instr); + + nir_ssa_dest_init(&cast->instr, &cast->dest, + phi->dest.ssa.num_components, + phi->dest.ssa.bit_size, NULL); + nir_instr_insert(nir_after_phis(block_after_loop), &cast->instr); + dest = &cast->dest.ssa; + } /* Run through all uses and rewrite those outside the loop to point to * the phi instead of pointing to the ssa-def. @@ -142,15 +158,13 @@ convert_loop_exit_for_ssa(nir_ssa_def *def, void *void_state) } if (!is_use_inside_loop(use, state->loop)) { - nir_instr_rewrite_src(use->parent_instr, use, - nir_src_for_ssa(&phi->dest.ssa)); + nir_instr_rewrite_src(use->parent_instr, use, nir_src_for_ssa(dest)); } } nir_foreach_if_use_safe(use, def) { if (!is_if_use_inside_loop(use, state->loop)) { - nir_if_rewrite_condition(use->parent_if, - nir_src_for_ssa(&phi->dest.ssa)); + nir_if_rewrite_condition(use->parent_if, nir_src_for_ssa(dest)); } } diff --git a/src/compiler/nir/tests/comparison_pre_tests.cpp b/src/compiler/nir/tests/comparison_pre_tests.cpp new file mode 100644 index 00000000000..fe1cc23fb3b --- /dev/null +++ b/src/compiler/nir/tests/comparison_pre_tests.cpp @@ -0,0 +1,531 @@ +/* + * Copyright © 2019 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include "nir.h" +#include "nir_builder.h" + +class comparison_pre_test : public ::testing::Test { +protected: + comparison_pre_test() + { + static const nir_shader_compiler_options options = { }; + nir_builder_init_simple_shader(&bld, NULL, MESA_SHADER_VERTEX, &options); + + v1 = nir_imm_vec4(&bld, -2.0, -1.0, 1.0, 2.0); + v2 = nir_imm_vec4(&bld, 2.0, 1.0, -1.0, -2.0); + v3 = nir_imm_vec4(&bld, 3.0, 4.0, 5.0, 6.0); + } + + ~comparison_pre_test() + { + ralloc_free(bld.shader); + } + + struct nir_builder bld; + + nir_ssa_def *v1; + nir_ssa_def *v2; + nir_ssa_def *v3; + + const uint8_t xxxx[4] = { 0, 0, 0, 0 }; + const uint8_t wwww[4] = { 3, 3, 3, 3 }; +}; + +TEST_F(comparison_pre_test, a_lt_b_vs_neg_a_plus_b) +{ + /* Before: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec4 32 ssa_4 = fadd ssa_0, ssa_2 + * vec1 32 ssa_5 = mov ssa_4.x + * vec1 1 ssa_6 = flt ssa_5, ssa_3 + * + * if ssa_6 { + * vec1 32 ssa_7 = fneg ssa_5 + * vec1 32 ssa_8 = fadd ssa_7, ssa_3 + * } else { + * } + * + * After: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec4 32 ssa_4 = fadd ssa_0, ssa_2 + * vec1 32 ssa_5 = mov ssa_4.x + * vec1 32 ssa_9 = fneg ssa_5 + * vec1 32 ssa_10 = fadd ssa_3, ssa_9 + * vec1 32 ssa_11 = load_const (0.0) + * vec1 1 ssa_12 = flt ssa_11, ssa_10 + * vec1 32 ssa_13 = mov ssa_10 + * vec1 1 ssa_14 = mov ssa_12 + * + * if ssa_14 { + * vec1 32 ssa_7 = fneg ssa_5 + * } else { + * } + */ + nir_ssa_def *one = nir_imm_float(&bld, 1.0f); + nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0); + + nir_ssa_def *flt = nir_flt(&bld, a, one); + + nir_if *nif = nir_push_if(&bld, flt); + + nir_fadd(&bld, nir_fneg(&bld, a), one); + + nir_pop_if(&bld, nif); + + EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl)); +} + +TEST_F(comparison_pre_test, a_lt_b_vs_a_minus_b) +{ + /* Before: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec4 32 ssa_4 = fadd ssa_0, ssa_2 + * vec1 32 ssa_5 = mov ssa_4.x + * vec1 1 ssa_6 = flt ssa_3, ssa_5 + * + * if ssa_6 { + * vec1 32 ssa_7 = fneg ssa_5 + * vec1 32 ssa_8 = fadd ssa_3, ssa_7 + * } else { + * } + * + * After: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec4 32 ssa_4 = fadd ssa_0, ssa_2 + * vec1 32 ssa_5 = mov ssa_4.x + * vec1 32 ssa_9 = fneg ssa_5 + * vec1 32 ssa_10 = fadd ssa_3, ssa_9 + * vec1 32 ssa_11 = load_const (0.0) + * vec1 1 ssa_12 = flt ssa_10, ssa_11 + * vec1 32 ssa_13 = mov ssa_10 + * vec1 1 ssa_14 = mov ssa_12 + * + * if ssa_14 { + * vec1 32 ssa_7 = fneg ssa_5 + * } else { + * } + */ + nir_ssa_def *one = nir_imm_float(&bld, 1.0f); + nir_ssa_def *b = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0); + + nir_ssa_def *flt = nir_flt(&bld, one, b); + + nir_if *nif = nir_push_if(&bld, flt); + + nir_fadd(&bld, one, nir_fneg(&bld, b)); + + nir_pop_if(&bld, nif); + + EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl)); +} + +TEST_F(comparison_pre_test, neg_a_lt_b_vs_a_plus_b) +{ + /* Before: + * + * vec4 32 ssa_0 
= load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec4 32 ssa_4 = fadd ssa_0, ssa_2 + * vec1 32 ssa_5 = mov ssa_4.x + * vec1 32 ssa_6 = fneg ssa_5 + * vec1 1 ssa_7 = flt ssa_6, ssa_3 + * + * if ssa_7 { + * vec1 32 ssa_8 = fadd ssa_5, ssa_3 + * } else { + * } + * + * After: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec4 32 ssa_4 = fadd ssa_0, ssa_2 + * vec1 32 ssa_5 = mov ssa_4.x + * vec1 32 ssa_9 = fneg ssa_5 + * vec1 32 ssa_9 = fneg ssa_6 + * vec1 32 ssa_10 = fadd ssa_3, ssa_9 + * vec1 32 ssa_11 = load_const ( 0.0) + * vec1 1 ssa_12 = flt ssa_11, ssa_10 + * vec1 32 ssa_13 = mov ssa_10 + * vec1 1 ssa_14 = mov ssa_12 + * + * if ssa_14 { + * } else { + * } + */ + + nir_ssa_def *one = nir_imm_float(&bld, 1.0f); + nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0); + + nir_ssa_def *flt = nir_flt(&bld, nir_fneg(&bld, a), one); + + nir_if *nif = nir_push_if(&bld, flt); + + nir_fadd(&bld, a, one); + + nir_pop_if(&bld, nif); + + EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl)); +} + +TEST_F(comparison_pre_test, a_lt_neg_b_vs_a_plus_b) +{ + /* Before: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec4 32 ssa_4 = fadd ssa_0, ssa_2 + * vec1 32 ssa_5 = mov ssa_4.x + * vec1 32 ssa_6 = fneg ssa_5 + * vec1 1 ssa_7 = flt ssa_3, ssa_6 + * + * if ssa_7 { + * vec1 32 ssa_8 = fadd ssa_3, ssa_5 + * } else { + * } + * + * After: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec4 32 ssa_4 = fadd ssa_0, ssa_2 + * vec1 32 ssa_5 = mov ssa_4.x + * vec1 32 ssa_9 = fneg ssa_5 + * vec1 32 ssa_9 = fneg ssa_6 + * vec1 32 ssa_10 = fadd ssa_3, ssa_9 + * vec1 32 ssa_11 = load_const ( 0.0) + * vec1 1 ssa_12 = flt ssa_10, ssa_11 + * vec1 32 ssa_13 = mov ssa_10 + * vec1 1 ssa_14 = mov ssa_12 + * + * if ssa_14 { + * } else { + * } + */ + nir_ssa_def *one = nir_imm_float(&bld, 1.0f); + nir_ssa_def *b = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0); + + nir_ssa_def *flt = nir_flt(&bld, one, nir_fneg(&bld, b)); + + nir_if *nif = nir_push_if(&bld, flt); + + nir_fadd(&bld, one, b); + + nir_pop_if(&bld, nif); + + EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl)); +} + +TEST_F(comparison_pre_test, imm_lt_b_vs_neg_imm_plus_b) +{ + /* Before: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec1 32 ssa_4 = load_const (-1.0) + * vec4 32 ssa_5 = fadd ssa_0, ssa_2 + * vec1 32 ssa_6 = mov ssa_5.x + * vec1 1 ssa_7 = flt ssa_3, ssa_6 + * + * if ssa_7 { + * vec1 32 ssa_8 = fadd ssa_4, ssa_6 + * } else { + * } + * + * After: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec1 32 ssa_4 = load_const (-1.0) + * vec4 32 ssa_5 = fadd ssa_0, ssa_2 + * vec1 32 ssa_6 = mov ssa_5.x + * vec1 32 ssa_9 = fneg ssa_3 
+ * vec1 32 ssa_10 = fadd ssa_6, ssa_9 + * vec1 32 ssa_11 = load_const ( 0.0) + * vec1 1 ssa_12 = flt ssa_11, ssa_10 + * vec1 32 ssa_13 = mov ssa_10 + * vec1 1 ssa_14 = mov ssa_12 + * + * if ssa_14 { + * } else { + * } + */ + nir_ssa_def *one = nir_imm_float(&bld, 1.0f); + nir_ssa_def *neg_one = nir_imm_float(&bld, -1.0f); + nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0); + + nir_ssa_def *flt = nir_flt(&bld, one, a); + + nir_if *nif = nir_push_if(&bld, flt); + + nir_fadd(&bld, neg_one, a); + + nir_pop_if(&bld, nif); + + EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl)); +} + +TEST_F(comparison_pre_test, a_lt_imm_vs_a_minus_imm) +{ + /* Before: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec1 32 ssa_4 = load_const (-1.0) + * vec4 32 ssa_5 = fadd ssa_0, ssa_2 + * vec1 32 ssa_6 = mov ssa_5.x + * vec1 1 ssa_7 = flt ssa_6, ssa_3 + * + * if ssa_6 { + * vec1 32 ssa_8 = fadd ssa_6, ssa_4 + * } else { + * } + * + * After: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec1 32 ssa_4 = load_const (-1.0) + * vec4 32 ssa_5 = fadd ssa_0, ssa_2 + * vec1 32 ssa_6 = mov ssa_5.x + * vec1 32 ssa_9 = fneg ssa_3 + * vec1 32 ssa_10 = fadd ssa_6, ssa_9 + * vec1 32 ssa_11 = load_const ( 0.0) + * vec1 1 ssa_12 = flt ssa_10, ssa_11 + * vec1 32 ssa_13 = mov ssa_10 + * vec1 1 ssa_14 = mov ssa_12 + * + * if ssa_14 { + * } else { + * } + */ + nir_ssa_def *one = nir_imm_float(&bld, 1.0f); + nir_ssa_def *neg_one = nir_imm_float(&bld, -1.0f); + nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0); + + nir_ssa_def *flt = nir_flt(&bld, a, one); + + nir_if *nif = nir_push_if(&bld, flt); + + nir_fadd(&bld, a, neg_one); + + nir_pop_if(&bld, nif); + + EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl)); +} + +TEST_F(comparison_pre_test, neg_imm_lt_a_vs_a_plus_imm) +{ + /* Before: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec1 32 ssa_4 = load_const (-1.0) + * vec4 32 ssa_5 = fadd ssa_0, ssa_2 + * vec1 32 ssa_6 = mov ssa_5.x + * vec1 1 ssa_7 = flt ssa_4, ssa_6 + * + * if ssa_7 { + * vec1 32 ssa_8 = fadd ssa_6, ssa_3 + * } else { + * } + * + * After: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec1 32 ssa_4 = load_const (-1.0) + * vec4 32 ssa_5 = fadd ssa_0, ssa_2 + * vec1 32 ssa_6 = mov ssa_5.x + * vec1 32 ssa_9 = fneg ssa_4 + * vec1 32 ssa_10 = fadd ssa_6, ssa_9 + * vec1 32 ssa_11 = load_const ( 0.0) + * vec1 1 ssa_12 = flt ssa_11, ssa_10 + * vec1 32 ssa_13 = mov ssa_10 + * vec1 1 ssa_14 = mov ssa_12 + * + * if ssa_14 { + * } else { + * } + */ + + nir_ssa_def *one = nir_imm_float(&bld, 1.0f); + nir_ssa_def *neg_one = nir_imm_float(&bld, -1.0f); + nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0); + + nir_ssa_def *flt = nir_flt(&bld, neg_one, a); + + nir_if *nif = nir_push_if(&bld, flt); + + nir_fadd(&bld, a, one); + + nir_pop_if(&bld, nif); + + EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl)); +} + +TEST_F(comparison_pre_test, 
a_lt_neg_imm_vs_a_plus_imm) +{ + /* Before: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec1 32 ssa_4 = load_const (-1.0) + * vec4 32 ssa_5 = fadd ssa_0, ssa_2 + * vec1 32 ssa_6 = mov ssa_5.x + * vec1 1 ssa_7 = flt ssa_6, ssa_4 + * + * if ssa_7 { + * vec1 32 ssa_8 = fadd ssa_6, ssa_3 + * } else { + * } + * + * After: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec1 32 ssa_4 = load_const (-1.0) + * vec4 32 ssa_5 = fadd ssa_0, ssa_2 + * vec1 32 ssa_6 = mov ssa_5.x + * vec1 32 ssa_9 = fneg ssa_4 + * vec1 32 ssa_10 = fadd ssa_6, ssa_9 + * vec1 32 ssa_11 = load_const ( 0.0) + * vec1 1 ssa_12 = flt ssa_10, ssa_11 + * vec1 32 ssa_13 = mov ssa_10 + * vec1 1 ssa_14 = mov ssa_12 + * + * if ssa_14 { + * } else { + * } + */ + nir_ssa_def *one = nir_imm_float(&bld, 1.0f); + nir_ssa_def *neg_one = nir_imm_float(&bld, -1.0f); + nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0); + + nir_ssa_def *flt = nir_flt(&bld, a, neg_one); + + nir_if *nif = nir_push_if(&bld, flt); + + nir_fadd(&bld, a, one); + + nir_pop_if(&bld, nif); + + EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl)); +} + +TEST_F(comparison_pre_test, non_scalar_add_result) +{ + /* The optimization pass should not do anything because the result of the + * fadd is not a scalar. + * + * Before: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec4 32 ssa_3 = fadd ssa_0, ssa_2 + * vec1 1 ssa_4 = flt ssa_0.x, ssa_3.x + * + * if ssa_4 { + * vec2 32 ssa_5 = fadd ssa_1.xx, ssa_3.xx + * } else { + * } + * + * After: + * + * No change. + */ + nir_ssa_def *a = nir_fadd(&bld, v1, v3); + + nir_alu_instr *flt = nir_alu_instr_create(bld.shader, nir_op_flt); + + flt->src[0].src = nir_src_for_ssa(v1); + flt->src[1].src = nir_src_for_ssa(a); + + memcpy(&flt->src[0].swizzle, xxxx, sizeof(xxxx)); + memcpy(&flt->src[1].swizzle, xxxx, sizeof(xxxx)); + + nir_builder_alu_instr_finish_and_insert(&bld, flt); + + flt->dest.dest.ssa.num_components = 1; + flt->dest.write_mask = 1; + + nir_if *nif = nir_push_if(&bld, &flt->dest.dest.ssa); + + nir_alu_instr *fadd = nir_alu_instr_create(bld.shader, nir_op_fadd); + + fadd->src[0].src = nir_src_for_ssa(v2); + fadd->src[1].src = nir_src_for_ssa(a); + + memcpy(&fadd->src[0].swizzle, xxxx, sizeof(xxxx)); + memcpy(&fadd->src[1].swizzle, xxxx, sizeof(xxxx)); + + nir_builder_alu_instr_finish_and_insert(&bld, fadd); + + fadd->dest.dest.ssa.num_components = 2; + fadd->dest.write_mask = 3; + + nir_pop_if(&bld, nif); + + EXPECT_FALSE(nir_opt_comparison_pre_impl(bld.impl)); +} diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 5e91f8815e8..e993ec32382 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -1422,15 +1422,17 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, default: break; } - } - - if (storage_class == SpvStorageClassWorkgroup && - b->options->lower_workgroup_access_to_offsets) { + } else if (storage_class == SpvStorageClassWorkgroup && + b->options->lower_workgroup_access_to_offsets) { + /* Workgroup is laid out by the implementation. 
*/ uint32_t size, align; val->type->deref = vtn_type_layout_std430(b, val->type->deref, &size, &align); val->type->length = size; val->type->align = align; + + /* Override any ArrayStride previously set. */ + val->type->stride = vtn_align_u32(size, align); } } break; @@ -2089,19 +2091,17 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, vtn_value(b, w[4], vtn_value_type_pointer)->pointer; return; } else if (opcode == SpvOpImage) { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_pointer); struct vtn_value *src_val = vtn_untyped_value(b, w[3]); if (src_val->value_type == vtn_value_type_sampled_image) { - val->pointer = src_val->sampled_image->image; + vtn_push_value_pointer(b, w[2], src_val->sampled_image->image); } else { vtn_assert(src_val->value_type == vtn_value_type_pointer); - val->pointer = src_val->pointer; + vtn_push_value_pointer(b, w[2], src_val->pointer); } return; } struct vtn_type *ret_type = vtn_value(b, w[1], vtn_value_type_type)->type; - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); struct vtn_sampled_image sampled; struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]); @@ -2415,8 +2415,9 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, } } - val->ssa = vtn_create_ssa_value(b, ret_type->type); - val->ssa->def = &instr->dest.ssa; + struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, ret_type->type); + ssa->def = &instr->dest.ssa; + vtn_push_ssa(b, w[2], ret_type, ssa); nir_builder_instr_insert(&b->nb, &instr->instr); } @@ -2606,6 +2607,8 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, intrin->src[2] = nir_src_for_ssa(image.sample); } + nir_intrinsic_set_access(intrin, image.image->access); + switch (opcode) { case SpvOpAtomicLoad: case SpvOpImageQuerySize: @@ -2644,7 +2647,6 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, } if (opcode != SpvOpImageWrite && opcode != SpvOpAtomicStore) { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; unsigned dest_components = glsl_get_vector_elements(type->type); @@ -2661,7 +2663,8 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, if (intrin->num_components != dest_components) result = nir_channels(&b->nb, result, (1 << dest_components) - 1); - val->ssa = vtn_create_ssa_value(b, type->type); + struct vtn_value *val = + vtn_push_ssa(b, w[2], type, vtn_create_ssa_value(b, type->type)); val->ssa->def = result; } else { nir_builder_instr_insert(&b->nb, &intrin->instr); @@ -2972,10 +2975,10 @@ vtn_handle_atomics(struct vtn_builder *b, SpvOp opcode, glsl_get_vector_elements(type->type), glsl_get_bit_size(type->type), NULL); - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = rzalloc(b, struct vtn_ssa_value); - val->ssa->def = &atomic->dest.ssa; - val->ssa->type = type->type; + struct vtn_ssa_value *ssa = rzalloc(b, struct vtn_ssa_value); + ssa->def = &atomic->dest.ssa; + ssa->type = type->type; + vtn_push_ssa(b, w[2], type, ssa); } nir_builder_instr_insert(&b->nb, &atomic->instr); @@ -3215,65 +3218,65 @@ static void vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - const struct glsl_type *type = - vtn_value(b, w[1], vtn_value_type_type)->type->type; - val->ssa = vtn_create_ssa_value(b, type); + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, 
type->type); switch (opcode) { case SpvOpVectorExtractDynamic: - val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def, - vtn_ssa_value(b, w[4])->def); + ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def); break; case SpvOpVectorInsertDynamic: - val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def, - vtn_ssa_value(b, w[4])->def, - vtn_ssa_value(b, w[5])->def); + ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + vtn_ssa_value(b, w[5])->def); break; case SpvOpVectorShuffle: - val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type), - vtn_ssa_value(b, w[3])->def, - vtn_ssa_value(b, w[4])->def, - w + 5); + ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type->type), + vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + w + 5); break; case SpvOpCompositeConstruct: { unsigned elems = count - 3; assume(elems >= 1); - if (glsl_type_is_vector_or_scalar(type)) { + if (glsl_type_is_vector_or_scalar(type->type)) { nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS]; for (unsigned i = 0; i < elems; i++) srcs[i] = vtn_ssa_value(b, w[3 + i])->def; - val->ssa->def = - vtn_vector_construct(b, glsl_get_vector_elements(type), + ssa->def = + vtn_vector_construct(b, glsl_get_vector_elements(type->type), elems, srcs); } else { - val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); for (unsigned i = 0; i < elems; i++) - val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); + ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); } break; } case SpvOpCompositeExtract: - val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), - w + 4, count - 4); + ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), + w + 4, count - 4); break; case SpvOpCompositeInsert: - val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), - vtn_ssa_value(b, w[3]), - w + 5, count - 5); + ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), + vtn_ssa_value(b, w[3]), + w + 5, count - 5); break; case SpvOpCopyObject: - val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); + ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); break; default: vtn_fail_with_opcode("unknown composite operation", opcode); } + + vtn_push_ssa(b, w[2], type, ssa); } static void @@ -3389,13 +3392,13 @@ vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode, } case SpvOpControlBarrier: { - SpvScope execution_scope = vtn_constant_uint(b, w[1]); - if (execution_scope == SpvScopeWorkgroup) - vtn_emit_barrier(b, nir_intrinsic_barrier); - SpvScope memory_scope = vtn_constant_uint(b, w[2]); SpvMemorySemanticsMask memory_semantics = vtn_constant_uint(b, w[3]); vtn_emit_memory_barrier(b, memory_scope, memory_semantics); + + SpvScope execution_scope = vtn_constant_uint(b, w[1]); + if (execution_scope == SpvScopeWorkgroup) + vtn_emit_barrier(b, nir_intrinsic_barrier); break; } diff --git a/src/compiler/spirv/vtn_cfg.c b/src/compiler/spirv/vtn_cfg.c index 323e3da52c4..b5d4aeb5210 100644 --- a/src/compiler/spirv/vtn_cfg.c +++ b/src/compiler/spirv/vtn_cfg.c @@ -328,17 +328,12 @@ vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode, } else if (type->base_type == vtn_base_type_pointer && type->type != NULL) { /* This is a pointer with an actual storage type */ - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_pointer); nir_ssa_def *ssa_ptr = nir_load_param(&b->nb, b->func_param_idx++); - val->pointer = 
vtn_pointer_from_ssa(b, ssa_ptr, type); + vtn_push_value_pointer(b, w[2], vtn_pointer_from_ssa(b, ssa_ptr, type)); } else if (type->base_type == vtn_base_type_pointer || type->base_type == vtn_base_type_image || type->base_type == vtn_base_type_sampler) { - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_pointer); - val->pointer = - vtn_load_param_pointer(b, type, b->func_param_idx++); + vtn_push_value_pointer(b, w[2], vtn_load_param_pointer(b, type, b->func_param_idx++)); } else { /* We're a regular SSA value. */ struct vtn_ssa_value *value = vtn_create_ssa_value(b, type->type); diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h index cfe2893e04f..0f1f30e3d0a 100644 --- a/src/compiler/spirv/vtn_private.h +++ b/src/compiler/spirv/vtn_private.h @@ -269,6 +269,9 @@ struct vtn_ssa_value { struct vtn_ssa_value *transposed; const struct glsl_type *type; + + /* Access qualifiers */ + enum gl_access_qualifier access; }; enum vtn_base_type { @@ -416,6 +419,9 @@ struct vtn_access_chain { */ bool ptr_as_array; + /* Access qualifiers */ + enum gl_access_qualifier access; + /** Struct elements and array offsets. * * This is an array of 1 so that it can conveniently be created on the @@ -645,6 +651,10 @@ vtn_untyped_value(struct vtn_builder *b, uint32_t value_id) return &b->values[value_id]; } +/* Consider not using this function directly and instead use + * vtn_push_ssa/vtn_push_value_pointer so that appropriate applying of + * decorations is handled by common code. + */ static inline struct vtn_value * vtn_push_value(struct vtn_builder *b, uint32_t value_id, enum vtn_value_type value_type) @@ -656,22 +666,8 @@ vtn_push_value(struct vtn_builder *b, uint32_t value_id, value_id); val->value_type = value_type; - return &b->values[value_id]; -} -static inline struct vtn_value * -vtn_push_ssa(struct vtn_builder *b, uint32_t value_id, - struct vtn_type *type, struct vtn_ssa_value *ssa) -{ - struct vtn_value *val; - if (type->base_type == vtn_base_type_pointer) { - val = vtn_push_value(b, value_id, vtn_value_type_pointer); - val->pointer = vtn_pointer_from_ssa(b, ssa->def, type); - } else { - val = vtn_push_value(b, value_id, vtn_value_type_ssa); - val->ssa = ssa; - } - return val; + return &b->values[value_id]; } static inline struct vtn_value * @@ -706,8 +702,43 @@ vtn_constant_uint(struct vtn_builder *b, uint32_t value_id) } } +static inline enum gl_access_qualifier vtn_value_access(struct vtn_value *value) +{ + switch (value->value_type) { + case vtn_value_type_invalid: + case vtn_value_type_undef: + case vtn_value_type_string: + case vtn_value_type_decoration_group: + case vtn_value_type_constant: + case vtn_value_type_function: + case vtn_value_type_block: + case vtn_value_type_extension: + return 0; + case vtn_value_type_type: + return value->type->access; + case vtn_value_type_pointer: + return value->pointer->access; + case vtn_value_type_ssa: + return value->ssa->access; + case vtn_value_type_image_pointer: + return value->image->image->access; + case vtn_value_type_sampled_image: + return value->sampled_image->image->access | + value->sampled_image->sampler->access; + } + + unreachable("invalid type"); +} + struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); +struct vtn_value *vtn_push_value_pointer(struct vtn_builder *b, + uint32_t value_id, + struct vtn_pointer *ptr); + +struct vtn_value *vtn_push_ssa(struct vtn_builder *b, uint32_t value_id, + struct vtn_type *type, struct vtn_ssa_value *ssa); + struct vtn_ssa_value 
*vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type); diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index 6fbe6900e48..aa9ee39ba78 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -30,6 +30,52 @@ #include "nir_deref.h" #include +static void ptr_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, + void *void_ptr); + +struct vtn_value * +vtn_push_value_pointer(struct vtn_builder *b, uint32_t value_id, + struct vtn_pointer *ptr) +{ + struct vtn_value *val = vtn_push_value(b, value_id, vtn_value_type_pointer); + val->pointer = ptr; + vtn_foreach_decoration(b, val, ptr_decoration_cb, ptr); + return val; +} + +static void +ssa_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_ssa) +{ + struct vtn_ssa_value *ssa = void_ssa; + + switch (dec->decoration) { + case SpvDecorationNonUniformEXT: + ssa->access |= ACCESS_NON_UNIFORM; + break; + + default: + break; + } +} + +struct vtn_value * +vtn_push_ssa(struct vtn_builder *b, uint32_t value_id, + struct vtn_type *type, struct vtn_ssa_value *ssa) +{ + struct vtn_value *val; + if (type->base_type == vtn_base_type_pointer) { + val = vtn_push_value_pointer(b, value_id, vtn_pointer_from_ssa(b, ssa->def, type)); + } else { + val = vtn_push_value(b, value_id, vtn_value_type_ssa); + val->ssa = ssa; + vtn_foreach_decoration(b, val, ssa_decoration_cb, val->ssa); + } + return val; +} + static struct vtn_access_chain * vtn_access_chain_create(struct vtn_builder *b, unsigned length) { @@ -189,7 +235,7 @@ vtn_nir_deref_pointer_dereference(struct vtn_builder *b, struct vtn_access_chain *deref_chain) { struct vtn_type *type = base->type; - enum gl_access_qualifier access = base->access; + enum gl_access_qualifier access = base->access | deref_chain->access; unsigned idx = 0; nir_deref_instr *tail; @@ -2349,6 +2395,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvOpInBoundsAccessChain: case SpvOpInBoundsPtrAccessChain: { struct vtn_access_chain *chain = vtn_access_chain_create(b, count - 4); + enum gl_access_qualifier access = 0; chain->ptr_as_array = (opcode == SpvOpPtrAccessChain || opcode == SpvOpInBoundsPtrAccessChain); unsigned idx = 0; @@ -2376,8 +2423,8 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, } else { chain->link[idx].mode = vtn_access_mode_id; chain->link[idx].id = w[i]; - } + access |= vtn_value_access(link_val); idx++; } @@ -2404,11 +2451,11 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, val->sampled_image->sampler); } else { vtn_assert(base_val->value_type == vtn_value_type_pointer); - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_pointer); - val->pointer = vtn_pointer_dereference(b, base_val->pointer, chain); - val->pointer->ptr_type = ptr_type; - vtn_foreach_decoration(b, val, ptr_decoration_cb, val->pointer); + struct vtn_pointer *ptr = + vtn_pointer_dereference(b, base_val->pointer, chain); + ptr->ptr_type = ptr_type; + ptr->access |= access; + vtn_push_value_pointer(b, w[2], ptr); } break; } @@ -2433,7 +2480,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, if (glsl_type_is_image(res_type->type) || glsl_type_is_sampler(res_type->type)) { - vtn_push_value(b, w[2], vtn_value_type_pointer)->pointer = src; + vtn_push_value_pointer(b, w[2], src); return; } @@ -2545,10 +2592,11 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, 
"scalar type"); /* The pointer will be converted to an SSA value automatically */ - nir_ssa_def *ptr_ssa = vtn_ssa_value(b, w[3])->def; + struct vtn_ssa_value *ptr_ssa = vtn_ssa_value(b, w[3]); u_val->ssa = vtn_create_ssa_value(b, u_val->type->type); - u_val->ssa->def = nir_sloppy_bitcast(&b->nb, ptr_ssa, u_val->type->type); + u_val->ssa->def = nir_sloppy_bitcast(&b->nb, ptr_ssa->def, u_val->type->type); + u_val->ssa->access |= ptr_ssa->access; break; } @@ -2568,6 +2616,8 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, nir_ssa_def *ptr_ssa = nir_sloppy_bitcast(&b->nb, u_val->ssa->def, ptr_val->type->type); ptr_val->pointer = vtn_pointer_from_ssa(b, ptr_ssa, ptr_val->type); + vtn_foreach_decoration(b, ptr_val, ptr_decoration_cb, ptr_val->pointer); + ptr_val->pointer->access |= u_val->ssa->access; break; } diff --git a/src/egl/Android.mk b/src/egl/Android.mk index a9319f56ae7..d69be3350bd 100644 --- a/src/egl/Android.mk +++ b/src/egl/Android.mk @@ -71,6 +71,10 @@ ifeq ($(BOARD_USES_DRM_GRALLOC),true) LOCAL_SHARED_LIBRARIES += libgralloc_drm endif +ifeq ($(strip $(BOARD_USES_GRALLOC1)),true) +LOCAL_CFLAGS += -DHAVE_GRALLOC1 +endif + ifeq ($(filter $(MESA_ANDROID_MAJOR_VERSION), 4 5 6 7),) LOCAL_SHARED_LIBRARIES += libnativewindow endif @@ -88,6 +92,6 @@ endif LOCAL_MODULE := libGLES_mesa LOCAL_MODULE_RELATIVE_PATH := egl - +LOCAL_CFLAGS += -Wno-error include $(MESA_COMMON_MK) include $(BUILD_SHARED_LIBRARY) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index d584bccdebe..0efd30a144f 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -65,6 +65,7 @@ #include "util/u_atomic.h" #include "util/u_vector.h" #include "mapi/glapi/glapi.h" +#include "util/bitscan.h" /* Additional definitions not yet in the drm_fourcc.h. 
*/ @@ -152,11 +153,7 @@ const __DRIuseInvalidateExtension use_invalidate = { static const EGLint dri2_to_egl_attribute_map[__DRI_ATTRIB_MAX] = { [__DRI_ATTRIB_BUFFER_SIZE ] = EGL_BUFFER_SIZE, [__DRI_ATTRIB_LEVEL] = EGL_LEVEL, - [__DRI_ATTRIB_RED_SIZE] = EGL_RED_SIZE, - [__DRI_ATTRIB_GREEN_SIZE] = EGL_GREEN_SIZE, - [__DRI_ATTRIB_BLUE_SIZE] = EGL_BLUE_SIZE, [__DRI_ATTRIB_LUMINANCE_SIZE] = EGL_LUMINANCE_SIZE, - [__DRI_ATTRIB_ALPHA_SIZE] = EGL_ALPHA_SIZE, [__DRI_ATTRIB_DEPTH_SIZE] = EGL_DEPTH_SIZE, [__DRI_ATTRIB_STENCIL_SIZE] = EGL_STENCIL_SIZE, [__DRI_ATTRIB_SAMPLE_BUFFERS] = EGL_SAMPLE_BUFFERS, @@ -191,10 +188,50 @@ dri2_match_config(const _EGLConfig *conf, const _EGLConfig *criteria) return EGL_TRUE; } +void +dri2_get_shifts_and_sizes(const __DRIcoreExtension *core, + const __DRIconfig *config, int *shifts, + unsigned int *sizes) +{ + unsigned int mask; + + if (core->getConfigAttrib(config, __DRI_ATTRIB_RED_SHIFT, (unsigned int *)&shifts[0])) { + core->getConfigAttrib(config, __DRI_ATTRIB_GREEN_SHIFT, (unsigned int *)&shifts[1]); + core->getConfigAttrib(config, __DRI_ATTRIB_BLUE_SHIFT, (unsigned int *)&shifts[2]); + core->getConfigAttrib(config, __DRI_ATTRIB_ALPHA_SHIFT, (unsigned int *)&shifts[3]); + } else { + /* Driver isn't exposing shifts, so convert masks to shifts */ + core->getConfigAttrib(config, __DRI_ATTRIB_RED_MASK, &mask); + shifts[0] = ffs(mask) - 1; + core->getConfigAttrib(config, __DRI_ATTRIB_GREEN_MASK, &mask); + shifts[1] = ffs(mask) - 1; + core->getConfigAttrib(config, __DRI_ATTRIB_BLUE_MASK, &mask); + shifts[2] = ffs(mask) - 1; + core->getConfigAttrib(config, __DRI_ATTRIB_ALPHA_MASK, &mask); + shifts[3] = ffs(mask) - 1; + } + + core->getConfigAttrib(config, __DRI_ATTRIB_RED_SIZE, &sizes[0]); + core->getConfigAttrib(config, __DRI_ATTRIB_GREEN_SIZE, &sizes[1]); + core->getConfigAttrib(config, __DRI_ATTRIB_BLUE_SIZE, &sizes[2]); + core->getConfigAttrib(config, __DRI_ATTRIB_ALPHA_SIZE, &sizes[3]); +} + +void +dri2_get_render_type_float(const __DRIcoreExtension *core, + const __DRIconfig *config, + bool *is_float) +{ + unsigned int render_type; + + core->getConfigAttrib(config, __DRI_ATTRIB_RENDER_TYPE, &render_type); + *is_float = (render_type & __DRI_ATTRIB_FLOAT_BIT) ? 
true : false; +} + struct dri2_egl_config * dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, EGLint surface_type, const EGLint *attr_list, - const unsigned int *rgba_masks) + const int *rgba_shifts, const unsigned int *rgba_sizes) { struct dri2_egl_config *conf; struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); @@ -202,7 +239,8 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, unsigned int attrib, value, double_buffer; bool srgb = false; EGLint key, bind_to_texture_rgb, bind_to_texture_rgba; - unsigned int dri_masks[4] = { 0, 0, 0, 0 }; + int dri_shifts[4] = { -1, -1, -1, -1 }; + unsigned int dri_sizes[4] = { 0, 0, 0, 0 }; _EGLConfig *matching_config; EGLint num_configs = 0; EGLint config_id; @@ -219,6 +257,9 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, switch (attrib) { case __DRI_ATTRIB_RENDER_TYPE: + if (value & __DRI_ATTRIB_FLOAT_BIT) + _eglSetConfigKey(&base, EGL_COLOR_COMPONENT_TYPE_EXT, + EGL_COLOR_COMPONENT_TYPE_FLOAT_EXT); if (value & __DRI_ATTRIB_RGBA_BIT) value = EGL_RGB_BUFFER; else if (value & __DRI_ATTRIB_LUMINANCE_BIT) @@ -250,20 +291,56 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, double_buffer = value; break; + case __DRI_ATTRIB_RED_SIZE: + dri_sizes[0] = value; + _eglSetConfigKey(&base, EGL_RED_SIZE, value); + break; + case __DRI_ATTRIB_RED_MASK: - dri_masks[0] = value; + dri_shifts[0] = ffs(value) - 1; + break; + + case __DRI_ATTRIB_RED_SHIFT: + dri_shifts[0] = value; + break; + + case __DRI_ATTRIB_GREEN_SIZE: + dri_sizes[1] = value; + _eglSetConfigKey(&base, EGL_GREEN_SIZE, value); break; case __DRI_ATTRIB_GREEN_MASK: - dri_masks[1] = value; + dri_shifts[1] = ffs(value) - 1; + break; + + case __DRI_ATTRIB_GREEN_SHIFT: + dri_shifts[1] = value; + break; + + case __DRI_ATTRIB_BLUE_SIZE: + dri_sizes[2] = value; + _eglSetConfigKey(&base, EGL_BLUE_SIZE, value); break; case __DRI_ATTRIB_BLUE_MASK: - dri_masks[2] = value; + dri_shifts[2] = ffs(value) - 1; + break; + + case __DRI_ATTRIB_BLUE_SHIFT: + dri_shifts[2] = value; + break; + + case __DRI_ATTRIB_ALPHA_SIZE: + dri_sizes[3] = value; + _eglSetConfigKey(&base, EGL_ALPHA_SIZE, value); break; case __DRI_ATTRIB_ALPHA_MASK: - dri_masks[3] = value; + dri_shifts[3] = ffs(value) - 1; + break; + + case __DRI_ATTRIB_ALPHA_SHIFT: + dri_shifts[3] = value; break; case __DRI_ATTRIB_ACCUM_RED_SIZE: @@ -305,7 +382,10 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, for (int i = 0; attr_list[i] != EGL_NONE; i += 2) _eglSetConfigKey(&base, attr_list[i], attr_list[i+1]); - if (rgba_masks && memcmp(rgba_masks, dri_masks, sizeof(dri_masks))) + if (rgba_shifts && memcmp(rgba_shifts, dri_shifts, sizeof(dri_shifts))) + return NULL; + + if (rgba_sizes && memcmp(rgba_sizes, dri_sizes, sizeof(dri_sizes))) return NULL; base.NativeRenderable = EGL_TRUE; @@ -1424,6 +1504,37 @@ dri2_surf_update_fence_fd(_EGLContext *ctx, dri2_surface_set_out_fence_fd(surf, fence_fd); } +EGLBoolean +dri2_create_drawable(struct dri2_egl_display *dri2_dpy, + const __DRIconfig *config, + struct dri2_egl_surface *dri2_surf) +{ + __DRIcreateNewDrawableFunc createNewDrawable; + void *loaderPrivate = dri2_surf; + + if (dri2_dpy->image_driver) + createNewDrawable = dri2_dpy->image_driver->createNewDrawable; + else if (dri2_dpy->dri2) + createNewDrawable = dri2_dpy->dri2->createNewDrawable; + else if (dri2_dpy->swrast) + createNewDrawable = dri2_dpy->swrast->createNewDrawable; + else + return _eglError(EGL_BAD_ALLOC, "no 
createNewDrawable"); + + /* As always gbm is a bit special.. */ +#ifdef HAVE_DRM_PLATFORM + if (dri2_surf->gbm_surf) + loaderPrivate = dri2_surf->gbm_surf; +#endif + + dri2_surf->dri_drawable = (*createNewDrawable)(dri2_dpy->dri_screen, + config, loaderPrivate); + if (dri2_surf->dri_drawable == NULL) + return _eglError(EGL_BAD_ALLOC, "createNewDrawable"); + + return EGL_TRUE; +} + /** * Called via eglMakeCurrent(), drv->API.MakeCurrent(). */ @@ -2244,6 +2355,8 @@ dri2_num_fourcc_format_planes(EGLint format) case DRM_FORMAT_ABGR2101010: case DRM_FORMAT_RGBA1010102: case DRM_FORMAT_BGRA1010102: + case DRM_FORMAT_XBGR16161616F: + case DRM_FORMAT_ABGR16161616F: case DRM_FORMAT_YUYV: case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: @@ -2627,21 +2740,39 @@ dri2_export_dma_buf_image_query_mesa(_EGLDriver *drv, _EGLDisplay *disp, { struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); struct dri2_egl_image *dri2_img = dri2_egl_image(img); + int num_planes; (void) drv; if (!dri2_can_export_dma_buf_image(disp, img)) return EGL_FALSE; + dri2_dpy->image->queryImage(dri2_img->dri_image, + __DRI_IMAGE_ATTRIB_NUM_PLANES, &num_planes); if (nplanes) - dri2_dpy->image->queryImage(dri2_img->dri_image, - __DRI_IMAGE_ATTRIB_NUM_PLANES, nplanes); + *nplanes = num_planes; + if (fourcc) dri2_dpy->image->queryImage(dri2_img->dri_image, __DRI_IMAGE_ATTRIB_FOURCC, fourcc); - if (modifiers) - *modifiers = 0; + if (modifiers) { + int mod_hi, mod_lo; + uint64_t modifier = DRM_FORMAT_MOD_INVALID; + bool query; + + query = dri2_dpy->image->queryImage(dri2_img->dri_image, + __DRI_IMAGE_ATTRIB_MODIFIER_UPPER, + &mod_hi); + query &= dri2_dpy->image->queryImage(dri2_img->dri_image, + __DRI_IMAGE_ATTRIB_MODIFIER_LOWER, + &mod_lo); + if (query) + modifier = combine_u32_into_u64 (mod_hi, mod_lo); + + for (int i = 0; i < num_planes; i++) + modifiers[i] = modifier; + } return EGL_TRUE; } diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index aa143deb867..2d47b3a195a 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -69,6 +69,10 @@ struct zwp_linux_dmabuf_v1; #include #endif /* HAVE_ANDROID_PLATFORM */ +#ifdef HAVE_GRALLOC1 +#include +#endif + #include "eglconfig.h" #include "eglcontext.h" #include "egldevice.h" @@ -239,6 +243,13 @@ struct dri2_egl_display #ifdef HAVE_ANDROID_PLATFORM const gralloc_module_t *gralloc; +#ifdef HAVE_GRALLOC1 + uint16_t gralloc_version; + gralloc1_device_t *gralloc1_dvc; + GRALLOC1_PFN_LOCK_FLEX pfn_lockflex; + GRALLOC1_PFN_GET_FORMAT pfn_getFormat; + GRALLOC1_PFN_UNLOCK pfn_unlock; +#endif #endif bool is_render_node; @@ -402,10 +413,20 @@ dri2_surface_get_dri_drawable(_EGLSurface *surf); __DRIimage * dri2_lookup_egl_image(__DRIscreen *screen, void *image, void *data); +void +dri2_get_shifts_and_sizes(const __DRIcoreExtension *core, + const __DRIconfig *config, int *shifts, + unsigned int *sizes); + +void +dri2_get_render_type_float(const __DRIcoreExtension *core, + const __DRIconfig *config, + bool *is_float); + struct dri2_egl_config * dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, EGLint surface_type, const EGLint *attr_list, - const unsigned int *rgba_masks); + const int *rgba_shifts, const unsigned int *rgba_sizes); _EGLImage * dri2_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp, @@ -540,6 +561,11 @@ dri2_init_surface(_EGLSurface *surf, _EGLDisplay *disp, EGLint type, void dri2_fini_surface(_EGLSurface *surf); +EGLBoolean +dri2_create_drawable(struct dri2_egl_display *dri2_dpy, + const __DRIconfig 
*config, + struct dri2_egl_surface *dri2_surf); + static inline uint64_t combine_u32_into_u64(uint32_t hi, uint32_t lo) { diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c index e9ea9e6002b..6e2e0fd3125 100644 --- a/src/egl/drivers/dri2/platform_android.c +++ b/src/egl/drivers/dri2/platform_android.c @@ -64,14 +64,26 @@ struct droid_yuv_format { int fourcc; /* __DRI_IMAGE_FOURCC_ */ }; +/* This enumeration can be deleted if Android defined it in + * system/core/include/system/graphics.h + */ +enum { + HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL = 0x100, + HAL_PIXEL_FORMAT_NV12 = 0x10F, + HAL_PIXEL_FORMAT_P010_INTEL = 0x110 +}; + /* The following table is used to look up a DRI image FourCC based * on native format and information contained in android_ycbcr struct. */ static const struct droid_yuv_format droid_yuv_formats[] = { /* Native format, YCrCb, Chroma step, DRI image FourCC */ { HAL_PIXEL_FORMAT_YCbCr_420_888, YCbCr, 2, __DRI_IMAGE_FOURCC_NV12 }, + { HAL_PIXEL_FORMAT_P010_INTEL, YCbCr, 4, __DRI_IMAGE_FOURCC_P010 }, { HAL_PIXEL_FORMAT_YCbCr_420_888, YCbCr, 1, __DRI_IMAGE_FOURCC_YUV420 }, { HAL_PIXEL_FORMAT_YCbCr_420_888, YCrCb, 1, __DRI_IMAGE_FOURCC_YVU420 }, { HAL_PIXEL_FORMAT_YV12, YCrCb, 1, __DRI_IMAGE_FOURCC_YVU420 }, + { HAL_PIXEL_FORMAT_NV12, YCbCr, 2, __DRI_IMAGE_FOURCC_NV12 }, + { HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL, YCbCr, 2, __DRI_IMAGE_FOURCC_NV12 }, /* HACK: See droid_create_image_from_prime_fd() and * https://issuetracker.google.com/32077885. */ { HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED, YCbCr, 2, __DRI_IMAGE_FOURCC_NV12 }, @@ -109,6 +121,9 @@ get_format_bpp(int native) int bpp; switch (native) { + case HAL_PIXEL_FORMAT_RGBA_FP16: + bpp = 8; + break; case HAL_PIXEL_FORMAT_RGBA_8888: case HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED: /* @@ -117,6 +132,7 @@ get_format_bpp(int native) */ case HAL_PIXEL_FORMAT_RGBX_8888: case HAL_PIXEL_FORMAT_BGRA_8888: + case HAL_PIXEL_FORMAT_RGBA_1010102: bpp = 4; break; case HAL_PIXEL_FORMAT_RGB_565: @@ -143,6 +159,8 @@ static int get_fourcc(int native) * TODO: Remove this once https://issuetracker.google.com/32077885 is fixed. */ case HAL_PIXEL_FORMAT_RGBX_8888: return __DRI_IMAGE_FOURCC_XBGR8888; + case HAL_PIXEL_FORMAT_RGBA_FP16: return __DRI_IMAGE_FOURCC_ABGR16161616F; + case HAL_PIXEL_FORMAT_RGBA_1010102: return __DRI_IMAGE_FOURCC_ABGR2101010; default: _eglLog(_EGL_WARNING, "unsupported native buffer format 0x%x", native); } @@ -161,6 +179,8 @@ static int get_format(int format) * TODO: Revert this once https://issuetracker.google.com/32077885 is fixed. 
*/ case HAL_PIXEL_FORMAT_RGBX_8888: return __DRI_IMAGE_FORMAT_XBGR8888; + case HAL_PIXEL_FORMAT_RGBA_FP16: return __DRI_IMAGE_FORMAT_ABGR16161616F; + case HAL_PIXEL_FORMAT_RGBA_1010102: return __DRI_IMAGE_FORMAT_ABGR2101010; default: _eglLog(_EGL_WARNING, "unsupported native buffer format 0x%x", format); } @@ -255,6 +275,47 @@ droid_window_dequeue_buffer(struct dri2_egl_surface *dri2_surf) return EGL_TRUE; } +static int +droid_resolve_format(struct dri2_egl_display *dri2_dpy, + struct ANativeWindowBuffer *buf) +{ + int format = -1; + int ret; + + if (buf->format != HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED) + return buf->format; +#ifdef HAVE_GRALLOC1 + if(dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) { + if (!dri2_dpy->pfn_getFormat) { + _eglLog(_EGL_WARNING, "Gralloc does not support getFormat"); + return -1; + } + ret = dri2_dpy->pfn_getFormat(dri2_dpy->gralloc1_dvc, buf->handle, + &format); + if (ret) { + _eglLog(_EGL_WARNING, "gralloc->getFormat failed: %d", ret); + return -1; + } + } else { +#else + if (!dri2_dpy->gralloc->perform) { + _eglLog(_EGL_WARNING, "gralloc->perform not supported"); + return -1; + } + ret = dri2_dpy->gralloc->perform(dri2_dpy->gralloc, + GRALLOC_DRM_GET_FORMAT, + buf->handle, &format); + if (ret){ + _eglLog(_EGL_WARNING, "gralloc->perform failed with error: %d", ret); + return -1; + } +#endif +#ifdef HAVE_GRALLOC1 + } +#endif + return format; +} + static EGLBoolean droid_window_enqueue_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_surf) { @@ -341,7 +402,6 @@ droid_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, _EGLConfig *conf, void *native_window, const EGLint *attrib_list) { - __DRIcreateNewDrawableFunc createNewDrawable; struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); struct dri2_egl_config *dri2_conf = dri2_egl_config(conf); struct dri2_egl_surface *dri2_surf; @@ -385,17 +445,8 @@ droid_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, goto cleanup_surface; } - if (dri2_dpy->image_driver) - createNewDrawable = dri2_dpy->image_driver->createNewDrawable; - else - createNewDrawable = dri2_dpy->dri2->createNewDrawable; - - dri2_surf->dri_drawable = (*createNewDrawable)(dri2_dpy->dri_screen, config, - dri2_surf); - if (dri2_surf->dri_drawable == NULL) { - _eglError(EGL_BAD_ALLOC, "createNewDrawable"); + if (!dri2_create_drawable(dri2_dpy, config, dri2_surf)) goto cleanup_surface; - } if (window) { window->common.incRef(&window->common); @@ -469,7 +520,7 @@ droid_swap_interval(_EGLDriver *drv, _EGLDisplay *disp, struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf); struct ANativeWindow *window = dri2_surf->window; - if (window->setSwapInterval(window, interval)) + if (window && window->setSwapInterval(window, interval)) return EGL_FALSE; surf->SwapInterval = interval; @@ -670,11 +721,18 @@ droid_query_buffer_age(_EGLDriver *drv, { struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surface); + /* To avoid blocking other EGL calls, release the display mutex before + * we enter droid_window_dequeue_buffer() and re-acquire the mutex upon + * return. + */ + mtx_unlock(&disp->Mutex); if (update_buffers(dri2_surf) < 0) { _eglError(EGL_BAD_ALLOC, "droid_query_buffer_age"); + mtx_lock(&disp->Mutex); return -1; } + mtx_lock(&disp->Mutex); return dri2_surf->back ? 
dri2_surf->back->age : 0; } @@ -737,6 +795,31 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw) return EGL_TRUE; } +static int get_ycbcr_from_flexlayout(struct android_flex_layout *outFlexLayout, struct android_ycbcr *ycbcr) +{ + + for( int i = 0; i < outFlexLayout->num_planes; i++) { + switch(outFlexLayout->planes[i].component){ + case FLEX_COMPONENT_Y: + ycbcr->y = outFlexLayout->planes[i].top_left; + ycbcr->ystride = outFlexLayout->planes[i].v_increment; + break; + case FLEX_COMPONENT_Cb: + ycbcr->cb = outFlexLayout->planes[i].top_left; + ycbcr->cstride = outFlexLayout->planes[i].v_increment; + break; + case FLEX_COMPONENT_Cr: + ycbcr->cr = outFlexLayout->planes[i].top_left; + ycbcr->chroma_step = outFlexLayout->planes[i].h_increment; + break; + default: + _eglLog(_EGL_WARNING,"unknown component 0x%x", __func__, outFlexLayout->planes[i].component); + break; + } + } + return 0; +} + #if ANDROID_API_LEVEL >= 23 static EGLBoolean droid_set_damage_region(_EGLDriver *drv, @@ -786,12 +869,45 @@ droid_create_image_from_prime_fd_yuv(_EGLDisplay *disp, _EGLContext *ctx, int fourcc; int ret; - if (!dri2_dpy->gralloc->lock_ycbcr) { - _eglLog(_EGL_WARNING, "Gralloc does not support lock_ycbcr"); + int format = droid_resolve_format(dri2_dpy, buf); + if (format < 0) { + _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR"); return NULL; } +#ifdef HAVE_GRALLOC1 + struct android_flex_layout outFlexLayout; + gralloc1_rect_t accessRegion; + memset(&ycbcr, 0, sizeof(ycbcr)); + + if (dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) { + if (!dri2_dpy->pfn_lockflex) { + _eglLog(_EGL_WARNING, "Gralloc does not support lockflex"); + return NULL; + } + ret = dri2_dpy->pfn_lockflex(dri2_dpy->gralloc1_dvc, buf->handle, + 0, 0, &accessRegion, &outFlexLayout, -1); + if (ret) { + _eglLog(_EGL_WARNING, "gralloc->lockflex failed: %d", ret); + return NULL; + } + ret = get_ycbcr_from_flexlayout(&outFlexLayout, &ycbcr); + if (ret) { + _eglLog(_EGL_WARNING, "gralloc->lockflex failed: %d", ret); + return NULL; + } + int outReleaseFence = 0; + dri2_dpy->pfn_unlock(dri2_dpy->gralloc1_dvc, buf->handle, &outReleaseFence); + } else { +#endif + if (!dri2_dpy->gralloc->lock_ycbcr) { + _eglLog(_EGL_WARNING, "Gralloc does not support lock_ycbcr"); + return NULL; + } + + memset(&ycbcr, 0, sizeof(ycbcr)); + ret = dri2_dpy->gralloc->lock_ycbcr(dri2_dpy->gralloc, buf->handle, 0, 0, 0, 0, 0, &ycbcr); if (ret) { @@ -804,6 +920,9 @@ droid_create_image_from_prime_fd_yuv(_EGLDisplay *disp, _EGLContext *ctx, return NULL; } dri2_dpy->gralloc->unlock(dri2_dpy->gralloc, buf->handle); +#ifdef HAVE_GRALLOC1 + } +#endif /* When lock_ycbcr's usage argument contains no SW_READ/WRITE flags * it will return the .y/.cb/.cr pointers based on a NULL pointer, @@ -829,14 +948,15 @@ droid_create_image_from_prime_fd_yuv(_EGLDisplay *disp, _EGLContext *ctx, /* .chroma_step is the byte distance between the same chroma channel * values of subsequent pixels, assumed to be the same for Cb and Cr. */ - fourcc = get_fourcc_yuv(buf->format, chroma_order, ycbcr.chroma_step); + fourcc = get_fourcc_yuv(format, chroma_order, ycbcr.chroma_step); if (fourcc == -1) { _eglLog(_EGL_WARNING, "unsupported YUV format, native = %x, chroma_order = %s, chroma_step = %d", - buf->format, chroma_order == YCbCr ? "YCbCr" : "YCrCb", ycbcr.chroma_step); + format, chroma_order == YCbCr ? "YCbCr" : "YCrCb", ycbcr.chroma_step); return NULL; } - if (ycbcr.chroma_step == 2) { + /* FIXME? 
we should not rely on chroma_step */ + if (ycbcr.chroma_step == 2 || ycbcr.chroma_step == 4) { /* Semi-planar Y + CbCr or Y + CrCb format. */ const EGLint attr_list_2plane[] = { EGL_WIDTH, buf->width, @@ -878,9 +998,16 @@ static _EGLImage * droid_create_image_from_prime_fd(_EGLDisplay *disp, _EGLContext *ctx, struct ANativeWindowBuffer *buf, int fd) { + struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); unsigned int pitch; - if (is_yuv(buf->format)) { + int format = droid_resolve_format(dri2_dpy, buf); + if (format < 0) { + _eglLog(_EGL_WARNING, "Could not resolve buffer format"); + return NULL; + } + + if (is_yuv(format)) { _EGLImage *image; image = droid_create_image_from_prime_fd_yuv(disp, ctx, buf, fd); @@ -895,13 +1022,13 @@ droid_create_image_from_prime_fd(_EGLDisplay *disp, _EGLContext *ctx, return image; } - const int fourcc = get_fourcc(buf->format); + const int fourcc = get_fourcc(format); if (fourcc == -1) { _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR"); return NULL; } - pitch = buf->stride * get_format_bpp(buf->format); + pitch = buf->stride * get_format_bpp(format); if (pitch == 0) { _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR"); return NULL; @@ -1143,12 +1270,13 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp) struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); static const struct { int format; - unsigned int rgba_masks[4]; + int rgba_shifts[4]; + unsigned int rgba_sizes[4]; } visuals[] = { - { HAL_PIXEL_FORMAT_RGBA_8888, { 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 } }, - { HAL_PIXEL_FORMAT_RGBX_8888, { 0x000000ff, 0x0000ff00, 0x00ff0000, 0x00000000 } }, - { HAL_PIXEL_FORMAT_RGB_565, { 0x0000f800, 0x000007e0, 0x0000001f, 0x00000000 } }, - { HAL_PIXEL_FORMAT_BGRA_8888, { 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 } }, + { HAL_PIXEL_FORMAT_RGBA_8888, { 0, 8, 16, 24 }, { 8, 8, 8, 8 } }, + { HAL_PIXEL_FORMAT_RGBX_8888, { 0, 8, 16, -1 }, { 8, 8, 8, 0 } }, + { HAL_PIXEL_FORMAT_RGB_565, { 11, 5, 0, -1 }, { 5, 6, 5, 0 } }, + { HAL_PIXEL_FORMAT_BGRA_8888, { 16, 8, 0, 24 }, { 8, 8, 8, 8 } }, }; unsigned int format_count[ARRAY_SIZE(visuals)] = { 0 }; @@ -1187,7 +1315,7 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp) struct dri2_egl_config *dri2_conf = dri2_add_config(disp, dri2_dpy->driver_configs[j], config_count + 1, surface_type, config_attrs, - visuals[i].rgba_masks); + visuals[i].rgba_shifts, visuals[i].rgba_sizes); if (dri2_conf) { if (dri2_conf->base.ConfigID == config_count + 1) config_count++; @@ -1567,6 +1695,22 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *disp) err = "DRI2: failed to get gralloc module"; goto cleanup; } +#ifdef HAVE_GRALLOC1 + hw_device_t *device; + dri2_dpy->gralloc_version = dri2_dpy->gralloc->common.module_api_version; + if (dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) { + ret = dri2_dpy->gralloc->common.methods->open(&dri2_dpy->gralloc->common, GRALLOC_HARDWARE_MODULE_ID, &device); + if (ret) { + err = "Failed to open hw_device device"; + goto cleanup; + } else { + dri2_dpy->gralloc1_dvc = (gralloc1_device_t *)device; + dri2_dpy->pfn_lockflex = (GRALLOC1_PFN_LOCK_FLEX)dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_LOCK_FLEX); + dri2_dpy->pfn_getFormat = (GRALLOC1_PFN_GET_FORMAT)dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_GET_FORMAT); + dri2_dpy->pfn_unlock = (GRALLOC1_PFN_UNLOCK)dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_UNLOCK); + } + } +#endif 
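
Note on the mask-to-shift/size conversion used throughout the EGL hunks above: where a driver or X visual only reports legacy channel masks (__DRI_ATTRIB_*_MASK, visual red/green/blue masks), the updated code derives the channel shift as ffs(mask) - 1 and the channel size as the mask's population count (util_bitcount). The standalone C sketch below is illustration only and not part of the patch; it shows how an old rgba_masks table entry maps onto the new rgba_shifts/rgba_sizes pair, substituting the compiler builtin __builtin_popcount for Mesa's util_bitcount helper.

#include <stdio.h>
#include <strings.h>   /* ffs() */

/* Illustration only: convert a channel bit-mask (e.g. 0x00ff0000 for the red
 * bits of an ARGB8888 visual) into the shift/size pair the reworked EGL
 * config code stores.  The shift is the index of the lowest set bit and the
 * size is the number of set bits; an absent channel (mask == 0) maps to
 * shift -1, size 0, matching the rewritten visual tables above.
 */
static void
mask_to_shift_and_size(unsigned int mask, int *shift, unsigned int *size)
{
   *shift = mask ? ffs(mask) - 1 : -1;
   *size = (unsigned int) __builtin_popcount(mask); /* stand-in for util_bitcount() */
}

int
main(void)
{
   /* ARGB8888 channel masks, as the old rgba_masks tables spelled them. */
   const unsigned int masks[4] = { 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 };
   const char *names[4] = { "red", "green", "blue", "alpha" };

   for (int i = 0; i < 4; i++) {
      int shift;
      unsigned int size;

      mask_to_shift_and_size(masks[i], &shift, &size);
      printf("%-5s mask 0x%08x -> shift %2d, size %u\n",
             names[i], masks[i], shift, size);
   }

   /* Prints shifts 16/8/0/24 and sizes 8/8/8/8, i.e. the { 16, 8, 0, 24 },
    * { 8, 8, 8, 8 } entries used for ARGB8888-style visuals in the updated
    * platform tables.
    */
   return 0;
}
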
disp->DriverData = (void *) dri2_dpy; if (!disp->Options.ForceSoftware) diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c index c1ab1c9b0f6..1da521ada3b 100644 --- a/src/egl/drivers/dri2/platform_drm.c +++ b/src/egl/drivers/dri2/platform_drm.c @@ -96,7 +96,9 @@ dri2_drm_config_is_compatible(struct dri2_egl_display *dri2_dpy, struct gbm_surface *surface) { const struct gbm_dri_visual *visual = NULL; - unsigned int red, green, blue, alpha; + int shifts[4]; + unsigned int sizes[4]; + bool is_float; int i; /* Check that the EGLConfig being used to render to the surface is @@ -104,10 +106,9 @@ dri2_drm_config_is_compatible(struct dri2_egl_display *dri2_dpy, * otherwise-compatible formats is relatively common, explicitly allow * this. */ - dri2_dpy->core->getConfigAttrib(config, __DRI_ATTRIB_RED_MASK, &red); - dri2_dpy->core->getConfigAttrib(config, __DRI_ATTRIB_GREEN_MASK, &green); - dri2_dpy->core->getConfigAttrib(config, __DRI_ATTRIB_BLUE_MASK, &blue); - dri2_dpy->core->getConfigAttrib(config, __DRI_ATTRIB_ALPHA_MASK, &alpha); + dri2_get_shifts_and_sizes(dri2_dpy->core, config, shifts, sizes); + + dri2_get_render_type_float(dri2_dpy->core, config, &is_float); for (i = 0; i < dri2_dpy->gbm_dri->num_visuals; i++) { visual = &dri2_dpy->gbm_dri->visual_table[i]; @@ -118,10 +119,15 @@ dri2_drm_config_is_compatible(struct dri2_egl_display *dri2_dpy, if (i == dri2_dpy->gbm_dri->num_visuals) return false; - if (red != visual->rgba_masks.red || - green != visual->rgba_masks.green || - blue != visual->rgba_masks.blue || - (alpha && visual->rgba_masks.alpha && alpha != visual->rgba_masks.alpha)) { + if (shifts[0] != visual->rgba_shifts.red || + shifts[1] != visual->rgba_shifts.green || + shifts[2] != visual->rgba_shifts.blue || + (shifts[3] > -1 && shifts[3] != visual->rgba_shifts.alpha) || + sizes[0] != visual->rgba_sizes.red || + sizes[1] != visual->rgba_sizes.green || + sizes[2] != visual->rgba_sizes.blue || + (sizes[3] > 0 && sizes[3] != visual->rgba_sizes.alpha) || + is_float != visual->is_float) { return false; } @@ -171,23 +177,8 @@ dri2_drm_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, dri2_surf->base.Height = surf->base.height; surf->dri_private = dri2_surf; - if (dri2_dpy->dri2) { - dri2_surf->dri_drawable = - dri2_dpy->dri2->createNewDrawable(dri2_dpy->dri_screen, config, - dri2_surf->gbm_surf); - - } else { - assert(dri2_dpy->swrast != NULL); - - dri2_surf->dri_drawable = - dri2_dpy->swrast->createNewDrawable(dri2_dpy->dri_screen, config, - dri2_surf->gbm_surf); - - } - if (dri2_surf->dri_drawable == NULL) { - _eglError(EGL_BAD_ALLOC, "createNewDrawable()"); + if (!dri2_create_drawable(dri2_dpy, config, dri2_surf)) goto cleanup_surf; - } return &dri2_surf->base; @@ -627,24 +618,27 @@ drm_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp) memset(format_count, 0, num_visuals * sizeof(unsigned int)); for (unsigned i = 0; dri2_dpy->driver_configs[i]; i++) { - unsigned int red, green, blue, alpha; + const __DRIconfig *config = dri2_dpy->driver_configs[i]; + int shifts[4]; + unsigned int sizes[4]; + bool is_float; + + dri2_get_shifts_and_sizes(dri2_dpy->core, config, shifts, sizes); - dri2_dpy->core->getConfigAttrib(dri2_dpy->driver_configs[i], - __DRI_ATTRIB_RED_MASK, &red); - dri2_dpy->core->getConfigAttrib(dri2_dpy->driver_configs[i], - __DRI_ATTRIB_GREEN_MASK, &green); - dri2_dpy->core->getConfigAttrib(dri2_dpy->driver_configs[i], - __DRI_ATTRIB_BLUE_MASK, &blue); - dri2_dpy->core->getConfigAttrib(dri2_dpy->driver_configs[i], - 
__DRI_ATTRIB_ALPHA_MASK, &alpha); + dri2_get_render_type_float(dri2_dpy->core, config, &is_float); for (unsigned j = 0; j < num_visuals; j++) { struct dri2_egl_config *dri2_conf; - if (visuals[j].rgba_masks.red != red || - visuals[j].rgba_masks.green != green || - visuals[j].rgba_masks.blue != blue || - visuals[j].rgba_masks.alpha != alpha) + if (visuals[j].rgba_shifts.red != shifts[0] || + visuals[j].rgba_shifts.green != shifts[1] || + visuals[j].rgba_shifts.blue != shifts[2] || + visuals[j].rgba_shifts.alpha != shifts[3] || + visuals[j].rgba_sizes.red != sizes[0] || + visuals[j].rgba_sizes.green != sizes[1] || + visuals[j].rgba_sizes.blue != sizes[2] || + visuals[j].rgba_sizes.alpha != sizes[3] || + visuals[j].is_float != is_float) continue; const EGLint attr_list[] = { @@ -653,7 +647,7 @@ drm_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp) }; dri2_conf = dri2_add_config(disp, dri2_dpy->driver_configs[i], - config_count + 1, EGL_WINDOW_BIT, attr_list, NULL); + config_count + 1, EGL_WINDOW_BIT, attr_list, NULL, NULL); if (dri2_conf) { if (dri2_conf->base.ConfigID == config_count + 1) config_count++; @@ -730,6 +724,7 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp) goto cleanup; } } + dri2_dpy->gbm_dri = gbm_dri_device(gbm); if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) { err = "DRI2: gbm device using incorrect/incompatible backend"; @@ -744,7 +739,6 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp) disp->Device = dev; - dri2_dpy->gbm_dri = gbm_dri_device(gbm); dri2_dpy->driver_name = strdup(dri2_dpy->gbm_dri->driver_name); dri2_dpy->dri_screen = dri2_dpy->gbm_dri->screen; diff --git a/src/egl/drivers/dri2/platform_surfaceless.c b/src/egl/drivers/dri2/platform_surfaceless.c index fefb2b449d0..4d594e7e336 100644 --- a/src/egl/drivers/dri2/platform_surfaceless.c +++ b/src/egl/drivers/dri2/platform_surfaceless.c @@ -135,13 +135,8 @@ dri2_surfaceless_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, goto cleanup_surface; } - dri2_surf->dri_drawable = - dri2_dpy->image_driver->createNewDrawable(dri2_dpy->dri_screen, config, - dri2_surf); - if (dri2_surf->dri_drawable == NULL) { - _eglError(EGL_BAD_ALLOC, "image->createNewDrawable"); + if (!dri2_create_drawable(dri2_dpy, config, dri2_surf)) goto cleanup_surface; - } if (conf->RedSize == 5) dri2_surf->visual = __DRI_IMAGE_FORMAT_RGB565; @@ -186,11 +181,12 @@ surfaceless_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp) struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); static const struct { const char *format_name; - unsigned int rgba_masks[4]; + int rgba_shifts[4]; + unsigned int rgba_sizes[4]; } visuals[] = { - { "ARGB8888", { 0xff0000, 0xff00, 0xff, 0xff000000 } }, - { "RGB888", { 0xff0000, 0xff00, 0xff, 0x0 } }, - { "RGB565", { 0x00f800, 0x07e0, 0x1f, 0x0 } }, + { "ARGB8888", { 16, 8, 0, 24 }, { 8, 8, 8, 8 } }, + { "RGB888", { 16, 8, 0, -1 }, { 8, 8, 8, 0 } }, + { "RGB565", { 11, 5, 0, -1 }, { 5, 6, 5, 0 } }, }; unsigned int format_count[ARRAY_SIZE(visuals)] = { 0 }; unsigned int config_count = 0; @@ -201,7 +197,7 @@ surfaceless_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp) dri2_conf = dri2_add_config(disp, dri2_dpy->driver_configs[i], config_count + 1, EGL_PBUFFER_BIT, NULL, - visuals[j].rgba_masks); + visuals[j].rgba_shifts, visuals[j].rgba_sizes); if (dri2_conf) { if (dri2_conf->base.ConfigID == config_count + 1) diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index 2a2c8214169..46e637444be 100644 --- 
a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -68,49 +68,57 @@ static const struct dri2_wl_visual { */ int alt_dri_image_format; int bpp; - unsigned int rgba_masks[4]; + int rgba_shifts[4]; + unsigned int rgba_sizes[4]; } dri2_wl_visuals[] = { { "XRGB2101010", WL_DRM_FORMAT_XRGB2101010, WL_SHM_FORMAT_XRGB2101010, __DRI_IMAGE_FORMAT_XRGB2101010, __DRI_IMAGE_FORMAT_XBGR2101010, 32, - { 0x3ff00000, 0x000ffc00, 0x000003ff, 0x00000000 } + { 20, 10, 0, -1 }, + { 10, 10, 10, 0 }, }, { "ARGB2101010", WL_DRM_FORMAT_ARGB2101010, WL_SHM_FORMAT_ARGB2101010, __DRI_IMAGE_FORMAT_ARGB2101010, __DRI_IMAGE_FORMAT_ABGR2101010, 32, - { 0x3ff00000, 0x000ffc00, 0x000003ff, 0xc0000000 } + { 20, 10, 0, 30 }, + { 10, 10, 10, 2 }, }, { "XBGR2101010", WL_DRM_FORMAT_XBGR2101010, WL_SHM_FORMAT_XBGR2101010, __DRI_IMAGE_FORMAT_XBGR2101010, __DRI_IMAGE_FORMAT_XRGB2101010, 32, - { 0x000003ff, 0x000ffc00, 0x3ff00000, 0x00000000 } + { 0, 10, 20, -1 }, + { 10, 10, 10, 0 }, }, { "ABGR2101010", WL_DRM_FORMAT_ABGR2101010, WL_SHM_FORMAT_ABGR2101010, __DRI_IMAGE_FORMAT_ABGR2101010, __DRI_IMAGE_FORMAT_ARGB2101010, 32, - { 0x000003ff, 0x000ffc00, 0x3ff00000, 0xc0000000 } + { 0, 10, 20, 30 }, + { 10, 10, 10, 2 }, }, { "XRGB8888", WL_DRM_FORMAT_XRGB8888, WL_SHM_FORMAT_XRGB8888, __DRI_IMAGE_FORMAT_XRGB8888, __DRI_IMAGE_FORMAT_NONE, 32, - { 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000 } + { 16, 8, 0, -1 }, + { 8, 8, 8, 0 }, }, { "ARGB8888", WL_DRM_FORMAT_ARGB8888, WL_SHM_FORMAT_ARGB8888, __DRI_IMAGE_FORMAT_ARGB8888, __DRI_IMAGE_FORMAT_NONE, 32, - { 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 } + { 16, 8, 0, 24 }, + { 8, 8, 8, 8 }, }, { "RGB565", WL_DRM_FORMAT_RGB565, WL_SHM_FORMAT_RGB565, __DRI_IMAGE_FORMAT_RGB565, __DRI_IMAGE_FORMAT_NONE, 16, - { 0xf800, 0x07e0, 0x001f, 0x0000 } + { 11, 5, 0, -1 }, + { 5, 6, 5, 0 }, }, }; @@ -118,20 +126,22 @@ static int dri2_wl_visual_idx_from_config(struct dri2_egl_display *dri2_dpy, const __DRIconfig *config) { - unsigned int red, green, blue, alpha; + int shifts[4]; + unsigned int sizes[4]; - dri2_dpy->core->getConfigAttrib(config, __DRI_ATTRIB_RED_MASK, &red); - dri2_dpy->core->getConfigAttrib(config, __DRI_ATTRIB_GREEN_MASK, &green); - dri2_dpy->core->getConfigAttrib(config, __DRI_ATTRIB_BLUE_MASK, &blue); - dri2_dpy->core->getConfigAttrib(config, __DRI_ATTRIB_ALPHA_MASK, &alpha); + dri2_get_shifts_and_sizes(dri2_dpy->core, config, shifts, sizes); for (unsigned int i = 0; i < ARRAY_SIZE(dri2_wl_visuals); i++) { const struct dri2_wl_visual *wl_visual = &dri2_wl_visuals[i]; - if (red == wl_visual->rgba_masks[0] && - green == wl_visual->rgba_masks[1] && - blue == wl_visual->rgba_masks[2] && - alpha == wl_visual->rgba_masks[3]) { + if (shifts[0] == wl_visual->rgba_shifts[0] && + shifts[1] == wl_visual->rgba_shifts[1] && + shifts[2] == wl_visual->rgba_shifts[2] && + shifts[3] == wl_visual->rgba_shifts[3] && + sizes[0] == wl_visual->rgba_sizes[0] && + sizes[1] == wl_visual->rgba_sizes[1] && + sizes[2] == wl_visual->rgba_sizes[2] && + sizes[3] == wl_visual->rgba_sizes[3]) { return i; } } @@ -272,7 +282,6 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, void *native_window, const EGLint *attrib_list) { - __DRIcreateNewDrawableFunc createNewDrawable; struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); struct dri2_egl_config *dri2_conf = dri2_egl_config(conf); struct wl_egl_window *window = native_window; @@ -349,19 +358,8 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, if (dri2_dpy->flush) 
dri2_surf->wl_win->resize_callback = resize_callback; - if (dri2_dpy->image_driver) - createNewDrawable = dri2_dpy->image_driver->createNewDrawable; - else if (dri2_dpy->dri2) - createNewDrawable = dri2_dpy->dri2->createNewDrawable; - else - createNewDrawable = dri2_dpy->swrast->createNewDrawable; - - dri2_surf->dri_drawable = (*createNewDrawable)(dri2_dpy->dri_screen, config, - dri2_surf); - if (dri2_surf->dri_drawable == NULL) { - _eglError(EGL_BAD_ALLOC, "createNewDrawable"); + if (!dri2_create_drawable(dri2_dpy, config, dri2_surf)) goto cleanup_surf_wrapper; - } dri2_surf->base.SwapInterval = dri2_dpy->default_swap_interval; @@ -675,6 +673,15 @@ update_buffers(struct dri2_egl_surface *dri2_surf) return 0; } +static int +update_buffers_if_needed(struct dri2_egl_surface *dri2_surf) +{ + if (dri2_surf->back != NULL) + return 0; + + return update_buffers(dri2_surf); +} + static __DRIbuffer * dri2_wl_get_buffers_with_format(__DRIdrawable * driDrawable, int *width, int *height, @@ -992,7 +999,7 @@ dri2_wl_swap_buffers_with_damage(_EGLDriver *drv, /* Make sure we have a back buffer in case we're swapping without ever * rendering. */ - if (get_back_bo(dri2_surf) < 0) + if (update_buffers_if_needed(dri2_surf) < 0) return _eglError(EGL_BAD_ALLOC, "dri2_swap_buffers"); if (draw->SwapInterval > 0) { @@ -1078,7 +1085,7 @@ dri2_wl_query_buffer_age(_EGLDriver *drv, { struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surface); - if (get_back_bo(dri2_surf) < 0) { + if (update_buffers_if_needed(dri2_surf) < 0) { _eglError(EGL_BAD_ALLOC, "dri2_query_buffer_age"); return -1; } @@ -1353,7 +1360,7 @@ dri2_wl_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp) continue; dri2_conf = dri2_add_config(disp, dri2_dpy->driver_configs[i], - count + 1, EGL_WINDOW_BIT, NULL, dri2_wl_visuals[j].rgba_masks); + count + 1, EGL_WINDOW_BIT, NULL, dri2_wl_visuals[j].rgba_shifts, dri2_wl_visuals[j].rgba_sizes); if (dri2_conf) { if (dri2_conf->base.ConfigID == count + 1) count++; @@ -1386,7 +1393,8 @@ dri2_wl_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp) */ dri2_conf = dri2_add_config(disp, dri2_dpy->driver_configs[i], count + 1, EGL_WINDOW_BIT, NULL, - dri2_wl_visuals[c].rgba_masks); + dri2_wl_visuals[c].rgba_shifts, + dri2_wl_visuals[c].rgba_sizes); if (dri2_conf) { if (dri2_conf->base.ConfigID == count + 1) count++; diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c index c8c676d2f00..d88e55dbf23 100644 --- a/src/egl/drivers/dri2/platform_x11.c +++ b/src/egl/drivers/dri2/platform_x11.c @@ -42,6 +42,7 @@ #include #include "util/debug.h" #include "util/macros.h" +#include "util/bitscan.h" #include "egl_dri2.h" #include "egl_dri2_fallbacks.h" @@ -261,7 +262,7 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, (void) drv; - dri2_surf = malloc(sizeof *dri2_surf); + dri2_surf = calloc(1, sizeof *dri2_surf); if (!dri2_surf) { _eglError(EGL_BAD_ALLOC, "dri2_create_surface"); return NULL; @@ -289,21 +290,8 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, goto cleanup_pixmap; } - if (dri2_dpy->dri2) { - dri2_surf->dri_drawable = - dri2_dpy->dri2->createNewDrawable(dri2_dpy->dri_screen, config, - dri2_surf); - } else { - assert(dri2_dpy->swrast); - dri2_surf->dri_drawable = - dri2_dpy->swrast->createNewDrawable(dri2_dpy->dri_screen, config, - dri2_surf); - } - - if (dri2_surf->dri_drawable == NULL) { - _eglError(EGL_BAD_ALLOC, "dri2->createNewDrawable"); + if (!dri2_create_drawable(dri2_dpy, config, dri2_surf)) goto 
cleanup_pixmap; - } if (type != EGL_PBUFFER_BIT) { cookie = xcb_get_geometry (dri2_dpy->conn, dri2_surf->drawable); @@ -778,9 +766,49 @@ dri2_x11_config_match_attrib(struct dri2_egl_display *dri2_dpy, return config_val == value; } +/** + * See if the X server can export a pixmap with the given color depth. + * + * Glamor in xorg-server 1.20 can't export pixmaps which have a different + * color depth than the root window as a DRI image. This makes it impossible + * to expose pbuffer-only visuals with, say, 16bpp on a 24bpp X display. + */ +static bool +x11_can_export_pixmap_with_bpp(struct dri2_egl_display *dri2_dpy, int bpp) +{ + bool supported = false; + +#ifdef HAVE_DRI3 + xcb_dri3_buffer_from_pixmap_cookie_t cookie; + xcb_dri3_buffer_from_pixmap_reply_t *reply; + + xcb_pixmap_t pixmap = xcb_generate_id(dri2_dpy->conn); + xcb_create_pixmap(dri2_dpy->conn, bpp, pixmap, dri2_dpy->screen->root, 1, 1); + cookie = xcb_dri3_buffer_from_pixmap(dri2_dpy->conn, pixmap); + reply = xcb_dri3_buffer_from_pixmap_reply(dri2_dpy->conn, cookie, NULL); + + if (reply) { + int *fds = xcb_dri3_buffer_from_pixmap_reply_fds(dri2_dpy->conn, reply); + + for (int i = 0; i < reply->nfd; i++) { + close(fds[i]); + } + + supported = true; + + free(reply); + } + + xcb_free_pixmap(dri2_dpy->conn, pixmap); +#endif + + return supported; +} + static EGLBoolean dri2_x11_add_configs_for_visuals(struct dri2_egl_display *dri2_dpy, - _EGLDisplay *disp, bool supports_preserved) + _EGLDisplay *disp, bool supports_preserved, + bool add_pbuffer_configs) { xcb_depth_iterator_t d; xcb_visualtype_t *visuals; @@ -818,16 +846,23 @@ dri2_x11_add_configs_for_visuals(struct dri2_egl_display *dri2_dpy, EGL_NONE }; - unsigned int rgba_masks[4] = { - visuals[i].red_mask, - visuals[i].green_mask, - visuals[i].blue_mask, + int rgba_shifts[4] = { + ffs(visuals[i].red_mask) - 1, + ffs(visuals[i].green_mask) - 1, + ffs(visuals[i].blue_mask) - 1, + -1, + }; + + unsigned int rgba_sizes[4] = { + util_bitcount(visuals[i].red_mask), + util_bitcount(visuals[i].green_mask), + util_bitcount(visuals[i].blue_mask), 0, }; dri2_conf = dri2_add_config(disp, config, config_count + 1, surface_type, config_attrs, - rgba_masks); + rgba_shifts, rgba_sizes); if (dri2_conf) if (dri2_conf->base.ConfigID == config_count + 1) config_count++; @@ -841,11 +876,14 @@ dri2_x11_add_configs_for_visuals(struct dri2_egl_display *dri2_dpy, * wants... especially on drivers that only have 32-bit RGBA * EGLConfigs! */ if (d.data->depth == 24 || d.data->depth == 30) { - rgba_masks[3] = - ~(rgba_masks[0] | rgba_masks[1] | rgba_masks[2]); + unsigned int rgba_mask = ~(visuals[i].red_mask | + visuals[i].green_mask | + visuals[i].blue_mask); + rgba_shifts[3] = ffs(rgba_mask) - 1; + rgba_sizes[3] = util_bitcount(rgba_mask); dri2_conf = dri2_add_config(disp, config, config_count + 1, surface_type, config_attrs, - rgba_masks); + rgba_shifts, rgba_sizes); if (dri2_conf) if (dri2_conf->base.ConfigID == config_count + 1) config_count++; @@ -859,35 +897,37 @@ dri2_x11_add_configs_for_visuals(struct dri2_egl_display *dri2_dpy, /* Add a 565-no-depth-no-stencil pbuffer-only config. If X11 is depth 24, * we wouldn't have 565 available, which the CTS demands. 
*/ - for (int j = 0; dri2_dpy->driver_configs[j]; j++) { - const __DRIconfig *config = dri2_dpy->driver_configs[j]; - const EGLint config_attrs[] = { - EGL_NATIVE_VISUAL_ID, 0, - EGL_NATIVE_VISUAL_TYPE, EGL_NONE, - EGL_NONE - }; - EGLint surface_type = EGL_PBUFFER_BIT; - unsigned int rgba_masks[4] = { - 0x1f << 11, - 0x3f << 5, - 0x1f << 0, - 0, - }; - - /* Check that we've found single-sample, no depth, no stencil. */ - if (!dri2_x11_config_match_attrib(dri2_dpy, config, - __DRI_ATTRIB_DEPTH_SIZE, 0) || - !dri2_x11_config_match_attrib(dri2_dpy, config, - __DRI_ATTRIB_STENCIL_SIZE, 0) || - !dri2_x11_config_match_attrib(dri2_dpy, config, - __DRI_ATTRIB_SAMPLES, 0)) { - continue; - } - - if (dri2_add_config(disp, config, config_count + 1, surface_type, - config_attrs, rgba_masks)) { - config_count++; - break; + if (add_pbuffer_configs && x11_can_export_pixmap_with_bpp(dri2_dpy, 16)) { + for (int j = 0; dri2_dpy->driver_configs[j]; j++) { + const __DRIconfig *config = dri2_dpy->driver_configs[j]; + const EGLint config_attrs[] = { + EGL_NATIVE_VISUAL_ID, 0, + EGL_NATIVE_VISUAL_TYPE, EGL_NONE, + EGL_NONE + }; + EGLint surface_type = EGL_PBUFFER_BIT; + int rgba_shifts[4] = { 11, 5, 0, -1 }; + unsigned int rgba_sizes[4] = { 5, 6, 5, 0 }; + + /* Check that we've found single-sample, no depth, no stencil, + * and single-buffered. + */ + if (!dri2_x11_config_match_attrib(dri2_dpy, config, + __DRI_ATTRIB_DEPTH_SIZE, 0) || + !dri2_x11_config_match_attrib(dri2_dpy, config, + __DRI_ATTRIB_STENCIL_SIZE, 0) || + !dri2_x11_config_match_attrib(dri2_dpy, config, + __DRI_ATTRIB_SAMPLES, 0) || + !dri2_x11_config_match_attrib(dri2_dpy, config, + __DRI_ATTRIB_DOUBLE_BUFFER, 0)) { + continue; + } + + if (dri2_add_config(disp, config, config_count + 1, surface_type, + config_attrs, rgba_shifts, rgba_sizes)) { + config_count++; + break; + } } } @@ -1355,7 +1395,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp) dri2_setup_screen(disp); - if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, true)) + if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, true, false)) goto cleanup; /* Fill vtbl last to prevent accidentally calling virtual function during @@ -1453,7 +1493,7 @@ dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp) dri2_set_WL_bind_wayland_display(drv, disp); - if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, false)) + if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, false, true)) goto cleanup; dri2_dpy->loader_dri3_ext.core = dri2_dpy->core; @@ -1563,7 +1603,7 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp) dri2_set_WL_bind_wayland_display(drv, disp); - if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, true)) + if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, true, false)) goto cleanup; /* Fill vtbl last to prevent accidentally calling virtual function during diff --git a/src/egl/drivers/dri2/platform_x11_dri3.c b/src/egl/drivers/dri2/platform_x11_dri3.c index 189212745ce..04c041cac9b 100644 --- a/src/egl/drivers/dri2/platform_x11_dri3.c +++ b/src/egl/drivers/dri2/platform_x11_dri3.c @@ -402,13 +402,15 @@ dri3_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp, static void dri3_flush_front_buffer(__DRIdrawable *driDrawable, void *loaderPrivate) { + struct loader_dri3_drawable *draw = loaderPrivate; + (void) driDrawable; + /* There does not seem to be any kind of consensus on whether we should * support front-buffer rendering or not: * http://lists.freedesktop.org/archives/mesa-dev/2013-June/040129.html */ - _eglLog(_EGL_WARNING, "FIXME: 
egl/x11 doesn't support front buffer rendering."); - (void) driDrawable; - (void) loaderPrivate; + if (!draw->is_pixmap) + _eglLog(_EGL_WARNING, "FIXME: egl/x11 doesn't support front buffer rendering."); } const __DRIimageLoaderExtension dri3_image_loader_extension = { diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 588c6a5f1eb..a87df71fd39 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -95,6 +95,7 @@ #include "c99_compat.h" #include "c11/threads.h" #include "util/macros.h" +#include "util/android_trace.h" #include "eglapi.h" #include "egldefines.h" @@ -411,6 +412,11 @@ _eglGetPlatformDisplayCommon(EGLenum platform, void *native_display, case EGL_PLATFORM_SURFACELESS_MESA: disp = _eglGetSurfacelessDisplay(native_display, attrib_list); break; +#endif +#ifdef HAVE_ANDROID_PLATFORM + case EGL_PLATFORM_ANDROID_KHR: + disp = _eglGetAndroidDisplay(native_display, attrib_list); + break; #endif default: RETURN_EGL_ERROR(NULL, EGL_BAD_PARAMETER, NULL); @@ -674,6 +680,10 @@ eglTerminate(EGLDisplay dpy) /* do not reset disp->Driver */ disp->ClientAPIsString[0] = 0; disp->Initialized = EGL_FALSE; + + /* Reset blob cache funcs on terminate. */ + disp->BlobCacheSet = NULL; + disp->BlobCacheGet = NULL; } RETURN_EGL_SUCCESS(disp, EGL_TRUE); @@ -1300,6 +1310,7 @@ static EGLBoolean _eglSwapBuffersWithDamageCommon(_EGLDisplay *disp, _EGLSurface *surf, EGLint *rects, EGLint n_rects) { + MTRACE_BEGIN(); _EGLContext *ctx = _eglGetCurrentContext(); _EGLDriver *drv; EGLBoolean ret; @@ -1328,6 +1339,7 @@ _eglSwapBuffersWithDamageCommon(_EGLDisplay *disp, _EGLSurface *surf, surf->BufferAgeRead = EGL_FALSE; } + MTRACE_END(); RETURN_EGL_EVAL(disp, ret); } @@ -1345,10 +1357,13 @@ static EGLBoolean EGLAPIENTRY eglSwapBuffersWithDamageKHR(EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects) { + MTRACE_BEGIN(); _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLSurface *surf = _eglLookupSurface(surface, disp); _EGL_FUNC_START(disp, EGL_OBJECT_SURFACE_KHR, surf, EGL_FALSE); - return _eglSwapBuffersWithDamageCommon(disp, surf, rects, n_rects); + EGLBoolean ret = _eglSwapBuffersWithDamageCommon(disp, surf, rects, n_rects); + MTRACE_END(); + return ret; } /** diff --git a/src/egl/main/egldevice.c b/src/egl/main/egldevice.c index c5c9a21273a..ece0f4075da 100644 --- a/src/egl/main/egldevice.c +++ b/src/egl/main/egldevice.c @@ -108,9 +108,9 @@ static int _eglAddDRMDevice(drmDevicePtr device, _EGLDevice **out_dev) { _EGLDevice *dev; + const int wanted_nodes = 1 << DRM_NODE_RENDER | 1 << DRM_NODE_PRIMARY; - if ((device->available_nodes & (1 << DRM_NODE_PRIMARY | - 1 << DRM_NODE_RENDER)) == 0) + if ((device->available_nodes & wanted_nodes) != wanted_nodes) return -1; dev = _eglGlobal.DeviceList; diff --git a/src/egl/main/egldisplay.c b/src/egl/main/egldisplay.c index ba5f84510fe..59e069641a6 100644 --- a/src/egl/main/egldisplay.c +++ b/src/egl/main/egldisplay.c @@ -542,3 +542,19 @@ _eglGetSurfacelessDisplay(void *native_display, return _eglFindDisplay(_EGL_PLATFORM_SURFACELESS, native_display); } #endif /* HAVE_SURFACELESS_PLATFORM */ + +#ifdef HAVE_ANDROID_PLATFORM +_EGLDisplay* +_eglGetAndroidDisplay(void *native_display, + const EGLAttrib *attrib_list) +{ + + /* This platform recognizes no display attributes. 
*/ + if (attrib_list != NULL && attrib_list[0] != EGL_NONE) { + _eglError(EGL_BAD_ATTRIBUTE, "eglGetPlatformDisplay"); + return NULL; + } + + return _eglFindDisplay(_EGL_PLATFORM_ANDROID, native_display); +} +#endif /* HAVE_ANDROID_PLATFORM */ diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index cfd0ff66d64..9f6a1d6f6aa 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -301,6 +301,12 @@ _eglGetSurfacelessDisplay(void *native_display, const EGLAttrib *attrib_list); #endif +#ifdef HAVE_ANDROID_PLATFORM +_EGLDisplay* +_eglGetAndroidDisplay(void *native_display, + const EGLAttrib *attrib_list); +#endif + #ifdef __cplusplus } #endif diff --git a/src/freedreno/Android.drm.mk b/src/freedreno/Android.drm.mk new file mode 100644 index 00000000000..dfa9bed7d2e --- /dev/null +++ b/src/freedreno/Android.drm.mk @@ -0,0 +1,41 @@ +# Mesa 3-D graphics library +# +# Copyright (C) +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Android.mk for libfreedreno_drm.a + +# --------------------------------------- +# Build libfreedreno_drm +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(drm_SOURCES) + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/gallium/include \ + $(MESA_TOP)/src/gallium/auxiliary + +LOCAL_MODULE := libfreedreno_drm + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/freedreno/Android.ir3.mk b/src/freedreno/Android.ir3.mk new file mode 100644 index 00000000000..c6a9d3288d7 --- /dev/null +++ b/src/freedreno/Android.ir3.mk @@ -0,0 +1,51 @@ +# Mesa 3-D graphics library +# +# Copyright (C) +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Android.mk for libfreedreno_ir3.a + +# --------------------------------------- +# Build libfreedreno_ir3 +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(ir3_SOURCES) + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/compiler/nir \ + $(MESA_TOP)/src/gallium/include \ + $(MESA_TOP)/src/gallium/auxiliary \ + $(MESA_TOP)/prebuilt-intermediates/nir \ + +# We need libmesa_nir to get NIR's generated include directories. +LOCAL_STATIC_LIBRARIES := \ + libmesa_nir + +LOCAL_MODULE := libfreedreno_ir3 + +LOCAL_GENERATED_SOURCES := \ + $(MESA_GEN_GLSL_H) \ + $(MESA_GEN_NIR_H) + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/freedreno/Android.mk b/src/freedreno/Android.mk new file mode 100644 index 00000000000..e46e2199dc1 --- /dev/null +++ b/src/freedreno/Android.mk @@ -0,0 +1,30 @@ +# Mesa 3-D graphics library +# +# Copyright (C) +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +# Android.mk for libfreedreno_* + +LOCAL_PATH := $(call my-dir) + +include $(LOCAL_PATH)/Makefile.sources +include $(MESA_TOP)/src/gallium/drivers/freedreno/Android.gen.mk +include $(LOCAL_PATH)/Android.drm.mk +include $(LOCAL_PATH)/Android.ir3.mk diff --git a/src/freedreno/Makefile.sources b/src/freedreno/Makefile.sources index aa8edec82f2..a46a2c30913 100644 --- a/src/freedreno/Makefile.sources +++ b/src/freedreno/Makefile.sources @@ -36,6 +36,8 @@ ir3_SOURCES := \ ir3/ir3_nir.c \ ir3/ir3_nir.h \ ir3/ir3_nir_analyze_ubo_ranges.c \ + ir3/ir3_nir_lower_load_barycentric_at_sample.c \ + ir3/ir3_nir_lower_load_barycentric_at_offset.c \ ir3/ir3_nir_lower_io_offsets.c \ ir3/ir3_nir_lower_tg4_to_tex.c \ ir3/ir3_nir_move_varying_inputs.c \ diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 97f4ae96cd9..5c049a5936f 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -48,7 +48,7 @@ void * ir3_alloc(struct ir3 *shader, int sz) struct ir3 * ir3_create(struct ir3_compiler *compiler, gl_shader_stage type, unsigned nin, unsigned nout) { - struct ir3 *shader = rzalloc(compiler, struct ir3); + struct ir3 *shader = rzalloc(NULL, struct ir3); shader->compiler = compiler; shader->type = type; diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index f1d9b53c7c4..353dfdfe252 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -1044,6 +1044,7 @@ emit_intrinsic_barrier(struct ir3_context *ctx, nir_intrinsic_instr *intr) barrier->cat7.g = true; barrier->cat7.r = true; barrier->cat7.w = true; + barrier->cat7.l = true; barrier->barrier_class = IR3_BARRIER_IMAGE_W | IR3_BARRIER_BUFFER_W; barrier->barrier_conflict = @@ -1304,7 +1305,8 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) idx += nir_src_as_uint(intr->src[1]); for (int i = 0; i < intr->num_components; i++) { unsigned inloc = idx * 4 + i + comp; - if (ctx->so->inputs[idx].bary) { + if (ctx->so->inputs[idx].bary && + !ctx->so->inputs[idx].use_ldlv) { dst[i] = ir3_BARY_F(b, create_immed(b, inloc), 0, coord, 0); } else { /* for non-varyings use the pre-setup input, since @@ -2375,6 +2377,7 @@ setup_input(struct ir3_context *ctx, nir_variable *in) so->inputs[n].compmask = (1 << (ncomp + frac)) - 1; so->inputs_count = MAX2(so->inputs_count, n + 1); so->inputs[n].interpolate = in->data.interpolation; + so->inputs[n].ncomp = ncomp; if (ctx->so->type == MESA_SHADER_FRAGMENT) { @@ -2402,8 +2405,6 @@ setup_input(struct ir3_context *ctx, nir_variable *in) so->inputs[n].bary = true; instr = create_frag_input(ctx, false, idx); } else { - bool use_ldlv = false; - /* detect the special case for front/back colors where * we need to do flat vs smooth shading depending on * rast state: @@ -2424,12 +2425,12 @@ setup_input(struct ir3_context *ctx, nir_variable *in) if (ctx->compiler->flat_bypass) { if ((so->inputs[n].interpolate == INTERP_MODE_FLAT) || (so->inputs[n].rasterflat && ctx->so->key.rasterflat)) - use_ldlv = true; + so->inputs[n].use_ldlv = true; } so->inputs[n].bary = true; - instr = create_frag_input(ctx, use_ldlv, idx); + instr = create_frag_input(ctx, so->inputs[n].use_ldlv, idx); } compile_assert(ctx, idx < ctx->ir->ninputs); diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c index 312c0644623..46216a6f862 100644 --- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c +++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c @@ -32,11 +32,11 @@ 
get_ubo_load_range(nir_intrinsic_instr *instr) { struct ir3_ubo_range r; - const int bytes = nir_intrinsic_dest_components(instr) * - (nir_dest_bit_size(instr->dest) / 8); + const int offset = nir_src_as_uint(instr->src[1]); + const int bytes = nir_intrinsic_dest_components(instr) * 4; - r.start = ROUND_DOWN_TO(nir_src_as_uint(instr->src[1]), 16 * 4); - r.end = ALIGN(r.start + bytes, 16 * 4); + r.start = ROUND_DOWN_TO(offset, 16 * 4); + r.end = ALIGN(offset + bytes, 16 * 4); return r; } diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 7c1dc38de23..be4e93b9a8f 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -414,6 +414,7 @@ struct ir3_shader_variant { /* fragment shader specific: */ bool bary : 1; /* fetched varying (vs one loaded into reg) */ bool rasterflat : 1; /* special handling for emit->rasterflat */ + bool use_ldlv : 1; /* internal to ir3_compiler_nir */ bool half : 1; enum glsl_interp_mode interpolate; } inputs[16 + 2]; /* +POSITION +FACE */ diff --git a/src/freedreno/vulkan/meson.build b/src/freedreno/vulkan/meson.build index f58aff2cbdb..a25193a8147 100644 --- a/src/freedreno/vulkan/meson.build +++ b/src/freedreno/vulkan/meson.build @@ -90,12 +90,10 @@ libvulkan_freedreno = shared_library( include_directories : [ inc_common, inc_compiler, - inc_vulkan_util, inc_vulkan_wsi, inc_freedreno, ], link_with : [ - libvulkan_util, libvulkan_wsi, libmesa_util, libfreedreno_drm, # required by ir3_shader_get_variant, which we don't use @@ -111,6 +109,7 @@ libvulkan_freedreno = shared_library( dep_valgrind, idep_nir, tu_deps, + idep_vulkan_util, ], c_args : [c_vis_args, no_override_init_args, tu_flags], link_args : [ld_args_bsymbolic, ld_args_gc_sections], diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index aae72c7a843..b2cf3fe7168 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -897,7 +897,7 @@ static const VkQueueFamilyProperties tu_queue_family_properties = { VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT, .queueCount = 1, .timestampValidBits = 64, - .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, + .minImageTransferGranularity = { 1, 1, 1 }, }; void diff --git a/src/gallium/Android.common.mk b/src/gallium/Android.common.mk index 782510ff0f4..0d55f04ac94 100644 --- a/src/gallium/Android.common.mk +++ b/src/gallium/Android.common.mk @@ -27,6 +27,9 @@ LOCAL_C_INCLUDES += \ $(GALLIUM_TOP)/include \ $(GALLIUM_TOP)/auxiliary \ $(GALLIUM_TOP)/winsys \ - $(GALLIUM_TOP)/drivers + $(GALLIUM_TOP)/drivers \ + $(MESA_TOP)/src/freedreno \ + $(MESA_TOP)/src/freedreno/ir3 \ + $(MESA_TOP)/src/freedreno/registers include $(MESA_COMMON_MK) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 3fc096789c0..f8c69585e6a 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -950,6 +950,8 @@ draw_set_mapped_so_targets(struct draw_context *draw, { int i; + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + for (i = 0; i < num_targets; i++) draw->so.targets[i] = targets[i]; for (i = num_targets; i < PIPE_MAX_SO_BUFFERS; i++) diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 6420cfbb261..c752163ee05 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -753,8 +753,10 @@ void draw_gs_destroy( struct draw_context *draw ) { int i; if (draw->gs.tgsi.machine) { 
- for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) + for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) { align_free(draw->gs.tgsi.machine->Primitives[i]); + align_free(draw->gs.tgsi.machine->PrimitiveOffsets[i]); + } tgsi_exec_machine_destroy(draw->gs.tgsi.machine); } } diff --git a/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h b/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h index 9db0dc01117..f043bcf1ab3 100644 --- a/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h +++ b/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h @@ -37,4 +37,5 @@ DRI_CONF_SECTION_MISCELLANEOUS DRI_CONF_ALWAYS_HAVE_DEPTH_BUFFER("false") DRI_CONF_GLSL_ZERO_INIT("false") DRI_CONF_ALLOW_RGB10_CONFIGS("true") + DRI_CONF_ALLOW_FP16_CONFIGS("false") DRI_CONF_SECTION_END diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 15edf2f48dc..b6758fc2f9b 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -213,8 +213,10 @@ set_vertex_shader(struct blit_state *ctx) * fragment shader input semantics and vertex_element/buffers. */ if (!ctx->vs) { - const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC }; + const enum tgsi_semantic semantic_names[] = { + TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC + }; const uint semantic_indexes[] = { 0, 0 }; ctx->vs = util_make_vertex_passthrough_shader(ctx->pipe, 2, semantic_names, diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index e19fde9873d..3dc49cd0958 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -2059,7 +2059,8 @@ void util_blitter_generate_mipmap(struct blitter_context *blitter, target = PIPE_TEXTURE_2D_ARRAY; assert(tex->nr_samples <= 1); - assert(!util_format_has_stencil(desc)); + /* Disallow stencil formats without depth. 
*/ + assert(!util_format_has_stencil(desc) || util_format_has_depth(desc)); is_depth = desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS; diff --git a/src/gallium/auxiliary/util/u_debug_symbol.c b/src/gallium/auxiliary/util/u_debug_symbol.c index 22e6c8ce771..d8380b76bf6 100644 --- a/src/gallium/auxiliary/util/u_debug_symbol.c +++ b/src/gallium/auxiliary/util/u_debug_symbol.c @@ -219,7 +219,7 @@ debug_symbol_name_dbghelp(const void *addr, char* buf, unsigned size) #endif /* PIPE_OS_WINDOWS */ -#if defined(__GLIBC__) && !defined(__UCLIBC__) +#if defined(HAVE_EXECINFO_H) #include @@ -240,7 +240,7 @@ debug_symbol_name_glibc(const void *addr, char* buf, unsigned size) return TRUE; } -#endif /* defined(__GLIBC__) && !defined(__UCLIBC__) */ +#endif /* defined(HAVE_EXECINFO_H) */ void @@ -252,11 +252,11 @@ debug_symbol_name(const void *addr, char* buf, unsigned size) } #endif -#if defined(__GLIBC__) && !defined(__UCLIBC__) +#if defined(HAVE_EXECINFO_H) if (debug_symbol_name_glibc(addr, buf, size)) { return; } -#endif +#endif /* defined(HAVE_EXECINFO_H) */ util_snprintf(buf, size, "%p", addr); buf[size - 1] = 0; diff --git a/src/gallium/auxiliary/util/u_dump_defines.c b/src/gallium/auxiliary/util/u_dump_defines.c index 50dfa37bab7..41108c7248a 100644 --- a/src/gallium/auxiliary/util/u_dump_defines.c +++ b/src/gallium/auxiliary/util/u_dump_defines.c @@ -418,6 +418,7 @@ static const char * util_query_type_short_names[] = { "occlusion_counter", "occlusion_predicate", + "occlusion_predicate_conservative", "timestamp", "timestamp_disjoint", "time_elapsed", @@ -425,6 +426,7 @@ util_query_type_short_names[] = { "primitives_emitted", "so_statistics", "so_overflow_predicate", + "so_overflow_any_predicate", "gpu_finished", "pipeline_statistics", }; diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index 567d3d0c542..21b8c162b30 100644 --- a/src/gallium/auxiliary/util/u_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -668,11 +668,13 @@ util_copy_image_view(struct pipe_image_view *dst, pipe_resource_reference(&dst->resource, src->resource); dst->format = src->format; dst->access = src->access; + dst->shader_access = src->shader_access; dst->u = src->u; } else { pipe_resource_reference(&dst->resource, NULL); dst->format = PIPE_FORMAT_NONE; dst->access = 0; + dst->shader_access = 0; memset(&dst->u, 0, sizeof(dst->u)); } } diff --git a/src/gallium/auxiliary/util/u_screen.c b/src/gallium/auxiliary/util/u_screen.c index 410f17421e6..3f64e466d7f 100644 --- a/src/gallium/auxiliary/util/u_screen.c +++ b/src/gallium/auxiliary/util/u_screen.c @@ -282,6 +282,7 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, case PIPE_CAP_TGSI_BALLOT: case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: + case PIPE_CAP_TGSI_DIV: return 0; case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: @@ -325,7 +326,7 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, case PIPE_CAP_TGSI_ATOMFADD: case PIPE_CAP_TGSI_SKIP_SHRINK_IO_ARRAYS: case PIPE_CAP_IMAGE_LOAD_FORMATTED: - case PIPE_CAP_PREFER_COMPUTE_BLIT_FOR_MULTIMEDIA: + case PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA: return 0; case PIPE_CAP_MAX_GS_INVOCATIONS: @@ -359,7 +360,7 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, return 1; case PIPE_CAP_DMABUF: -#ifdef PIPE_OS_LINUX +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) return 1; #else return 0; diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index 
12c58ff7436..a5ab3e06b5e 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -70,7 +70,7 @@ init_shaders(struct vl_compositor *c) return false; } - if (c->pipe_compute_supported) { + if (c->pipe_cs_composit_supported) { c->cs_video_buffer = vl_compositor_cs_create_shader(c, compute_shader_video_buffer); if (!c->cs_video_buffer) { debug_printf("Unable to create video_buffer compute shader.\n"); @@ -125,7 +125,7 @@ static void cleanup_shaders(struct vl_compositor *c) c->pipe->delete_fs_state(c->pipe, c->fs_rgb_yuv.y); c->pipe->delete_fs_state(c->pipe, c->fs_rgb_yuv.uv); - if (c->pipe_compute_supported) { + if (c->pipe_cs_composit_supported) { c->pipe->delete_compute_state(c->pipe, c->cs_video_buffer); c->pipe->delete_compute_state(c->pipe, c->cs_weave_rgb); c->pipe->delete_compute_state(c->pipe, c->cs_rgba); @@ -561,7 +561,7 @@ vl_compositor_set_buffer_layer(struct vl_compositor_state *s, float half_a_line = 0.5f / s->layers[layer].zw.y; switch(deinterlace) { case VL_COMPOSITOR_WEAVE: - if (c->pipe_compute_supported) + if (c->pipe_cs_composit_supported) s->layers[layer].cs = c->cs_weave_rgb; else s->layers[layer].fs = c->fs_weave_rgb; @@ -571,7 +571,7 @@ vl_compositor_set_buffer_layer(struct vl_compositor_state *s, s->layers[layer].zw.x = 0.0f; s->layers[layer].src.tl.y += half_a_line; s->layers[layer].src.br.y += half_a_line; - if (c->pipe_compute_supported) + if (c->pipe_cs_composit_supported) s->layers[layer].cs = c->cs_video_buffer; else s->layers[layer].fs = c->fs_video_buffer; @@ -581,7 +581,7 @@ vl_compositor_set_buffer_layer(struct vl_compositor_state *s, s->layers[layer].zw.x = 1.0f; s->layers[layer].src.tl.y -= half_a_line; s->layers[layer].src.br.y -= half_a_line; - if (c->pipe_compute_supported) + if (c->pipe_cs_composit_supported) s->layers[layer].cs = c->cs_video_buffer; else s->layers[layer].fs = c->fs_video_buffer; @@ -589,7 +589,7 @@ vl_compositor_set_buffer_layer(struct vl_compositor_state *s, } } else { - if (c->pipe_compute_supported) + if (c->pipe_cs_composit_supported) s->layers[layer].cs = c->cs_video_buffer; else s->layers[layer].fs = c->fs_video_buffer; @@ -757,7 +757,10 @@ vl_compositor_init(struct vl_compositor *c, struct pipe_context *pipe) memset(c, 0, sizeof(*c)); - c->pipe_compute_supported = pipe->screen->get_param(pipe->screen, PIPE_CAP_COMPUTE); + c->pipe_cs_composit_supported = pipe->screen->get_param(pipe->screen, PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA) && + pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_TEX_TXF_LZ) && + pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_DIV); + c->pipe = pipe; if (!init_pipe_state(c)) { diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h index d42618824a7..ae06c153d5b 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.h +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -121,7 +121,7 @@ struct vl_compositor void *cs_weave_rgb; void *cs_rgba; - bool pipe_compute_supported; + bool pipe_cs_composit_supported; struct { struct { diff --git a/src/gallium/auxiliary/vl/vl_compositor_cs.c b/src/gallium/auxiliary/vl/vl_compositor_cs.c index 485b4174b8e..abd2aa63f32 100644 --- a/src/gallium/auxiliary/vl/vl_compositor_cs.c +++ b/src/gallium/auxiliary/vl/vl_compositor_cs.c @@ -61,7 +61,7 @@ const char *compute_shader_video_buffer = "IMM[0] UINT32 { 8, 8, 1, 0}\n" "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n" - "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n" + "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n" /* Drawn area check */ "USGE 
TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n" @@ -70,20 +70,20 @@ const char *compute_shader_video_buffer = "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n" "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n" - "UIF TEMP[1]\n" + "UIF TEMP[1].xxxx\n" /* Translate */ - "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n" - "U2F TEMP[2], TEMP[2]\n" - "DIV TEMP[3], TEMP[2], IMM[1].yyyy\n" + "UADD TEMP[2].xy, TEMP[0].xyyy, -CONST[5].xyxy\n" + "U2F TEMP[2].xy, TEMP[2].xyyy\n" + "DIV TEMP[3].xy, TEMP[2].xyyy, IMM[1].yyyy\n" /* Scale */ - "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n" - "DIV TEMP[3], TEMP[3], CONST[3].zwzw\n" + "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwww\n" + "DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwww\n" /* Fetch texels */ - "TEX_LZ TEMP[4].x, TEMP[2], SAMP[0], RECT\n" - "TEX_LZ TEMP[4].y, TEMP[3], SAMP[1], RECT\n" - "TEX_LZ TEMP[4].z, TEMP[3], SAMP[2], RECT\n" + "TEX_LZ TEMP[4].x, TEMP[2].xyyy, SAMP[0], RECT\n" + "TEX_LZ TEMP[4].y, TEMP[3].xyyy, SAMP[1], RECT\n" + "TEX_LZ TEMP[4].z, TEMP[3].xyyy, SAMP[2], RECT\n" "MOV TEMP[4].w, IMM[1].xxxx\n" @@ -93,12 +93,12 @@ const char *compute_shader_video_buffer = "DP4 TEMP[7].z, CONST[2], TEMP[4]\n" "MOV TEMP[5].w, TEMP[4].zzzz\n" - "SLE TEMP[6].w, TEMP[5], CONST[3].xxxx\n" - "SGT TEMP[5].w, TEMP[5], CONST[3].yyyy\n" + "SLE TEMP[6].w, TEMP[5].wwww, CONST[3].xxxx\n" + "SGT TEMP[5].w, TEMP[5].wwww, CONST[3].yyyy\n" - "MAX TEMP[7].w, TEMP[5], TEMP[6]\n" + "MAX TEMP[7].w, TEMP[5].wwww, TEMP[6].wwww\n" - "STORE IMAGE[0], TEMP[0], TEMP[7], 2D\n" + "STORE IMAGE[0], TEMP[0].xyyy, TEMP[7], 2D\n" "ENDIF\n" "END\n"; @@ -124,7 +124,7 @@ const char *compute_shader_weave = "IMM[2] UINT32 { 1, 2, 4, 0}\n" "IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n" - "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n" + "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n" /* Drawn area check */ "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n" @@ -133,22 +133,22 @@ const char *compute_shader_weave = "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n" "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n" - "UIF TEMP[1]\n" - "MOV TEMP[2], TEMP[0]\n" + "UIF TEMP[1].xxxx\n" + "MOV TEMP[2].xy, TEMP[0].xyyy\n" /* Translate */ - "UADD TEMP[2].xy, TEMP[2], -CONST[5].xyxy\n" + "UADD TEMP[2].xy, TEMP[2].xyyy, -CONST[5].xyxy\n" /* Top Y */ - "U2F TEMP[2], TEMP[2]\n" + "U2F TEMP[2].xy, TEMP[2].xyyy\n" "DIV TEMP[2].y, TEMP[2].yyyy, IMM[1].yyyy\n" /* Down Y */ - "MOV TEMP[12], TEMP[2]\n" + "MOV TEMP[12].xy, TEMP[2].xyyy\n" /* Top UV */ - "MOV TEMP[3], TEMP[2]\n" + "MOV TEMP[3].xy, TEMP[2].xyyy\n" "DIV TEMP[3].xy, TEMP[3], IMM[1].yyyy\n" /* Down UV */ - "MOV TEMP[13], TEMP[3]\n" + "MOV TEMP[13].xy, TEMP[3].xyyy\n" /* Texture offset */ "ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy\n" @@ -162,10 +162,10 @@ const char *compute_shader_weave = "ADD TEMP[13].y, TEMP[13].yyyy, IMM[3].wwww\n" /* Scale */ - "DIV TEMP[2].xy, TEMP[2], CONST[3].zwzw\n" - "DIV TEMP[12].xy, TEMP[12], CONST[3].zwzw\n" - "DIV TEMP[3].xy, TEMP[3], CONST[3].zwzw\n" - "DIV TEMP[13].xy, TEMP[13], CONST[3].zwzw\n" + "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwzw\n" + "DIV TEMP[12].xy, TEMP[12].xyyy, CONST[3].zwzw\n" + "DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwzw\n" + "DIV TEMP[13].xy, TEMP[13].xyyy, CONST[3].zwzw\n" /* Weave offset */ "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n" @@ -176,32 +176,32 @@ const char *compute_shader_weave = /* Texture layer */ "MOV TEMP[14].x, TEMP[2].yyyy\n" "MOV TEMP[14].yz, TEMP[3].yyyy\n" - "ROUND TEMP[15], TEMP[14]\n" - "ADD TEMP[14], TEMP[14], -TEMP[15]\n" - "MOV TEMP[14], |TEMP[14]|\n" - "MUL TEMP[14], TEMP[14], IMM[1].yyyy\n" + "ROUND 
TEMP[15].xyz, TEMP[14].xyzz\n" + "ADD TEMP[14].xyz, TEMP[14].xyzz, -TEMP[15].xyzz\n" + "MOV TEMP[14].xyz, |TEMP[14].xyzz|\n" + "MUL TEMP[14].xyz, TEMP[14].xyzz, IMM[1].yyyy\n" /* Normalize */ - "DIV TEMP[2].xy, TEMP[2], CONST[5].zwzw\n" - "DIV TEMP[12].xy, TEMP[12], CONST[5].zwzw\n" + "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[5].zwzw\n" + "DIV TEMP[12].xy, TEMP[12].xyyy, CONST[5].zwzw\n" "DIV TEMP[15].xy, CONST[5].zwzw, IMM[1].yyyy\n" - "DIV TEMP[3].xy, TEMP[3], TEMP[15].xyxy\n" - "DIV TEMP[13].xy, TEMP[13], TEMP[15].xyxy\n" + "DIV TEMP[3].xy, TEMP[3].xyyy, TEMP[15].xyxy\n" + "DIV TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xyxy\n" /* Fetch texels */ "MOV TEMP[2].z, IMM[1].wwww\n" "MOV TEMP[3].z, IMM[1].wwww\n" - "TEX_LZ TEMP[10].x, TEMP[2], SAMP[0], 2D_ARRAY\n" - "TEX_LZ TEMP[10].y, TEMP[3], SAMP[1], 2D_ARRAY\n" - "TEX_LZ TEMP[10].z, TEMP[3], SAMP[2], 2D_ARRAY\n" + "TEX_LZ TEMP[10].x, TEMP[2].xyzz, SAMP[0], 2D_ARRAY\n" + "TEX_LZ TEMP[10].y, TEMP[3].xyzz, SAMP[1], 2D_ARRAY\n" + "TEX_LZ TEMP[10].z, TEMP[3].xyzz, SAMP[2], 2D_ARRAY\n" "MOV TEMP[12].z, IMM[1].xxxx\n" "MOV TEMP[13].z, IMM[1].xxxx\n" - "TEX_LZ TEMP[11].x, TEMP[12], SAMP[0], 2D_ARRAY\n" - "TEX_LZ TEMP[11].y, TEMP[13], SAMP[1], 2D_ARRAY\n" - "TEX_LZ TEMP[11].z, TEMP[13], SAMP[2], 2D_ARRAY\n" + "TEX_LZ TEMP[11].x, TEMP[12].xyzz, SAMP[0], 2D_ARRAY\n" + "TEX_LZ TEMP[11].y, TEMP[13].xyzz, SAMP[1], 2D_ARRAY\n" + "TEX_LZ TEMP[11].z, TEMP[13].xyzz, SAMP[2], 2D_ARRAY\n" - "LRP TEMP[6], TEMP[14], TEMP[10], TEMP[11]\n" + "LRP TEMP[6].xyz, TEMP[14].xyzz, TEMP[10].xyzz, TEMP[11].xyzz\n" "MOV TEMP[6].w, IMM[1].xxxx\n" /* Color Space Conversion */ @@ -210,12 +210,12 @@ const char *compute_shader_weave = "DP4 TEMP[9].z, CONST[2], TEMP[6]\n" "MOV TEMP[7].w, TEMP[6].zzzz\n" - "SLE TEMP[8].w, TEMP[7], CONST[3].xxxx\n" - "SGT TEMP[7].w, TEMP[7], CONST[3].yyyy\n" + "SLE TEMP[8].w, TEMP[7].wwww, CONST[3].xxxx\n" + "SGT TEMP[7].w, TEMP[7].wwww, CONST[3].yyyy\n" - "MAX TEMP[9].w, TEMP[7], TEMP[8]\n" + "MAX TEMP[9].w, TEMP[7].wwww, TEMP[8].wwww\n" - "STORE IMAGE[0], TEMP[0], TEMP[9], 2D\n" + "STORE IMAGE[0], TEMP[0].xyyy, TEMP[9], 2D\n" "ENDIF\n" "END\n"; @@ -239,7 +239,7 @@ const char *compute_shader_rgba = "IMM[0] UINT32 { 8, 8, 1, 0}\n" "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n" - "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n" + "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n" /* Drawn area check */ "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n" @@ -248,18 +248,18 @@ const char *compute_shader_rgba = "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n" "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n" - "UIF TEMP[1]\n" + "UIF TEMP[1].xxxx\n" /* Translate */ - "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n" - "U2F TEMP[2], TEMP[2]\n" + "UADD TEMP[2].xy, TEMP[0].xyyy, -CONST[5].xyxy\n" + "U2F TEMP[2].xy, TEMP[2].xyyy\n" /* Scale */ - "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n" + "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwzw\n" /* Fetch texels */ - "TEX_LZ TEMP[3], TEMP[2], SAMP[0], RECT\n" + "TEX_LZ TEMP[3], TEMP[2].xyyy, SAMP[0], RECT\n" - "STORE IMAGE[0], TEMP[0], TEMP[3], 2D\n" + "STORE IMAGE[0], TEMP[0].xyyy, TEMP[3], 2D\n" "ENDIF\n" "END\n"; diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 57554d0681d..690e534d206 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -521,8 +521,9 @@ The integer capabilities: execution. 0 = throttling is disabled. * ``PIPE_CAP_DMABUF``: Whether Linux DMABUF handles are supported by resource_from_handle and resource_get_handle. 
-* ``PIPE_CAP_PREFER_COMPUTE_BLIT_FOR_MULTIMEDIA``: Whether VDPAU, VAAPI, and - OpenMAX should use a compute-based blit instead of pipe_context::blit. +* ``PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA``: Whether VDPAU, VAAPI, and + OpenMAX should use a compute-based blit instead of pipe_context::blit and compute pipeline for compositing images. +* ``PIPE_CAP_TGSI_DIV``: Whether opcode DIV is supported .. _pipe_capf: diff --git a/src/gallium/drivers/etnaviv/etnaviv_emit.c b/src/gallium/drivers/etnaviv/etnaviv_emit.c index ed7b7ee3cb8..ee54daabf3e 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_emit.c +++ b/src/gallium/drivers/etnaviv/etnaviv_emit.c @@ -577,12 +577,12 @@ etna_emit_state(struct etna_context *ctx) static const uint32_t uniform_dirty_bits = ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF; - if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits)) + if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits)) etna_uniforms_write( ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX], ctx->shader_state.VS_UNIFORMS, &ctx->shader_state.vs_uniforms_size); - if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits)) + if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits)) etna_uniforms_write( ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT], ctx->shader_state.PS_UNIFORMS, &ctx->shader_state.ps_uniforms_size); diff --git a/src/gallium/drivers/etnaviv/etnaviv_resource.c b/src/gallium/drivers/etnaviv/etnaviv_resource.c index 83179d3cd08..ab77a80c72b 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_resource.c +++ b/src/gallium/drivers/etnaviv/etnaviv_resource.c @@ -622,6 +622,7 @@ etna_resource_get_handle(struct pipe_screen *pscreen, rsc = etna_resource(rsc->external); handle->stride = rsc->levels[0].stride; + handle->offset = rsc->levels[0].offset; handle->modifier = layout_to_modifier(rsc->layout); if (handle->type == WINSYS_HANDLE_TYPE_SHARED) { diff --git a/src/gallium/drivers/freedreno/Android.gen.mk b/src/gallium/drivers/freedreno/Android.gen.mk index 17b6fbe1b7e..d29ba159d5c 100644 --- a/src/gallium/drivers/freedreno/Android.gen.mk +++ b/src/gallium/drivers/freedreno/Android.gen.mk @@ -25,7 +25,7 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES endif ir3_nir_trig_deps := \ - $(LOCAL_PATH)/ir3/ir3_nir_trig.py \ + $(MESA_TOP)/src/freedreno/ir3/ir3_nir_trig.py \ $(MESA_TOP)/src/compiler/nir/nir_algebraic.py intermediates := $(call local-generated-sources-dir) diff --git a/src/gallium/drivers/freedreno/Android.mk b/src/gallium/drivers/freedreno/Android.mk index ccd88a7d16c..f0ae361cd6a 100644 --- a/src/gallium/drivers/freedreno/Android.mk +++ b/src/gallium/drivers/freedreno/Android.mk @@ -44,7 +44,7 @@ LOCAL_C_INCLUDES := \ LOCAL_GENERATED_SOURCES := $(MESA_GEN_NIR_H) LOCAL_SHARED_LIBRARIES := libdrm -LOCAL_STATIC_LIBRARIES := libmesa_glsl libmesa_nir +LOCAL_STATIC_LIBRARIES := libmesa_glsl libmesa_nir libfreedreno_drm libfreedreno_ir3 LOCAL_MODULE := libmesa_pipe_freedreno include $(LOCAL_PATH)/Android.gen.mk diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c index 59dcaa4bf6b..878f67afba0 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c @@ -79,6 +79,7 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) return NULL; pctx = &fd3_ctx->base.base; + pctx->screen = pscreen; fd3_ctx->base.dev = fd_device_ref(screen->dev); fd3_ctx->base.screen = fd_screen(pscreen); diff --git 
a/src/gallium/drivers/freedreno/a4xx/fd4_context.c b/src/gallium/drivers/freedreno/a4xx/fd4_context.c index e9730e9c209..896050918dc 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_context.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.c @@ -79,6 +79,7 @@ fd4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) return NULL; pctx = &fd4_ctx->base.base; + pctx->screen = pscreen; fd4_ctx->base.dev = fd_device_ref(screen->dev); fd4_ctx->base.screen = fd_screen(pscreen); diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c b/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c index da76afdfa60..8a829759cdc 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c @@ -482,6 +482,7 @@ fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info) batch->needs_flush = true; fd_batch_flush(batch, false, false); + fd_batch_reference(&batch, NULL); return true; } diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_context.c b/src/gallium/drivers/freedreno/a5xx/fd5_context.c index 37e02c61b1f..211d2b5fee5 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_context.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_context.c @@ -78,6 +78,7 @@ fd5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) return NULL; pctx = &fd5_ctx->base.base; + pctx->screen = pscreen; fd5_ctx->base.dev = fd_device_ref(screen->dev); fd5_ctx->base.screen = fd_screen(pscreen); diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_draw.h b/src/gallium/drivers/freedreno/a5xx/fd5_draw.h index c0d50b29cfd..119ecd4d3ad 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_draw.h +++ b/src/gallium/drivers/freedreno/a5xx/fd5_draw.h @@ -98,8 +98,7 @@ fd5_draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring, if (info->index_size) { struct pipe_resource *idx = info->index.resource; - unsigned max_indicies = (idx->width0 - info->indirect->offset) / - info->index_size; + unsigned max_indicies = idx->width0 / info->index_size; OUT_PKT7(ring, CP_DRAW_INDX_INDIRECT, 6); OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_DMA, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.c b/src/gallium/drivers/freedreno/a6xx/fd6_context.c index 7ba0926e3a0..4f696ef092c 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.c @@ -84,6 +84,7 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) return NULL; pctx = &fd6_ctx->base.base; + pctx->screen = pscreen; fd6_ctx->base.dev = fd_device_ref(screen->dev); fd6_ctx->base.screen = fd_screen(pscreen); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index dfa0944cedb..f70963ef9bd 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -227,15 +227,16 @@ setup_border_colors(struct fd_texture_stateobj *tex, struct bcolor_entry *entrie /* * HACK: for PIPE_FORMAT_X24S8_UINT we end up w/ the * stencil border color value in bc->ui[0] but according - * to desc->swizzle and desc->channel, the .x component + * to desc->swizzle and desc->channel, the .x/.w component * is NONE and the stencil value is in the y component. - * Meanwhile the hardware wants this in the .x componetn. + * Meanwhile the hardware wants this in the .w component + * for x24s8 and the .x component for x32_s8x24. 
*/ if ((format == PIPE_FORMAT_X24S8_UINT) || (format == PIPE_FORMAT_X32_S8X24_UINT)) { if (j == 0) { c = 1; - cd = 0; + cd = (format == PIPE_FORMAT_X32_S8X24_UINT) ? 0 : 3; } else { continue; } @@ -1168,14 +1169,14 @@ t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) WRITE(REG_A6XX_VPC_UNKNOWN_9600, 0); WRITE(REG_A6XX_GRAS_UNKNOWN_8600, 0x880); - WRITE(REG_A6XX_HLSQ_UNKNOWN_BE04, 0); - WRITE(REG_A6XX_SP_UNKNOWN_AE03, 0x00000410); + WRITE(REG_A6XX_HLSQ_UNKNOWN_BE04, 0x80000); + WRITE(REG_A6XX_SP_UNKNOWN_AE03, 0x1430); WRITE(REG_A6XX_SP_IBO_COUNT, 0); WRITE(REG_A6XX_SP_UNKNOWN_B182, 0); WRITE(REG_A6XX_HLSQ_UNKNOWN_BB11, 0); WRITE(REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000); WRITE(REG_A6XX_UCHE_CLIENT_PF, 4); - WRITE(REG_A6XX_RB_UNKNOWN_8E01, 0x0); + WRITE(REG_A6XX_RB_UNKNOWN_8E01, 0x1); WRITE(REG_A6XX_SP_UNKNOWN_AB00, 0x5); WRITE(REG_A6XX_VFD_UNKNOWN_A009, 0x00000001); WRITE(REG_A6XX_RB_UNKNOWN_8811, 0x00000010); @@ -1186,7 +1187,7 @@ t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) WRITE(REG_A6XX_GRAS_UNKNOWN_8101, 0); WRITE(REG_A6XX_GRAS_SAMPLE_CNTL, 0); - WRITE(REG_A6XX_GRAS_UNKNOWN_8110, 0); + WRITE(REG_A6XX_GRAS_UNKNOWN_8110, 0x2); WRITE(REG_A6XX_RB_RENDER_CONTROL0, 0x401); WRITE(REG_A6XX_RB_RENDER_CONTROL1, 0); @@ -1232,7 +1233,10 @@ t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) WRITE(REG_A6XX_PC_UNKNOWN_9E72, 0); WRITE(REG_A6XX_VPC_UNKNOWN_9108, 0x3); WRITE(REG_A6XX_SP_TP_UNKNOWN_B304, 0); - WRITE(REG_A6XX_SP_TP_UNKNOWN_B309, 0x000000a2); + /* NOTE blob seems to (mostly?) use 0xb2 for SP_TP_UNKNOWN_B309 + * but this seems to kill texture gather offsets. + */ + WRITE(REG_A6XX_SP_TP_UNKNOWN_B309, 0xa2); WRITE(REG_A6XX_RB_UNKNOWN_8804, 0); WRITE(REG_A6XX_GRAS_UNKNOWN_80A4, 0); WRITE(REG_A6XX_GRAS_UNKNOWN_80A5, 0); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_format.c b/src/gallium/drivers/freedreno/a6xx/fd6_format.c index 9448ff18e54..fb480266b0c 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_format.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_format.c @@ -216,7 +216,7 @@ static struct fd6_format formats[PIPE_FORMAT_COUNT] = { _T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX), _T(Z24X8_UNORM, X8Z24_UNORM, X8Z24_UNORM, WZYX), - _T(X24S8_UINT, 8_8_8_8_UINT, X8Z24_UNORM, XYZW), // XXX + _T(X24S8_UINT, 8_8_8_8_UINT, X8Z24_UNORM, WZYX), _T(Z24_UNORM_S8_UINT, X8Z24_UNORM, X8Z24_UNORM, WZYX), _T(Z32_FLOAT, 32_FLOAT, R32_FLOAT, WZYX), _T(Z32_FLOAT_S8X24_UINT, 32_FLOAT, R32_FLOAT, WZYX), @@ -450,7 +450,7 @@ fd6_tex_swiz(enum pipe_format format, unsigned char *swiz, */ if (format == PIPE_FORMAT_X24S8_UINT) { const unsigned char stencil_swiz[4] = { - PIPE_SWIZZLE_X, PIPE_SWIZZLE_X, PIPE_SWIZZLE_X, PIPE_SWIZZLE_X + PIPE_SWIZZLE_W, PIPE_SWIZZLE_W, PIPE_SWIZZLE_W, PIPE_SWIZZLE_W }; util_format_compose_swizzles(stencil_swiz, uswiz, swiz); } else if (fd6_pipe2swap(format) != WZYX) { diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c index a00e4446333..ebdfd5b8923 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c @@ -214,6 +214,12 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, OUT_RING(ring, 0x00000000); } + /* NOTE: blob emits GRAS_LRZ_CNTL plus GRAZ_LRZ_BUFFER_BASE + * plus this CP_EVENT_WRITE at the end in it's own IB.. 
+ */ + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(UNK_25)); + if (rsc->stencil) { struct fd_resource_slice *slice = fd_resource_slice(rsc->stencil, 0); stride = slice->pitch * rsc->stencil->cpp; @@ -402,7 +408,6 @@ set_bin_size(struct fd_ringbuffer *ring, uint32_t w, uint32_t h, uint32_t flag) static void emit_binning_pass(struct fd_batch *batch) { - struct fd_context *ctx = batch->ctx; struct fd_ringbuffer *ring = batch->gmem; struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; @@ -463,12 +468,22 @@ emit_binning_pass(struct fd_batch *batch) OUT_PKT7(ring, CP_EVENT_WRITE, 1); OUT_RING(ring, UNK_2D); - OUT_PKT7(ring, CP_EVENT_WRITE, 4); - OUT_RING(ring, CACHE_FLUSH_TS); - OUT_RELOCW(ring, fd6_context(ctx)->blit_mem, 0, 0, 0); /* ADDR_LO/HI */ - OUT_RING(ring, 0x00000000); - + fd6_cache_inv(batch, ring); + fd6_cache_flush(batch, ring); fd_wfi(batch, ring); + + OUT_PKT7(ring, CP_WAIT_FOR_ME, 0); + + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); + OUT_RING(ring, 0x0); + + OUT_PKT7(ring, CP_SET_MODE, 1); + OUT_RING(ring, 0x0); + + OUT_WFI5(ring); + + OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1); + OUT_RING(ring, 0x7c400004); /* RB_CCU_CNTL */ } static void @@ -544,6 +559,15 @@ fd6_emit_tile_init(struct fd_batch *batch) OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1); OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9805, 1); + OUT_RING(ring, 0x1); + + OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A0F8, 1); + OUT_RING(ring, 0x1); + + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x1); } else { set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000); patch_draws(batch, IGNORE_VISIBILITY); @@ -580,9 +604,6 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) struct fd6_context *fd6_ctx = fd6_context(ctx); struct fd_ringbuffer *ring = batch->gmem; - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x7)); - emit_marker6(ring, 7); OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_GMEM) | 0x10); @@ -595,8 +616,6 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) set_scissor(ring, x1, y1, x2, y2); - set_window_offset(ring, x1, y1); - OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1); OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE); @@ -620,7 +639,32 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) (tile->p * 4) + (32 * A6XX_VSC_DATA_PITCH), 0, 0); OUT_RELOC(ring, fd6_ctx->vsc_data2, (tile->p * A6XX_VSC_DATA2_PITCH), 0, 0); + + set_window_offset(ring, x1, y1); + + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000); + + OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1); + OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE); + + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); + OUT_RING(ring, 0x0); + + OUT_PKT7(ring, CP_SET_MODE, 1); + OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8804, 1); + OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_SP_TP_UNKNOWN_B304, 1); + OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_80A4, 1); + OUT_RING(ring, 0x0); } else { + set_window_offset(ring, x1, y1); + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); OUT_RING(ring, 0x1); @@ -640,6 +684,13 @@ set_blit_scissor(struct fd_batch *batch, struct fd_ringbuffer *ring) blit_scissor.maxx = MIN2(pfb->width, batch->max_scissor.maxx); blit_scissor.maxy = MIN2(pfb->height, batch->max_scissor.maxy); + /* NOTE: blob switches to CP_BLIT instead of CP_EVENT_WRITE:BLIT for + * small render targets. 
But since we align pitch to binw I think + * we can get away avoiding GPU hangs a simpler way, by just rounding + * up the blit scissor: + */ + blit_scissor.maxx = MAX2(blit_scissor.maxx, batch->ctx->screen->gmem_alignw); + OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2); OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_TL_X(blit_scissor.minx) | @@ -1021,26 +1072,6 @@ prepare_tile_fini_ib(struct fd_batch *batch) FD_RINGBUFFER_STREAMING); ring = batch->tile_fini; - if (use_hw_binning(batch)) { - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x5) | 0x10); - } - - OUT_PKT7(ring, CP_SET_DRAW_STATE, 3); - OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) | - CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS | - CP_SET_DRAW_STATE__0_GROUP_ID(0)); - OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0)); - OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0)); - - OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); - OUT_RING(ring, 0x0); - - emit_marker6(ring, 7); - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10); - emit_marker6(ring, 7); - set_blit_scissor(batch, ring); if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { @@ -1074,7 +1105,32 @@ prepare_tile_fini_ib(struct fd_batch *batch) static void fd6_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) { - fd6_emit_ib(batch->gmem, batch->tile_fini); + struct fd_ringbuffer *ring = batch->gmem; + + if (use_hw_binning(batch)) { + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x5) | 0x10); + } + + OUT_PKT7(ring, CP_SET_DRAW_STATE, 3); + OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) | + CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS | + CP_SET_DRAW_STATE__0_GROUP_ID(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0)); + + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1); + OUT_RING(ring, 0x0); + + emit_marker6(ring, 7); + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10); + emit_marker6(ring, 7); + + fd6_emit_ib(ring, batch->tile_fini); + + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x7)); } static void diff --git a/src/gallium/drivers/iris/iris_fence.c b/src/gallium/drivers/iris/iris_fence.c index 06452f70966..f94a52724cf 100644 --- a/src/gallium/drivers/iris/iris_fence.c +++ b/src/gallium/drivers/iris/iris_fence.c @@ -205,24 +205,25 @@ iris_fence_await(struct pipe_context *ctx, #define MSEC_PER_SEC (1000) static uint64_t -rel2abs(uint64_t timeout) +gettime_ns(void) { - struct timespec ts; - uint64_t now; + struct timespec current; + clock_gettime(CLOCK_MONOTONIC, ¤t); + return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec; +} - if (!timeout) +static uint64_t +rel2abs(uint64_t timeout) +{ + if (timeout == 0) return 0; - if (timeout == PIPE_TIMEOUT_INFINITE) - return INT64_MAX; - - clock_gettime(CLOCK_MONOTONIC, &ts); - now = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; + uint64_t current_time = gettime_ns(); + uint64_t max_timeout = (uint64_t) INT64_MAX - current_time; - if (now > INT64_MAX - timeout) - return INT64_MAX; + timeout = MIN2(max_timeout, timeout); - return now + timeout; + return current_time + timeout; } static boolean @@ -243,7 +244,7 @@ iris_fence_finish(struct pipe_screen *p_screen, struct drm_syncobj_wait args = { .handles = (uintptr_t)handles, .count_handles = fence->count, - .timeout_nsec = rel2abs(timeout), /* XXX */ + .timeout_nsec = rel2abs(timeout), .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL }; return 
drm_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args) == 0; diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index 30ec3f1ff86..7512889c47b 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -468,7 +468,8 @@ iris_setup_uniforms(const struct brw_compiler *compiler, if (load->src[0].ssa == temp_ubo_name) { nir_instr_rewrite_src(instr, &load->src[0], nir_src_for_ssa(nir_imm_int(&b, 0))); - } else if (nir_src_as_uint(load->src[0]) == 0) { + } else if (nir_src_is_const(load->src[0]) && + nir_src_as_uint(load->src[0]) == 0) { nir_ssa_def *offset = nir_iadd(&b, load->src[1].ssa, nir_imm_int(&b, 4 * num_system_values)); diff --git a/src/gallium/drivers/iris/iris_program_cache.c b/src/gallium/drivers/iris/iris_program_cache.c index 849f96906ca..e82375f0825 100644 --- a/src/gallium/drivers/iris/iris_program_cache.c +++ b/src/gallium/drivers/iris/iris_program_cache.c @@ -45,7 +45,7 @@ #include "iris_resource.h" struct keybox { - uint8_t size; + uint16_t size; enum iris_program_cache_id cache_id; uint8_t data[0]; }; diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c index fd50139defc..343339685b7 100644 --- a/src/gallium/drivers/iris/iris_resource.c +++ b/src/gallium/drivers/iris/iris_resource.c @@ -1003,6 +1003,7 @@ iris_map_copy_region(struct iris_transfer *map) .nr_samples = xfer->resource->nr_samples, .nr_storage_samples = xfer->resource->nr_storage_samples, .array_size = box->depth, + .format = res->internal_format, }; if (xfer->resource->target == PIPE_BUFFER) @@ -1012,22 +1013,6 @@ iris_map_copy_region(struct iris_transfer *map) else templ.target = PIPE_TEXTURE_2D; - /* Depth, stencil, and ASTC can't be linear surfaces, so we can't use - * xfer->resource->format directly. Pick a bpb compatible format so - * resource creation will succeed; blorp_copy will override it anyway. - */ - switch (util_format_get_blocksizebits(res->internal_format)) { - case 8: templ.format = PIPE_FORMAT_R8_UINT; break; - case 16: templ.format = PIPE_FORMAT_R8G8_UINT; break; - case 24: templ.format = PIPE_FORMAT_R8G8B8_UINT; break; - case 32: templ.format = PIPE_FORMAT_R8G8B8A8_UINT; break; - case 48: templ.format = PIPE_FORMAT_R16G16B16_UINT; break; - case 64: templ.format = PIPE_FORMAT_R16G16B16A16_UINT; break; - case 96: templ.format = PIPE_FORMAT_R32G32B32_UINT; break; - case 128: templ.format = PIPE_FORMAT_R32G32B32A32_UINT; break; - default: unreachable("Invalid bpb"); - } - map->staging = iris_resource_create(pscreen, &templ); assert(map->staging); @@ -1443,6 +1428,10 @@ iris_transfer_map(struct pipe_context *ctx, no_gpu = true; } + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + if (fmtl->txc == ISL_TXC_ASTC) + no_gpu = true; + if ((map_would_stall || res->aux.usage == ISL_AUX_USAGE_CCS_E) && !no_gpu) { /* If we need a synchronous mapping and the resource is busy, * we copy to/from a linear temporary buffer using the GPU. 
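/* A minimal standalone sketch, not part of the patch, of the overflow-safe
 * relative-to-absolute timeout conversion that the new gettime_ns()/rel2abs()
 * pair in iris_fence.c above performs: PIPE_TIMEOUT_INFINITE is ~0ull, so a
 * plain "now + timeout" would wrap, and clamping the relative value to
 * INT64_MAX - now pins the absolute deadline at INT64_MAX instead.
 * NSEC_PER_SEC and MIN2 are spelled out locally (SKETCH_*) to keep the sketch
 * self-contained; the real code uses Mesa's macros.
 */
#include <stdint.h>
#include <time.h>

#define SKETCH_NSEC_PER_SEC 1000000000ull
#define SKETCH_MIN2(a, b)   ((a) < (b) ? (a) : (b))

static uint64_t
sketch_rel2abs_ns(uint64_t rel_timeout_ns)
{
   if (rel_timeout_ns == 0)
      return 0;                       /* poll once, do not wait */

   struct timespec ts;
   clock_gettime(CLOCK_MONOTONIC, &ts);
   uint64_t now = (uint64_t)ts.tv_sec * SKETCH_NSEC_PER_SEC +
                  (uint64_t)ts.tv_nsec;

   /* The kernel deadline is a signed 64-bit value, so never exceed INT64_MAX. */
   uint64_t max_rel = (uint64_t)INT64_MAX - now;
   return now + SKETCH_MIN2(rel_timeout_ns, max_rel);
}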
@@ -1586,6 +1575,8 @@ iris_flush_and_dirty_for_history(struct iris_context *ice, flush |= PIPE_CONTROL_RENDER_TARGET_FLUSH; iris_emit_pipe_control_flush(batch, flush); + + iris_dirty_for_history(ice, res); } bool diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c index 5b321a6f862..30d32b2ed43 100644 --- a/src/gallium/drivers/iris/iris_screen.c +++ b/src/gallium/drivers/iris/iris_screen.c @@ -179,6 +179,7 @@ iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES: case PIPE_CAP_INVALIDATE_BUFFER: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: return true; case PIPE_CAP_CONSERVATIVE_RASTER_INNER_COVERAGE: case PIPE_CAP_TGSI_FS_FBFETCH: diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 677fa5aba53..f1b7e631892 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -631,6 +631,7 @@ iris_emit_l3_config(struct iris_batch *batch, const struct gen_l3_config *cfg, * desirable behavior. */ reg.ErrorDetectionBehaviorControl = true; + reg.UseFullWays = true; #endif reg.URBAllocation = cfg->n[GEN_L3P_URB]; reg.ROAllocation = cfg->n[GEN_L3P_RO]; @@ -733,20 +734,12 @@ iris_init_render_context(struct iris_screen *screen, } iris_emit_lri(batch, HALF_SLICE_CHICKEN7, reg_val); - /* WA_2204188704: Pixel Shader Panic dispatch must be disabled. */ - iris_pack_state(GENX(COMMON_SLICE_CHICKEN3), ®_val, reg) { - reg.PSThreadPanicDispatch = 0x3; - reg.PSThreadPanicDispatchMask = 0x3; - } - iris_emit_lri(batch, COMMON_SLICE_CHICKEN3, reg_val); - iris_pack_state(GENX(SLICE_COMMON_ECO_CHICKEN1), ®_val, reg) { reg.StateCacheRedirectToCSSectionEnable = true; reg.StateCacheRedirectToCSSectionEnableMask = true; } iris_emit_lri(batch, SLICE_COMMON_ECO_CHICKEN1, reg_val); - // XXX: 3D_MODE? #endif @@ -1901,7 +1894,8 @@ iris_create_surface(struct pipe_context *ctx, return NULL; } - surf->view = (struct isl_view) { + struct isl_view *view = &surf->view; + *view = (struct isl_view) { .format = fmt.fmt, .base_level = tmpl->u.tex.level, .levels = 1, @@ -1925,15 +1919,98 @@ iris_create_surface(struct pipe_context *ctx, if (!unlikely(map)) return NULL; - unsigned aux_modes = res->aux.possible_usages; - while (aux_modes) { - enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes); + if (!isl_format_is_compressed(res->surf.format)) { + /* This is a normal surface. Fill out a SURFACE_STATE for each possible + * auxiliary surface mode and return the pipe_surface. + */ + unsigned aux_modes = res->aux.possible_usages; + while (aux_modes) { + enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes); + + fill_surface_state(&screen->isl_dev, map, res, view, aux_usage); - fill_surface_state(&screen->isl_dev, map, res, &surf->view, aux_usage); + map += SURFACE_STATE_ALIGNMENT; + } - map += SURFACE_STATE_ALIGNMENT; + return psurf; + } + + /* The resource has a compressed format, which is not renderable, but we + * have a renderable view format. We must be attempting to upload blocks + * of compressed data via an uncompressed view. + * + * In this case, we can assume there are no auxiliary buffers, a single + * miplevel, and that the resource is single-sampled. Gallium may try + * and create an uncompressed view with multiple layers, however. 
+ */ + assert(!isl_format_is_compressed(fmt.fmt)); + assert(res->aux.possible_usages == 1 << ISL_AUX_USAGE_NONE); + assert(res->surf.samples == 1); + assert(view->levels == 1); + + struct isl_surf isl_surf; + uint32_t offset_B = 0, tile_x_sa = 0, tile_y_sa = 0; + + if (view->base_level > 0) { + /* We can't rely on the hardware's miplevel selection with such + * a substantial lie about the format, so we select a single image + * using the Tile X/Y Offset fields. In this case, we can't handle + * multiple array slices. + * + * On Broadwell, HALIGN and VALIGN are specified in pixels and are + * hard-coded to align to exactly the block size of the compressed + * texture. This means that, when reinterpreted as a non-compressed + * texture, the tile offsets may be anything and we can't rely on + * X/Y Offset. + * + * Return NULL to force the state tracker to take fallback paths. + */ + if (view->array_len > 1 || GEN_GEN == 8) + return NULL; + + const bool is_3d = res->surf.dim == ISL_SURF_DIM_3D; + isl_surf_get_image_surf(&screen->isl_dev, &res->surf, + view->base_level, + is_3d ? 0 : view->base_array_layer, + is_3d ? view->base_array_layer : 0, + &isl_surf, + &offset_B, &tile_x_sa, &tile_y_sa); + + /* We use address and tile offsets to access a single level/layer + * as a subimage, so reset level/layer so it doesn't offset again. + */ + view->base_array_layer = 0; + view->base_level = 0; + } else { + /* Level 0 doesn't require tile offsets, and the hardware can find + * array slices using QPitch even with the format override, so we + * can allow layers in this case. Copy the original ISL surface. + */ + memcpy(&isl_surf, &res->surf, sizeof(isl_surf)); } + /* Scale down the image dimensions by the block size. */ + const struct isl_format_layout *fmtl = + isl_format_get_layout(res->surf.format); + isl_surf.format = fmt.fmt; + isl_surf.logical_level0_px = isl_surf_get_logical_level0_el(&isl_surf); + isl_surf.phys_level0_sa = isl_surf_get_phys_level0_el(&isl_surf); + tile_x_sa /= fmtl->bw; + tile_y_sa /= fmtl->bh; + + psurf->width = isl_surf.logical_level0_px.width; + psurf->height = isl_surf.logical_level0_px.height; + + struct isl_surf_fill_state_info f = { + .surf = &isl_surf, + .view = view, + .mocs = mocs(res->bo), + .address = res->bo->gtt_offset + offset_B, + .x_offset_sa = tile_x_sa, + .y_offset_sa = tile_y_sa, + }; + + isl_surf_fill_state_s(&screen->isl_dev, map, &f); return psurf; } @@ -3863,7 +3940,7 @@ surf_state_offset_for_aux(struct iris_resource *res, enum isl_aux_usage aux_usage) { return SURFACE_STATE_ALIGNMENT * - util_bitcount(res->aux.possible_usages & ((1 << aux_usage) - 1)); + util_bitcount(aux_modes & ((1 << aux_usage) - 1)); } static void @@ -3893,11 +3970,12 @@ update_clear_value(struct iris_context *ice, struct iris_batch *batch, struct iris_resource *res, struct iris_state_ref *state, - unsigned aux_modes, + unsigned all_aux_modes, struct isl_view *view) { struct iris_screen *screen = batch->screen; const struct gen_device_info *devinfo = &screen->devinfo; + UNUSED unsigned aux_modes = all_aux_modes; /* We only need to update the clear color in the surface state for gen8 and * gen9. Newer gens can read it directly from the clear color state buffer. 
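surf_state_offset_for_aux() in the iris_state.c hunk above locates a SURFACE_STATE inside a packed list by counting how many enabled aux modes sort below the requested one. A small self-contained illustration of that bit-counting idea follows; the enum values, STATE_STRIDE constant, and helper names are made up for the example (the real driver uses enum isl_aux_usage, SURFACE_STATE_ALIGNMENT and util_bitcount), so treat it as a sketch rather than iris code.

#include <stdio.h>

#define STATE_STRIDE 64  /* stand-in for SURFACE_STATE_ALIGNMENT */

/* Hypothetical aux modes, mirroring the idea of enum isl_aux_usage. */
enum aux_mode { AUX_NONE, AUX_MCS, AUX_CCS_D, AUX_CCS_E };

/* Surface states are packed back to back, one per enabled mode, in
 * ascending enum order, so a mode's offset is the number of enabled
 * modes below it times the per-state stride.
 */
static unsigned
state_offset_for_aux(unsigned enabled_modes, enum aux_mode mode)
{
   return STATE_STRIDE *
          __builtin_popcount(enabled_modes & ((1u << mode) - 1));
}

int
main(void)
{
   unsigned modes = (1u << AUX_NONE) | (1u << AUX_CCS_E);

   /* AUX_CCS_E is the second enabled mode, so it sits one stride in. */
   printf("%u\n", state_offset_for_aux(modes, AUX_CCS_E)); /* prints 64 */
   return 0;
}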
@@ -3912,13 +3990,13 @@ update_clear_value(struct iris_context *ice, while (aux_modes) { enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes); - surf_state_update_clear_value(batch, res, state, aux_modes, + surf_state_update_clear_value(batch, res, state, all_aux_modes, aux_usage); } } else if (devinfo->gen == 8) { pipe_resource_reference(&state->res, NULL); void *map = alloc_surface_states(ice->state.surface_uploader, - state, res->aux.possible_usages); + state, all_aux_modes); while (aux_modes) { enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes); fill_surface_state(&screen->isl_dev, map, res, view, aux_usage); diff --git a/src/gallium/drivers/lima/lima_draw.c b/src/gallium/drivers/lima/lima_draw.c index e0a1e181937..22910fbe72b 100644 --- a/src/gallium/drivers/lima/lima_draw.c +++ b/src/gallium/drivers/lima/lima_draw.c @@ -1438,6 +1438,7 @@ lima_pack_wb_zsbuf_reg(struct lima_context *ctx, uint32_t *wb_reg, int wb_idx) { struct lima_context_framebuffer *fb = &ctx->framebuffer; struct lima_resource *res = lima_resource(fb->base.zsbuf->texture); + int level = fb->base.zsbuf->u.tex.level; uint32_t format; @@ -1455,14 +1456,14 @@ lima_pack_wb_zsbuf_reg(struct lima_context *ctx, uint32_t *wb_reg, int wb_idx) struct lima_pp_wb_reg *wb = (void *)wb_reg; wb[wb_idx].type = 0x01; /* 1 for depth, stencil */ - wb[wb_idx].address = res->bo->va; + wb[wb_idx].address = res->bo->va + res->levels[level].offset; wb[wb_idx].pixel_format = format; if (res->tiled) { wb[wb_idx].pixel_layout = 0x2; wb[wb_idx].pitch = fb->tiled_w; } else { wb[wb_idx].pixel_layout = 0x0; - wb[wb_idx].pitch = res->levels[0].stride / 8; + wb[wb_idx].pitch = res->levels[level].stride / 8; } wb[wb_idx].mrt_bits = 0; } @@ -1472,6 +1473,7 @@ lima_pack_wb_cbuf_reg(struct lima_context *ctx, uint32_t *wb_reg, int wb_idx) { struct lima_context_framebuffer *fb = &ctx->framebuffer; struct lima_resource *res = lima_resource(fb->base.cbufs[0]->texture); + int level = fb->base.cbufs[0]->u.tex.level; bool swap_channels = false; switch (fb->base.cbufs[0]->format) { @@ -1485,14 +1487,14 @@ lima_pack_wb_cbuf_reg(struct lima_context *ctx, uint32_t *wb_reg, int wb_idx) struct lima_pp_wb_reg *wb = (void *)wb_reg; wb[wb_idx].type = 0x02; /* 2 for color buffer */ - wb[wb_idx].address = res->bo->va; + wb[wb_idx].address = res->bo->va + res->levels[level].offset; wb[wb_idx].pixel_format = LIMA_PIXEL_FORMAT_B8G8R8A8; if (res->tiled) { wb[wb_idx].pixel_layout = 0x2; wb[wb_idx].pitch = fb->tiled_w; } else { wb[wb_idx].pixel_layout = 0x0; - wb[wb_idx].pitch = res->levels[0].stride / 8; + wb[wb_idx].pitch = res->levels[level].stride / 8; } wb[wb_idx].mrt_bits = swap_channels ? 0x4 : 0x0; } diff --git a/src/gallium/drivers/lima/lima_texture.c b/src/gallium/drivers/lima/lima_texture.c index 90af0dec74b..ead5e79a02e 100644 --- a/src/gallium/drivers/lima/lima_texture.c +++ b/src/gallium/drivers/lima/lima_texture.c @@ -119,19 +119,17 @@ lima_texture_desc_set_res(struct lima_context *ctx, uint32_t *desc, uint32_t base_va = lima_res->bo->va; - /* attach level 0 */ - desc[6] |= (base_va << 24) | (layout << 13); - desc[7] |= base_va >> 8; + /* attach first level */ + uint32_t first_va = base_va + lima_res->levels[first_level].offset; + desc[6] |= (first_va << 24) | (layout << 13); + desc[7] |= first_va >> 8; /* Attach remaining levels. * Each subsequent mipmap address is specified using the 26 msbs. 
* These addresses are then packed continuously in memory */ unsigned current_desc_index = 7; unsigned current_desc_bit_index = 24; - for (i = 1; i < LIMA_MAX_MIP_LEVELS; i++) { - if (first_level + i > last_level) - break; - + for (i = first_level + 1; i <= last_level; i++) { uint32_t address = base_va + lima_res->levels[i].offset; address = (address >> 6); desc[current_desc_index] |= (address << current_desc_bit_index); @@ -163,32 +161,21 @@ lima_update_tex_desc(struct lima_context *ctx, struct lima_sampler_state *sample /* 2D texture */ desc[1] |= 0x400; - desc[1] &= ~0xff000000; + first_level = texture->base.u.tex.first_level; + last_level = texture->base.u.tex.last_level; + if (last_level - first_level >= LIMA_MAX_MIP_LEVELS) + last_level = first_level + LIMA_MAX_MIP_LEVELS - 1; + switch (sampler->base.min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - first_level = texture->base.u.tex.first_level; - last_level = texture->base.u.tex.last_level; - if (last_level - first_level >= LIMA_MAX_MIP_LEVELS) - last_level = first_level + LIMA_MAX_MIP_LEVELS - 1; - mipmapping = true; - desc[1] |= ((last_level - first_level) << 24); - desc[2] &= ~0x0600; - break; case PIPE_TEX_MIPFILTER_LINEAR: - first_level = texture->base.u.tex.first_level; - last_level = texture->base.u.tex.last_level; - if (last_level - first_level >= LIMA_MAX_MIP_LEVELS) - last_level = first_level + LIMA_MAX_MIP_LEVELS - 1; + desc[2] |= 0x0600; + case PIPE_TEX_MIPFILTER_NEAREST: mipmapping = true; desc[1] |= ((last_level - first_level) << 24); - desc[2] |= 0x0600; break; case PIPE_TEX_MIPFILTER_NONE: default: - first_level = 0; - last_level = 0; mipmapping = false; - desc[2] &= ~0x0600; break; } diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 510346d2abf..125d2695707 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -271,6 +271,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_INT64: case PIPE_CAP_INT64_DIVMOD: case PIPE_CAP_QUERY_SO_OVERFLOW: + case PIPE_CAP_TGSI_DIV: return 1; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index aca3b0afb1e..1f702a987d8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -51,12 +51,12 @@ NVC0LegalizeSSA::handleDIV(Instruction *i) // Generate movs to the input regs for the call we want to generate for (int s = 0; i->srcExists(s); ++s) { Instruction *ld = i->getSrc(s)->getInsn(); - assert(ld->getSrc(0) != NULL); // check if we are moving an immediate, propagate it in that case if (!ld || ld->fixed || (ld->op != OP_LOAD && ld->op != OP_MOV) || !(ld->src(0).getFile() == FILE_IMMEDIATE)) bld.mkMovToReg(s, i->getSrc(s)); else { + assert(ld->getSrc(0) != NULL); bld.mkMovToReg(s, ld->getSrc(0)); // Clear the src, to make code elimination possible here before we // delete the instruction i later diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 0b3220903b9..bfdb923379b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -2080,14 +2080,15 @@ void AlgebraicOpt::handleCVT_CVT(Instruction *cvt) { Instruction *insn = cvt->getSrc(0)->getInsn(); - RoundMode rnd = insn->rnd; - if 
(insn->saturate || + if (!insn || + insn->saturate || insn->subOp || insn->dType != insn->sType || insn->dType != cvt->sType) return; + RoundMode rnd = insn->rnd; switch (insn->op) { case OP_CEIL: rnd = ROUND_PI; diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index b5dc033bd2d..58889d1be8d 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -247,6 +247,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS: case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: case PIPE_CAP_IMAGE_LOAD_FORMATTED: + case PIPE_CAP_TGSI_DIV: return 0; case PIPE_CAP_MAX_GS_INVOCATIONS: diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index 940fb9ce25c..a725aedcd8e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -346,6 +346,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, break; default: assert(!"unsupported IR!"); + free(info); return false; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 423b6af3b64..e28f90e4047 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -218,6 +218,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: + case PIPE_CAP_TGSI_DIV: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP: return 1; /* class_3d >= NVA0_3D_CLASS; */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index 55167a27c09..5af5e38b82e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -599,19 +599,20 @@ nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso) static inline void nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s, - unsigned nr, void **hwcso) + unsigned nr, void **hwcsos) { unsigned highest_found = 0; unsigned i; assert(nr <= PIPE_MAX_SAMPLERS); for (i = 0; i < nr; ++i) { + struct nv50_tsc_entry *hwcso = hwcsos ? nv50_tsc_entry(hwcsos[i]) : NULL; struct nv50_tsc_entry *old = nv50->samplers[s][i]; - if (hwcso[i]) + if (hwcso) highest_found = i; - nv50->samplers[s][i] = nv50_tsc_entry(hwcso[i]); + nv50->samplers[s][i] = hwcso; if (old) nv50_screen_tsc_unlock(nv50->screen, old); } @@ -685,12 +686,13 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, int s, assert(nr <= PIPE_MAX_SAMPLERS); for (i = 0; i < nr; ++i) { + struct pipe_sampler_view *view = views ? 
views[i] : NULL; struct nv50_tic_entry *old = nv50_tic_entry(nv50->textures[s][i]); if (old) nv50_screen_tic_unlock(nv50->screen, old); - if (views[i] && views[i]->texture) { - struct pipe_resource *res = views[i]->texture; + if (view && view->texture) { + struct pipe_resource *res = view->texture; if (res->target == PIPE_BUFFER && (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) nv50->textures_coherent[s] |= 1 << i; @@ -700,7 +702,7 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, int s, nv50->textures_coherent[s] &= ~(1 << i); } - pipe_sampler_view_reference(&nv50->textures[s][i], views[i]); + pipe_sampler_view_reference(&nv50->textures[s][i], view); } assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS); @@ -768,6 +770,7 @@ nv50_sp_state_create(struct pipe_context *pipe, break; default: assert(!"unsupported IR!"); + free(prog); return NULL; } @@ -864,6 +867,7 @@ nv50_cp_state_create(struct pipe_context *pipe, break; default: assert(!"unsupported IR!"); + free(prog); return NULL; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index c81d8952c98..1ff9f19f139 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -594,6 +594,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, break; default: assert(!"unsupported IR!"); + free(info); return false; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 79224ac99a7..1c2d3ee3d7c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -276,6 +276,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: case PIPE_CAP_QUERY_SO_OVERFLOW: case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: + case PIPE_CAP_TGSI_DIV: return 1; case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 12e21862ee0..3c31e162b58 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -463,22 +463,23 @@ nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso) static inline void nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, unsigned s, - unsigned nr, void **hwcso) + unsigned nr, void **hwcsos) { unsigned highest_found = 0; unsigned i; for (i = 0; i < nr; ++i) { + struct nv50_tsc_entry *hwcso = hwcsos ? nv50_tsc_entry(hwcsos[i]) : NULL; struct nv50_tsc_entry *old = nvc0->samplers[s][i]; - if (hwcso[i]) + if (hwcso) highest_found = i; - if (hwcso[i] == old) + if (hwcso == old) continue; nvc0->samplers_dirty[s] |= 1 << i; - nvc0->samplers[s][i] = nv50_tsc_entry(hwcso[i]); + nvc0->samplers[s][i] = hwcso; if (old) nvc0_screen_tsc_unlock(nvc0->screen, old); } @@ -523,14 +524,15 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, unsigned i; for (i = 0; i < nr; ++i) { + struct pipe_sampler_view *view = views ? 
views[i] : NULL; struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]); - if (views[i] == nvc0->textures[s][i]) + if (view == nvc0->textures[s][i]) continue; nvc0->textures_dirty[s] |= 1 << i; - if (views[i] && views[i]->texture) { - struct pipe_resource *res = views[i]->texture; + if (view && view->texture) { + struct pipe_resource *res = view->texture; if (res->target == PIPE_BUFFER && (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) nvc0->textures_coherent[s] |= 1 << i; @@ -548,7 +550,7 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, nvc0_screen_tic_unlock(nvc0->screen, old); } - pipe_sampler_view_reference(&nvc0->textures[s][i], views[i]); + pipe_sampler_view_reference(&nvc0->textures[s][i], view); } for (i = nr; i < nvc0->num_textures[s]; ++i) { @@ -607,6 +609,7 @@ nvc0_sp_state_create(struct pipe_context *pipe, break; default: assert(!"unsupported IR!"); + free(prog); return NULL; } @@ -739,6 +742,7 @@ nvc0_cp_state_create(struct pipe_context *pipe, break; default: assert(!"unsupported IR!"); + free(prog); return NULL; } diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index c5e4dec20bd..022aace73db 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -393,23 +393,24 @@ nve4_compute_validate_constbufs(struct nvc0_context *nvc0) uint64_t address = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s); - assert(i > 0); /* we really only want uniform buffer objects */ - - BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); - PUSH_DATAh(push, address + NVC0_CB_AUX_UBO_INFO(i - 1)); - PUSH_DATA (push, address + NVC0_CB_AUX_UBO_INFO(i - 1)); - BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); - PUSH_DATA (push, 4 * 4); - PUSH_DATA (push, 0x1); - BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4); - PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); - - PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset); - PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset); - PUSH_DATA (push, nvc0->constbuf[5][i].size); - PUSH_DATA (push, 0); - BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD); + /* constbufs above 0 will are fetched via ubo info in the shader */ + if (i > 0) { + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, address + NVC0_CB_AUX_UBO_INFO(i - 1)); + PUSH_DATA (push, address + NVC0_CB_AUX_UBO_INFO(i - 1)); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, 4 * 4); + PUSH_DATA (push, 0x1); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4); + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); + + PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset); + PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset); + PUSH_DATA (push, nvc0->constbuf[s][i].size); + PUSH_DATA (push, 0); + } + BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD); res->cb_bindings[s] |= 1 << i; } } @@ -554,9 +555,9 @@ nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size) static void nve4_compute_setup_buf_cb(struct nvc0_context *nvc0, bool gp100, void *desc) { - // only user constant buffers 1-6 can be put in the descriptor, the rest are + // only user constant buffers 0-6 can be put in the descriptor, the rest are // loaded through global memory - for (int i = 1; i <= 6; i++) { + for (int i = 0; i <= 6; i++) { if (nvc0->constbuf[5][i].user || !nvc0->constbuf[5][i].u.buf) continue; @@ -609,6 +610,10 @@ nve4_compute_setup_launch_desc(struct 
nvc0_context *nvc0, if (nvc0->constbuf[5][0].user || cp->parm_size) { nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo, NVC0_CB_USR_INFO(5), 1 << 16); + + // Later logic will attempt to bind a real buffer at position 0. That + // should not happen if we've bound a user buffer. + assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf); } nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo, NVC0_CB_AUX_INFO(5), 1 << 11); @@ -649,6 +654,10 @@ gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, if (nvc0->constbuf[5][0].user || cp->parm_size) { gp100_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo, NVC0_CB_USR_INFO(5), 1 << 16); + + // Later logic will attempt to bind a real buffer at position 0. That + // should not happen if we've bound a user buffer. + assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf); } gp100_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo, NVC0_CB_AUX_INFO(5), 1 << 11); diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c index 29f3ce7ff71..92517c7d1c6 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c +++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c @@ -995,7 +995,7 @@ emit_load_const(compiler_context *ctx, nir_load_const_instr *instr) { nir_ssa_def def = instr->def; - float *v = ralloc_array(NULL, float, 4); + float *v = rzalloc_array(NULL, float, 4); nir_const_load_to_arr(v, instr, f32); _mesa_hash_table_u64_insert(ctx->ssa_constants, def.index + 1, v); } diff --git a/src/gallium/drivers/panfrost/pan_swizzle.c b/src/gallium/drivers/panfrost/pan_swizzle.c index 52a907ddd55..60f6953443f 100644 --- a/src/gallium/drivers/panfrost/pan_swizzle.c +++ b/src/gallium/drivers/panfrost/pan_swizzle.c @@ -164,10 +164,10 @@ panfrost_texture_swizzle(unsigned off_x, /* Use fast path if available */ if (!(off_x || off_y) && (width == dest_width)) { - if (bytes_per_pixel == 4 /* && (ALIGN(width, 16) == width) */) { + if (bytes_per_pixel == 4 && (ALIGN(width, 16) == width)) { swizzle_bpp4_align16(width, height, source_stride >> 2, (block_pitch * 256 >> 4), (const uint32_t *) pixels, (uint32_t *) ldest); return; - } else if (bytes_per_pixel == 1 /* && (ALIGN(width, 16) == width) */) { + } else if (bytes_per_pixel == 1 && (ALIGN(width, 16) == width)) { swizzle_bpp1_align16(width, height, source_stride, (block_pitch * 256 >> 4), pixels, (uint8_t *) ldest); return; } diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 014055b221e..0ccc753147b 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -62,7 +62,8 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, q->buf = r300->rws->buffer_create(r300->rws, r300screen->info.gart_page_size, r300screen->info.gart_page_size, - RADEON_DOMAIN_GTT, 0); + RADEON_DOMAIN_GTT, + RADEON_FLAG_NO_INTERPROCESS_SHARING); if (!q->buf) { FREE(q); return NULL; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 211d35d0607..de98d09cb36 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -915,7 +915,8 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, r300->vbo = rws->buffer_create(rws, MAX2(R300_MAX_DRAW_VBO_SIZE, size), R300_BUFFER_ALIGNMENT, - RADEON_DOMAIN_GTT, 0); + RADEON_DOMAIN_GTT, + RADEON_FLAG_NO_INTERPROCESS_SHARING); if (!r300->vbo) { return FALSE; } diff --git 
a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 4af1c46856e..c946cfc8d03 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -103,7 +103,8 @@ r300_buffer_transfer_map( struct pipe_context *context, /* Create a new one in the same pipe_resource. */ new_buf = r300->rws->buffer_create(r300->rws, rbuf->b.b.width0, R300_BUFFER_ALIGNMENT, - rbuf->domain, 0); + rbuf->domain, + RADEON_FLAG_NO_INTERPROCESS_SHARING); if (new_buf) { /* Discard the old buffer. */ pb_reference(&rbuf->buf, NULL); @@ -183,7 +184,8 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, rbuf->b.b.width0, R300_BUFFER_ALIGNMENT, - rbuf->domain, 0); + rbuf->domain, + RADEON_FLAG_NO_INTERPROCESS_SHARING); if (!rbuf->buf) { FREE(rbuf); return NULL; diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 46d88b34638..21ade4022c5 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -1113,8 +1113,16 @@ r300_texture_create_object(struct r300_screen *rscreen, /* Create the backing buffer if needed. */ if (!tex->buf) { + /* Only use the first domain for allocation. Multiple domains are not allowed. */ + unsigned alloc_domain = + tex->domain & RADEON_DOMAIN_VRAM ? RADEON_DOMAIN_VRAM : + RADEON_DOMAIN_GTT; + tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048, - tex->domain, RADEON_FLAG_NO_SUBALLOC); + alloc_domain, + RADEON_FLAG_NO_SUBALLOC | + /* Use the reusable pool: */ + RADEON_FLAG_NO_INTERPROCESS_SHARING); if (!tex->buf) { goto fail; diff --git a/src/gallium/drivers/r600/radeon_uvd.c b/src/gallium/drivers/r600/radeon_uvd.c index 5568f2138e4..0f5bcc53212 100644 --- a/src/gallium/drivers/r600/radeon_uvd.c +++ b/src/gallium/drivers/r600/radeon_uvd.c @@ -247,7 +247,7 @@ static unsigned calc_ctx_size_h265_main(struct ruvd_decoder *dec) static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_h265_picture_desc *pic) { - unsigned block_size, log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; + unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); @@ -262,8 +262,8 @@ static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_ else max_references = MAX2(max_references, 17); - block_size = (1 << (pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3)); - log2_ctb_size = block_size + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 + + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index ca066e89823..95e8007cae2 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -298,7 +298,7 @@ static unsigned calc_ctx_size_h265_main(struct ruvd_decoder *dec) static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_h265_picture_desc *pic) { - unsigned block_size, log2_ctb_size, width_in_ctb, height_in_ctb, 
num_16x16_block_per_ctb; + unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); @@ -313,8 +313,8 @@ static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_ else max_references = MAX2(max_references, 17); - block_size = (1 << (pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3)); - log2_ctb_size = block_size + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 + + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c b/src/gallium/drivers/radeon/radeon_uvd_enc.c index 3164dbb2c20..639e5043543 100644 --- a/src/gallium/drivers/radeon/radeon_uvd_enc.c +++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c @@ -73,7 +73,8 @@ radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc, enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag; enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc; enc->enc_pic.general_level_idc = pic->seq.general_level_idc; - enc->enc_pic.max_poc = pic->seq.intra_period; + enc->enc_pic.max_poc = + MAX2(16, util_next_power_of_two(pic->seq.intra_period)); enc->enc_pic.log2_max_poc = 0; for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++) i = (i >> 1); diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c index 1f41b09472f..7e5be33ec54 100644 --- a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c +++ b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c @@ -573,7 +573,13 @@ radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc) enc->enc_pic.hevc_spec_misc. 
constrained_intra_pred_flag, 1); radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); - radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); + if (enc->enc_pic.rc_session_init.rate_control_method == + RENC_UVD_RATE_CONTROL_METHOD_NONE) + radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); + else { + radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); + radeon_uvd_enc_code_ue(enc, 0x0); + } radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset); radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset); radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); @@ -768,8 +774,7 @@ radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc) if ((enc->enc_pic.nal_unit_type != 19) && (enc->enc_pic.nal_unit_type != 20)) { radeon_uvd_enc_code_fixed_bits(enc, - enc->enc_pic.frame_num % - enc->enc_pic.max_poc, + enc->enc_pic.pic_order_cnt, enc->enc_pic.log2_max_poc); if (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P) radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.c b/src/gallium/drivers/radeon/radeon_vcn_dec.c index 688cef90103..b1d6edba466 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_dec.c +++ b/src/gallium/drivers/radeon/radeon_vcn_dec.c @@ -552,7 +552,7 @@ static unsigned calc_ctx_size_h265_main(struct radeon_decoder *dec) static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec, struct pipe_h265_picture_desc *pic) { - unsigned block_size, log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; + unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); @@ -568,8 +568,8 @@ static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec, struct pip else max_references = MAX2(max_references, 17); - block_size = (1 << (pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3)); - log2_ctb_size = block_size + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 + + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c b/src/gallium/drivers/radeon/radeon_vcn_enc.c index 7d64a28a405..c4fbf6eb63f 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_enc.c +++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c @@ -72,7 +72,8 @@ static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct pipe_pic enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag; enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc; enc->enc_pic.general_level_idc = pic->seq.general_level_idc; - enc->enc_pic.max_poc = pic->seq.intra_period; + enc->enc_pic.max_poc = + MAX2(16, util_next_power_of_two(pic->seq.intra_period)); enc->enc_pic.log2_max_poc = 0; for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++) i = (i >> 1); diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c index 7f5b1909344..fdf0e3ac06c 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c @@ -687,7 +687,13 @@ static void radeon_enc_nalu_pps_hevc(struct radeon_encoder *enc) radeon_enc_code_se(enc, 0x0); radeon_enc_code_fixed_bits(enc, 
enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag, 1); radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); + if (enc->enc_pic.rc_session_init.rate_control_method == + RENCODE_RATE_CONTROL_METHOD_NONE) + radeon_enc_code_fixed_bits(enc, 0x0, 1); + else { + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_ue(enc, 0x0); + } radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset); radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset); radeon_enc_code_fixed_bits(enc, 0x0, 1); @@ -988,7 +994,7 @@ static void radeon_enc_slice_header_hevc(struct radeon_encoder *enc) } if ((enc->enc_pic.nal_unit_type != 19) && (enc->enc_pic.nal_unit_type != 20)) { - radeon_enc_code_fixed_bits(enc, enc->enc_pic.frame_num % enc->enc_pic.max_poc, enc->enc_pic.log2_max_poc); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.pic_order_cnt, enc->enc_pic.log2_max_poc); if (enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) radeon_enc_code_fixed_bits(enc, 0x1, 1); else { diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index da9b25a442d..1854e1226c3 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -502,7 +502,15 @@ static void cik_sdma_copy(struct pipe_context *ctx, return; } - if ((sctx->chip_class == CIK || sctx->chip_class == VI) && + /* SDMA causes corruption. See: + * https://bugs.freedesktop.org/show_bug.cgi?id=110575 + * https://bugs.freedesktop.org/show_bug.cgi?id=110635 + * + * Keep SDMA enabled on APUs. + */ + if ((sctx->screen->debug_flags & DBG(FORCE_DMA) || + !sctx->screen->info.has_dedicated_vram) && + (sctx->chip_class == CIK || sctx->chip_class == VI) && cik_sdma_copy_texture(sctx, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box)) return; diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c index 4936eb5a5b1..76705937b65 100644 --- a/src/gallium/drivers/radeonsi/si_buffer.c +++ b/src/gallium/drivers/radeonsi/si_buffer.c @@ -287,11 +287,9 @@ si_invalidate_buffer(struct si_context *sctx, /* Check if mapping this buffer would cause waiting for the GPU. */ if (si_rings_is_buffer_referenced(sctx, buf->buf, RADEON_USAGE_READWRITE) || !sctx->ws->buffer_wait(buf->buf, 0, RADEON_USAGE_READWRITE)) { - uint64_t old_va = buf->gpu_address; - /* Reallocate the buffer in the same pipe_resource. 
*/ si_alloc_resource(sctx->screen, buf); - si_rebind_buffer(sctx, &buf->b.b, old_va); + si_rebind_buffer(sctx, &buf->b.b); } else { util_range_set_empty(&buf->valid_buffer_range); } @@ -307,7 +305,6 @@ void si_replace_buffer_storage(struct pipe_context *ctx, struct si_context *sctx = (struct si_context*)ctx; struct si_resource *sdst = si_resource(dst); struct si_resource *ssrc = si_resource(src); - uint64_t old_gpu_address = sdst->gpu_address; pb_reference(&sdst->buf, ssrc->buf); sdst->gpu_address = ssrc->gpu_address; @@ -322,7 +319,7 @@ void si_replace_buffer_storage(struct pipe_context *ctx, assert(sdst->bo_alignment == ssrc->bo_alignment); assert(sdst->domains == ssrc->domains); - si_rebind_buffer(sctx, dst, old_gpu_address); + si_rebind_buffer(sctx, dst); } static void si_invalidate_resource(struct pipe_context *ctx, diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index f1a433b72df..756f5372fa2 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -500,9 +500,13 @@ static bool si_switch_compute_shader(struct si_context *sctx, COMPUTE_DBG(sctx->screen, "COMPUTE_PGM_RSRC1: 0x%08x " "COMPUTE_PGM_RSRC2: 0x%08x\n", config->rsrc1, config->rsrc2); + sctx->max_seen_compute_scratch_bytes_per_wave = + MAX2(sctx->max_seen_compute_scratch_bytes_per_wave, + config->scratch_bytes_per_wave); + radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE, S_00B860_WAVES(sctx->scratch_waves) - | S_00B860_WAVESIZE(config->scratch_bytes_per_wave >> 10)); + | S_00B860_WAVESIZE(sctx->max_seen_compute_scratch_bytes_per_wave >> 10)); sctx->cs_shader_state.emitted_program = program; sctx->cs_shader_state.offset = offset; diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index f795c33cf26..ca25d424fb5 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -999,6 +999,7 @@ static void si_init_buffer_resources(struct si_buffer_resources *buffers, buffers->priority = priority; buffers->priority_constbuf = priority_constbuf; buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*)); + buffers->offsets = CALLOC(num_buffers, sizeof(buffers->offsets[0])); si_init_descriptors(descs, shader_userdata_rel_index, 4, num_buffers); } @@ -1013,6 +1014,7 @@ static void si_release_buffer_resources(struct si_buffer_resources *buffers, } FREE(buffers->buffers); + FREE(buffers->offsets); } static void si_buffer_resources_begin_new_cs(struct si_context *sctx, @@ -1219,11 +1221,10 @@ static void si_set_constant_buffer(struct si_context *sctx, if (input && (input->buffer || input->user_buffer)) { struct pipe_resource *buffer = NULL; uint64_t va; + unsigned buffer_offset; /* Upload the user buffer if needed. */ if (input->user_buffer) { - unsigned buffer_offset; - si_upload_const_buffer(sctx, (struct si_resource**)&buffer, input->user_buffer, input->buffer_size, &buffer_offset); @@ -1232,12 +1233,13 @@ static void si_set_constant_buffer(struct si_context *sctx, si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, NULL); return; } - va = si_resource(buffer)->gpu_address + buffer_offset; } else { pipe_resource_reference(&buffer, input->buffer); - va = si_resource(buffer)->gpu_address + input->buffer_offset; + buffer_offset = input->buffer_offset; } + va = si_resource(buffer)->gpu_address + buffer_offset; + /* Set the descriptor. 
*/ uint32_t *desc = descs->list + slot*4; desc[0] = va; @@ -1252,6 +1254,7 @@ static void si_set_constant_buffer(struct si_context *sctx, S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); buffers->buffers[slot] = buffer; + buffers->offsets[slot] = buffer_offset; radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ, @@ -1336,6 +1339,7 @@ static void si_set_shader_buffer(struct si_context *sctx, S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); pipe_resource_reference(&buffers->buffers[slot], &buf->b.b); + buffers->offsets[slot] = sbuffer->buffer_offset; radeon_add_to_gfx_buffer_list_check_mem(sctx, buf, writable ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, @@ -1505,20 +1509,6 @@ void si_set_ring_buffer(struct si_context *sctx, uint slot, sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; } -static void si_desc_reset_buffer_offset(uint32_t *desc, uint64_t old_buf_va, - struct pipe_resource *new_buf) -{ - /* Retrieve the buffer offset from the descriptor. */ - uint64_t old_desc_va = si_desc_extract_buffer_address(desc); - - assert(old_buf_va <= old_desc_va); - uint64_t offset_within_buffer = old_desc_va - old_buf_va; - - /* Update the descriptor. */ - si_set_buf_desc_address(si_resource(new_buf), offset_within_buffer, - desc); -} - /* INTERNAL CONST BUFFERS */ static void si_set_polygon_stipple(struct pipe_context *ctx, @@ -1597,13 +1587,14 @@ void si_update_needs_color_decompress_masks(struct si_context *sctx) /* BUFFER DISCARD/INVALIDATION */ -/** Reset descriptors of buffer resources after \p buf has been invalidated. */ +/* Reset descriptors of buffer resources after \p buf has been invalidated. + * If buf == NULL, reset all descriptors. + */ static void si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers, unsigned descriptors_idx, unsigned slot_mask, struct pipe_resource *buf, - uint64_t old_va, enum radeon_bo_priority priority) { struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; @@ -1611,13 +1602,15 @@ static void si_reset_buffer_resources(struct si_context *sctx, while (mask) { unsigned i = u_bit_scan(&mask); - if (buffers->buffers[i] == buf) { - si_desc_reset_buffer_offset(descs->list + i*4, - old_va, buf); + struct pipe_resource *buffer = buffers->buffers[i]; + + if (buffer && (!buf || buffer == buf)) { + si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i], + descs->list + i*4); sctx->descriptors_dirty |= 1u << descriptors_idx; radeon_add_to_gfx_buffer_list_check_mem(sctx, - si_resource(buf), + si_resource(buffer), buffers->writable_mask & (1u << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, @@ -1626,11 +1619,13 @@ static void si_reset_buffer_resources(struct si_context *sctx, } } -/* Update all resource bindings where the buffer is bound, including +/* Update all buffer bindings where the buffer is bound, including * all resource descriptors. This is invalidate_buffer without - * the invalidation. */ -void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, - uint64_t old_va) + * the invalidation. + * + * If buf == NULL, update all buffer bindings. + */ +void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) { struct si_resource *buffer = si_resource(buf); unsigned i, shader; @@ -1644,7 +1639,10 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, */ /* Vertex buffers. 
*/ - if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) { + if (!buffer) { + if (num_elems) + sctx->vertex_buffers_dirty = true; + } else if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) { for (i = 0; i < num_elems; i++) { int vb = sctx->vertex_elements->vertex_buffer_index[i]; @@ -1661,21 +1659,23 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, } /* Streamout buffers. (other internal buffers can't be invalidated) */ - if (buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) { + if (!buffer || buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) { for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) { struct si_buffer_resources *buffers = &sctx->rw_buffers; struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS]; + struct pipe_resource *buffer = buffers->buffers[i]; - if (buffers->buffers[i] != buf) + if (!buffer || (buf && buffer != buf)) continue; - si_desc_reset_buffer_offset(descs->list + i*4, - old_va, buf); + si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i], + descs->list + i*4); sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; radeon_add_to_gfx_buffer_list_check_mem(sctx, - buffer, RADEON_USAGE_WRITE, + si_resource(buffer), + RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER, true); @@ -1689,25 +1689,25 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, } /* Constant and shader buffers. */ - if (buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) { + if (!buffer || buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) { for (shader = 0; shader < SI_NUM_SHADERS; shader++) si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors_idx(shader), u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS), - buf, old_va, + buf, sctx->const_and_shader_buffers[shader].priority_constbuf); } - if (buffer->bind_history & PIPE_BIND_SHADER_BUFFER) { + if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_BUFFER) { for (shader = 0; shader < SI_NUM_SHADERS; shader++) si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors_idx(shader), u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS), - buf, old_va, + buf, sctx->const_and_shader_buffers[shader].priority); } - if (buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) { + if (!buffer || buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) { /* Texture buffers - update bindings. 
*/ for (shader = 0; shader < SI_NUM_SHADERS; shader++) { struct si_samplers *samplers = &sctx->samplers[shader]; @@ -1717,26 +1717,29 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, while (mask) { unsigned i = u_bit_scan(&mask); - if (samplers->views[i]->texture == buf) { + struct pipe_resource *buffer = samplers->views[i]->texture; + + if (buffer && buffer->target == PIPE_BUFFER && + (!buf || buffer == buf)) { unsigned desc_slot = si_get_sampler_slot(i); - si_desc_reset_buffer_offset(descs->list + - desc_slot * 16 + 4, - old_va, buf); + si_set_buf_desc_address(si_resource(buffer), + samplers->views[i]->u.buf.offset, + descs->list + desc_slot * 16 + 4); sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); - radeon_add_to_gfx_buffer_list_check_mem(sctx, - buffer, RADEON_USAGE_READ, - RADEON_PRIO_SAMPLER_BUFFER, - true); + radeon_add_to_gfx_buffer_list_check_mem( + sctx, si_resource(buffer), + RADEON_USAGE_READ, + RADEON_PRIO_SAMPLER_BUFFER, true); } } } } /* Shader images */ - if (buffer->bind_history & PIPE_BIND_SHADER_IMAGE) { + if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_IMAGE) { for (shader = 0; shader < SI_NUM_SHADERS; ++shader) { struct si_images *images = &sctx->images[shader]; struct si_descriptors *descs = @@ -1745,21 +1748,23 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, while (mask) { unsigned i = u_bit_scan(&mask); + struct pipe_resource *buffer = images->views[i].resource; - if (images->views[i].resource == buf) { + if (buffer && buffer->target == PIPE_BUFFER && + (!buf || buffer == buf)) { unsigned desc_slot = si_get_image_slot(i); if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE) si_mark_image_range_valid(&images->views[i]); - si_desc_reset_buffer_offset( - descs->list + desc_slot * 8 + 4, - old_va, buf); + si_set_buf_desc_address(si_resource(buffer), + images->views[i].u.buf.offset, + descs->list + desc_slot * 8 + 4); sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); radeon_add_to_gfx_buffer_list_check_mem( - sctx, buffer, + sctx, si_resource(buffer), RADEON_USAGE_READWRITE, RADEON_PRIO_SAMPLER_BUFFER, true); } @@ -1768,16 +1773,18 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, } /* Bindless texture handles */ - if (buffer->texture_handle_allocated) { + if (!buffer || buffer->texture_handle_allocated) { struct si_descriptors *descs = &sctx->bindless_descriptors; util_dynarray_foreach(&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) { struct pipe_sampler_view *view = (*tex_handle)->view; unsigned desc_slot = (*tex_handle)->desc_slot; + struct pipe_resource *buffer = view->texture; - if (view->texture == buf) { - si_set_buf_desc_address(buffer, + if (buffer && buffer->target == PIPE_BUFFER && + (!buf || buffer == buf)) { + si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset, descs->list + desc_slot * 16 + 4); @@ -1786,7 +1793,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, sctx->bindless_descriptors_dirty = true; radeon_add_to_gfx_buffer_list_check_mem( - sctx, buffer, + sctx, si_resource(buffer), RADEON_USAGE_READ, RADEON_PRIO_SAMPLER_BUFFER, true); } @@ -1794,19 +1801,21 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, } /* Bindless image handles */ - if (buffer->image_handle_allocated) { + if (!buffer || buffer->image_handle_allocated) { struct si_descriptors *descs = &sctx->bindless_descriptors; 
util_dynarray_foreach(&sctx->resident_img_handles, struct si_image_handle *, img_handle) { struct pipe_image_view *view = &(*img_handle)->view; unsigned desc_slot = (*img_handle)->desc_slot; + struct pipe_resource *buffer = view->resource; - if (view->resource == buf) { + if (buffer && buffer->target == PIPE_BUFFER && + (!buf || buffer == buf)) { if (view->access & PIPE_IMAGE_ACCESS_WRITE) si_mark_image_range_valid(view); - si_set_buf_desc_address(buffer, + si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset, descs->list + desc_slot * 16 + 4); @@ -1815,12 +1824,25 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, sctx->bindless_descriptors_dirty = true; radeon_add_to_gfx_buffer_list_check_mem( - sctx, buffer, + sctx, si_resource(buffer), RADEON_USAGE_READWRITE, RADEON_PRIO_SAMPLER_BUFFER, true); } } } + + if (buffer) { + /* Do the same for other contexts. They will invoke this function + * with buffer == NULL. + */ + unsigned new_counter = p_atomic_inc_return(&sctx->screen->dirty_buf_counter); + + /* Skip the update for the current context, because we have already updated + * the buffer bindings. + */ + if (new_counter == sctx->last_dirty_buf_counter + 1) + sctx->last_dirty_buf_counter = new_counter; + } } static void si_upload_bindless_descriptor(struct si_context *sctx, diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 4e23d283ab7..e526f3009bf 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -155,7 +155,8 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_FBFETCH: case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK: case PIPE_CAP_IMAGE_LOAD_FORMATTED: - case PIPE_CAP_PREFER_COMPUTE_BLIT_FOR_MULTIMEDIA: + case PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA: + case PIPE_CAP_TGSI_DIV: return 1; case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index b0e0ca7af05..2f484f7052a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -464,9 +464,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, 0, PIPE_USAGE_DEFAULT, SI_RESOURCE_FLAG_32BIT | (use_sdma_upload ? - SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA : - (sscreen->cpdma_prefetch_writes_memory ? - 0 : SI_RESOURCE_FLAG_READ_ONLY))); + SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA : 0)); if (!sctx->b.const_uploader) goto fail; @@ -514,9 +512,6 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, si_init_fence_functions(sctx); si_init_state_compute_functions(sctx); - if (sscreen->debug_flags & DBG(FORCE_DMA)) - sctx->b.resource_copy_region = sctx->dma_copy; - /* Initialize graphics-only context functions. */ if (sctx->has_graphics) { si_init_context_texture_functions(sctx); @@ -541,6 +536,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, else si_init_dma_functions(sctx); + if (sscreen->debug_flags & DBG(FORCE_DMA)) + sctx->b.resource_copy_region = sctx->dma_copy; + sctx->sample_mask = 0xffff; /* Initialize multimedia functions. */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index d3ddb912245..35e548cdec5 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -526,6 +526,7 @@ struct si_screen { * the counter before drawing and re-emit the states accordingly. 
*/ unsigned dirty_tex_counter; + unsigned dirty_buf_counter; /* Atomically increment this counter when an existing texture's * metadata is enabled or disabled in a way that requires changing @@ -852,6 +853,7 @@ struct si_context { unsigned initial_gfx_cs_size; unsigned gpu_reset_counter; unsigned last_dirty_tex_counter; + unsigned last_dirty_buf_counter; unsigned last_compressed_colortex_counter; unsigned last_num_draw_calls; unsigned flags; /* flush flags */ @@ -982,6 +984,8 @@ struct si_context { struct si_resource *scratch_buffer; unsigned scratch_waves; unsigned spi_tmpring_size; + unsigned max_seen_scratch_bytes_per_wave; + unsigned max_seen_compute_scratch_bytes_per_wave; struct si_resource *compute_scratch_buffer; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 6df24f9648a..6d74d774b6d 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -409,6 +409,7 @@ struct si_descriptors { struct si_buffer_resources { struct pipe_resource **buffers; /* this has num_buffers elements */ + unsigned *offsets; /* this has num_buffers elements */ enum radeon_bo_priority priority:6; enum radeon_bo_priority priority_constbuf:6; @@ -487,8 +488,7 @@ struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_index); void si_bindless_descriptor_slab_free(void *priv, struct pb_slab *pslab); -void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, - uint64_t old_va); +void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf); /* si_state.c */ void si_init_state_compute_functions(struct si_context *sctx); void si_init_state_functions(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 8e01e1b35e1..d9dfef0a381 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -1254,7 +1254,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i struct si_context *sctx = (struct si_context *)ctx; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; struct pipe_resource *indexbuf = info->index.resource; - unsigned dirty_tex_counter; + unsigned dirty_tex_counter, dirty_buf_counter; enum pipe_prim_type rast_prim; unsigned index_size = info->index_size; unsigned index_offset = info->indirect ? info->start * index_size : 0; @@ -1292,6 +1292,13 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i si_update_all_texture_descriptors(sctx); } + dirty_buf_counter = p_atomic_read(&sctx->screen->dirty_buf_counter); + if (unlikely(dirty_buf_counter != sctx->last_dirty_buf_counter)) { + sctx->last_dirty_buf_counter = dirty_buf_counter; + /* Rebind all buffers unconditionally. */ + si_rebind_buffer(sctx, NULL); + } + si_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS)); /* Set the rasterization primitive type. diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index db4c77da2ff..ef8943d9011 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -3104,11 +3104,6 @@ static int si_update_scratch_buffer(struct si_context *sctx, return 1; } -static unsigned si_get_current_scratch_buffer_size(struct si_context *sctx) -{ - return sctx->scratch_buffer ? 
sctx->scratch_buffer->b.b.width0 : 0; -} - static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader *shader) { return shader ? shader->config.scratch_bytes_per_wave : 0; @@ -3123,23 +3118,6 @@ static struct si_shader *si_get_tcs_current(struct si_context *sctx) sctx->fixed_func_tcs_shader.current; } -static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx) -{ - unsigned bytes = 0; - - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader.current)); - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader.current)); - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader.current)); - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader.current)); - - if (sctx->tes_shader.cso) { - struct si_shader *tcs = si_get_tcs_current(sctx); - - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(tcs)); - } - return bytes; -} - static bool si_update_scratch_relocs(struct si_context *sctx) { struct si_shader *tcs = si_get_tcs_current(sctx); @@ -3197,16 +3175,40 @@ static bool si_update_scratch_relocs(struct si_context *sctx) static bool si_update_spi_tmpring_size(struct si_context *sctx) { - unsigned current_scratch_buffer_size = - si_get_current_scratch_buffer_size(sctx); - unsigned scratch_bytes_per_wave = - si_get_max_scratch_bytes_per_wave(sctx); - unsigned scratch_needed_size = scratch_bytes_per_wave * - sctx->scratch_waves; + /* SPI_TMPRING_SIZE.WAVESIZE must be constant for each scratch buffer. + * There are 2 cases to handle: + * + * - If the current needed size is less than the maximum seen size, + * use the maximum seen size, so that WAVESIZE remains the same. + * + * - If the current needed size is greater than the maximum seen size, + * the scratch buffer is reallocated, so we can increase WAVESIZE. + * + * Shaders that set SCRATCH_EN=0 don't allocate scratch space. + * Otherwise, the number of waves that can use scratch is + * SPI_TMPRING_SIZE.WAVES. 
+ */ + unsigned bytes = 0; + + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader.current)); + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader.current)); + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader.current)); + + if (sctx->tes_shader.cso) { + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader.current)); + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(si_get_tcs_current(sctx))); + } + + sctx->max_seen_scratch_bytes_per_wave = + MAX2(sctx->max_seen_scratch_bytes_per_wave, bytes); + + unsigned scratch_needed_size = + sctx->max_seen_scratch_bytes_per_wave * sctx->scratch_waves; unsigned spi_tmpring_size; if (scratch_needed_size > 0) { - if (scratch_needed_size > current_scratch_buffer_size) { + if (!sctx->scratch_buffer || + scratch_needed_size > sctx->scratch_buffer->b.b.width0) { /* Create a bigger scratch buffer */ si_resource_reference(&sctx->scratch_buffer, NULL); @@ -3232,7 +3234,7 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx) "scratch size should already be aligned correctly."); spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) | - S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10); + S_0286E8_WAVESIZE(sctx->max_seen_scratch_bytes_per_wave >> 10); if (spi_tmpring_size != sctx->spi_tmpring_size) { sctx->spi_tmpring_size = spi_tmpring_size; si_mark_atom_dirty(sctx, &sctx->atoms.s.scratch_state); diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index 59d50376438..91230c5f0da 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -837,8 +837,7 @@ static boolean si_texture_get_handle(struct pipe_screen* screen, if (sscreen->ws->buffer_is_suballocated(res->buf) || tex->surface.tile_swizzle || (tex->buffer.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING && - sscreen->info.has_local_buffers && - whandle->type != WINSYS_HANDLE_TYPE_KMS)) { + sscreen->info.has_local_buffers)) { assert(!res->b.is_shared); si_reallocate_texture_inplace(sctx, tex, PIPE_BIND_SHARED, false); diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 5d44824e202..f79f20ee6ab 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -174,6 +174,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_DOUBLES: case PIPE_CAP_INT64: case PIPE_CAP_INT64_DIVMOD: + case PIPE_CAP_TGSI_DIV: return 1; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 16; diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index a4470e6cb07..45d4eda5377 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -659,15 +659,6 @@ compute_lambda_vert(const struct sp_sampler_view *sview, } -static float -compute_lambda_vert_explicite_gradients(UNUSED const struct sp_sampler_view *sview, - UNUSED const float derivs[3][2][TGSI_QUAD_SIZE], - UNUSED int quad) -{ - return 0.0f; -} - - compute_lambda_from_grad_func softpipe_get_lambda_from_grad_func(const struct pipe_sampler_view *view, enum pipe_shader_type shader) diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 9e13ee8ce6d..b6c21a866fe 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -568,11 +568,11 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl) 
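/* Illustrative sketch of the scratch sizing scheme introduced above in
 * si_update_spi_tmpring_size (hypothetical toy_* names, simplified fields).
 * The per-wave size only ever grows, so SPI_TMPRING_SIZE.WAVESIZE stays
 * consistent with the buffer the in-flight waves are using, and the buffer
 * is reallocated exactly when the grown requirement no longer fits.
 */
struct toy_scratch_state {
   unsigned max_seen_bytes_per_wave;
   unsigned scratch_waves;
   unsigned scratch_buffer_size;   /* 0 = no scratch buffer allocated yet */
};

static unsigned toy_update_scratch(struct toy_scratch_state *s,
                                   unsigned bytes_per_wave_now)
{
   if (bytes_per_wave_now > s->max_seen_bytes_per_wave)
      s->max_seen_bytes_per_wave = bytes_per_wave_now;

   unsigned needed = s->max_seen_bytes_per_wave * s->scratch_waves;

   if (needed > s->scratch_buffer_size) {
      /* stands in for freeing the old buffer and allocating a bigger one */
      s->scratch_buffer_size = needed;
   }

   /* WAVESIZE is programmed in 1 KiB units, hence the >> 10 */
   return s->max_seen_bytes_per_wave >> 10;
}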
vbuffer_attrs[i].sid = 0; } - /* If we haven't yet emitted a drawing command or if any - * vertex buffer state is changing, issue that state now. + /* If any of the vertex buffer state has changed, issue + * the SetVertexBuffers command. Otherwise, we will just + * need to rebind the resources. */ - if (((hwtnl->cmd.swc->hints & SVGA_HINT_FLAG_CAN_PRE_FLUSH) == 0) || - vbuf_count != svga->state.hw_draw.num_vbuffers || + if (vbuf_count != svga->state.hw_draw.num_vbuffers || !vertex_buffers_equal(vbuf_count, vbuffer_attrs, vbuffers, diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index f747ff78bcf..631778a7437 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -478,6 +478,8 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_PACKED_UNIFORMS: case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: return 0; + case PIPE_CAP_TGSI_DIV: + return 1; case PIPE_CAP_MAX_GS_INVOCATIONS: return 32; case PIPE_CAP_MAX_SHADER_BUFFER_SIZE: diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp index bd48fb2aae7..153e2af7eae 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp @@ -565,68 +565,3 @@ using Vec4 = typename SIMD_T::Vec4; template using Mask = typename SIMD_T::Mask; -template -struct SIMDVecEqual -{ - INLINE bool operator()(Integer a, Integer b) const - { - Integer c = SIMD_T::xor_si(a, b); - return SIMD_T::testz_si(c, c); - } - - INLINE bool operator()(Float a, Float b) const - { - return this->operator()(SIMD_T::castps_si(a), SIMD_T::castps_si(b)); - } - - INLINE bool operator()(Double a, Double b) const - { - return this->operator()(SIMD_T::castpd_si(a), SIMD_T::castpd_si(b)); - } -}; - -template -struct SIMDVecHash -{ - INLINE uint32_t operator()(Integer val) const - { -#if defined(_WIN64) || !defined(_WIN32) // assume non-Windows is always 64-bit - static_assert(sizeof(void*) == 8, "This path only meant for 64-bit code"); - - uint64_t crc32 = 0; - const uint64_t* pData = reinterpret_cast(&val); - static const uint32_t loopIterations = sizeof(val) / sizeof(void*); - static_assert(loopIterations * sizeof(void*) == sizeof(val), "bad vector size"); - - for (uint32_t i = 0; i < loopIterations; ++i) - { - crc32 = _mm_crc32_u64(crc32, pData[i]); - } - - return static_cast(crc32); -#else - static_assert(sizeof(void*) == 4, "This path only meant for 32-bit code"); - - uint32_t crc32 = 0; - const uint32_t* pData = reinterpret_cast(&val); - static const uint32_t loopIterations = sizeof(val) / sizeof(void*); - static_assert(loopIterations * sizeof(void*) == sizeof(val), "bad vector size"); - - for (uint32_t i = 0; i < loopIterations; ++i) - { - crc32 = _mm_crc32_u32(crc32, pData[i]); - } - - return crc32; -#endif - }; - - INLINE uint32_t operator()(Float val) const - { - return operator()(SIMD_T::castps_si(val)); - }; - INLINE uint32_t operator()(Double val) const - { - return operator()(SIMD_T::castpd_si(val)); - } -}; diff --git a/src/gallium/drivers/virgl/virgl_resource.c b/src/gallium/drivers/virgl/virgl_resource.c index ef81f213f40..6d4c9f5fd00 100644 --- a/src/gallium/drivers/virgl/virgl_resource.c +++ b/src/gallium/drivers/virgl/virgl_resource.c @@ -112,6 +112,7 @@ static struct pipe_resource *virgl_resource_from_handle(struct pipe_screen *scre res->u.b = *templ; res->u.b.screen = &vs->base; pipe_reference_init(&res->u.b.reference, 1); + 
virgl_resource_layout(&res->u.b, &res->metadata); res->hw_res = vs->vws->resource_create_from_handle(vs->vws, whandle); if (!res->hw_res) { diff --git a/src/gallium/drivers/virgl/virgl_screen.c b/src/gallium/drivers/virgl/virgl_screen.c index 65106dbb616..a021a6490f6 100644 --- a/src/gallium/drivers/virgl/virgl_screen.c +++ b/src/gallium/drivers/virgl/virgl_screen.c @@ -356,7 +356,8 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_NATIVE_FENCE_FD: return vscreen->vws->supports_fences; case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: - return vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_SRGB_WRITE_CONTROL; + return (vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_SRGB_WRITE_CONTROL) || + (vscreen->caps.caps.v2.host_feature_check_version < 1); case PIPE_CAP_TGSI_SKIP_SHRINK_IO_ARRAYS: return vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_INDIRECT_INPUT_ADDR; default: diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 672a2ea7378..8b9a2cd07f9 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -882,7 +882,8 @@ enum pipe_cap PIPE_CAP_IMAGE_LOAD_FORMATTED, PIPE_CAP_MAX_FRAMES_IN_FLIGHT, PIPE_CAP_DMABUF, - PIPE_CAP_PREFER_COMPUTE_BLIT_FOR_MULTIMEDIA, + PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA, + PIPE_CAP_TGSI_DIV, }; /** diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c index f7672bcae06..8e78da7a06a 100644 --- a/src/gallium/state_trackers/dri/dri2.c +++ b/src/gallium/state_trackers/dri/dri2.c @@ -72,6 +72,10 @@ struct dri2_format_mapping { }; static const struct dri2_format_mapping dri2_format_table[] = { + { __DRI_IMAGE_FOURCC_ABGR16161616F, __DRI_IMAGE_FORMAT_ABGR16161616F, + __DRI_IMAGE_COMPONENTS_RGBA, PIPE_FORMAT_R16G16B16A16_FLOAT }, + { __DRI_IMAGE_FOURCC_XBGR16161616F, __DRI_IMAGE_FORMAT_XBGR16161616F, + __DRI_IMAGE_COMPONENTS_RGB, PIPE_FORMAT_R16G16B16X16_FLOAT }, { __DRI_IMAGE_FOURCC_ARGB2101010, __DRI_IMAGE_FORMAT_ARGB2101010, __DRI_IMAGE_COMPONENTS_RGBA, PIPE_FORMAT_B10G10R10A2_UNORM }, { __DRI_IMAGE_FOURCC_XRGB2101010, __DRI_IMAGE_FORMAT_XRGB2101010, @@ -222,6 +226,12 @@ dri2_drawable_get_buffers(struct dri_drawable *drawable, * may occur as the stvis->color_format. 
*/ switch(format) { + case PIPE_FORMAT_R16G16B16A16_FLOAT: + depth = 64; + break; + case PIPE_FORMAT_R16G16B16X16_FLOAT: + depth = 48; + break; case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_BGRA8888_UNORM: @@ -300,6 +310,12 @@ dri_image_drawable_get_buffers(struct dri_drawable *drawable, } switch (pf) { + case PIPE_FORMAT_R16G16B16A16_FLOAT: + image_format = __DRI_IMAGE_FORMAT_ABGR16161616F; + break; + case PIPE_FORMAT_R16G16B16X16_FLOAT: + image_format = __DRI_IMAGE_FORMAT_XBGR16161616F; + break; case PIPE_FORMAT_B5G5R5A1_UNORM: image_format = __DRI_IMAGE_FORMAT_ARGB1555; break; @@ -373,6 +389,12 @@ dri2_allocate_buffer(__DRIscreen *sPriv, bind |= PIPE_BIND_SHARED; switch (format) { + case 64: + pf = PIPE_FORMAT_R16G16B16A16_FLOAT; + break; + case 48: + pf = PIPE_FORMAT_R16G16B16X16_FLOAT; + break; case 32: pf = PIPE_FORMAT_BGRA8888_UNORM; break; diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c index c1de3bed9dd..df375b67f3f 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.c +++ b/src/gallium/state_trackers/dri/dri_drawable.c @@ -260,6 +260,9 @@ dri_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target, if (format == __DRI_TEXTURE_FORMAT_RGB) { /* only need to cover the formats recognized by dri_fill_st_visual */ switch (internal_format) { + case PIPE_FORMAT_R16G16B16A16_FLOAT: + internal_format = PIPE_FORMAT_R16G16B16X16_FLOAT; + break; case PIPE_FORMAT_B10G10R10A2_UNORM: internal_format = PIPE_FORMAT_B10G10R10X2_UNORM; break; diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c index 82a0988a634..b7a6734e98c 100644 --- a/src/gallium/state_trackers/dri/dri_screen.c +++ b/src/gallium/state_trackers/dri/dri_screen.c @@ -121,6 +121,8 @@ dri_fill_in_modes(struct dri_screen *screen) MESA_FORMAT_B8G8R8A8_SRGB, MESA_FORMAT_B8G8R8X8_SRGB, MESA_FORMAT_B5G6R5_UNORM, + MESA_FORMAT_RGBA_FLOAT16, + MESA_FORMAT_RGBX_FLOAT16, /* The 32-bit RGBA format must not precede the 32-bit BGRA format. * Likewise for RGBX and BGRX. Otherwise, the GLX client and the GLX @@ -153,6 +155,8 @@ dri_fill_in_modes(struct dri_screen *screen) PIPE_FORMAT_BGRA8888_SRGB, PIPE_FORMAT_BGRX8888_SRGB, PIPE_FORMAT_B5G6R5_UNORM, + PIPE_FORMAT_R16G16B16A16_FLOAT, + PIPE_FORMAT_R16G16B16X16_FLOAT, PIPE_FORMAT_RGBA8888_UNORM, PIPE_FORMAT_RGBX8888_UNORM, }; @@ -166,7 +170,9 @@ dri_fill_in_modes(struct dri_screen *screen) struct pipe_screen *p_screen = screen->base.screen; boolean pf_z16, pf_x8z24, pf_z24x8, pf_s8z24, pf_z24s8, pf_z32; boolean mixed_color_depth; + boolean allow_rgba_ordering; boolean allow_rgb10; + boolean allow_fp16; static const GLenum back_buffer_modes[] = { __DRI_ATTRIB_SWAP_NONE, __DRI_ATTRIB_SWAP_UNDEFINED, @@ -183,7 +189,10 @@ dri_fill_in_modes(struct dri_screen *screen) depth_buffer_factor = 1; } + allow_rgba_ordering = dri_loader_get_cap(screen, DRI_LOADER_CAP_RGBA_ORDERING); allow_rgb10 = driQueryOptionb(&screen->dev->option_cache, "allow_rgb10_configs"); + allow_fp16 = driQueryOptionb(&screen->dev->option_cache, "allow_fp16_configs"); + allow_fp16 &= dri_loader_get_cap(screen, DRI_LOADER_CAP_FP16); msaa_samples_max = (screen->st_api->feature_mask & ST_API_FEATURE_MS_VISUALS_MASK) ? MSAA_VISUAL_MAX_SAMPLES : 1; @@ -231,19 +240,18 @@ dri_fill_in_modes(struct dri_screen *screen) assert(ARRAY_SIZE(mesa_formats) == ARRAY_SIZE(pipe_formats)); - /* Expose only BGRA ordering if the loader doesn't support RGBA ordering. 
*/ - unsigned num_formats; - if (dri_loader_get_cap(screen, DRI_LOADER_CAP_RGBA_ORDERING)) - num_formats = ARRAY_SIZE(mesa_formats); - else - num_formats = ARRAY_SIZE(mesa_formats) - 2; /* all - RGBA_ORDERING formats */ - /* Add configs. */ - for (format = 0; format < num_formats; format++) { + for (format = 0; format < ARRAY_SIZE(mesa_formats); format++) { __DRIconfig **new_configs = NULL; unsigned num_msaa_modes = 0; /* includes a single-sample mode */ uint8_t msaa_modes[MSAA_VISUAL_MAX_SAMPLES]; + /* Expose only BGRA ordering if the loader doesn't support RGBA ordering. */ + if (!allow_rgba_ordering && + (mesa_formats[format] == MESA_FORMAT_R8G8B8A8_UNORM || + mesa_formats[format] == MESA_FORMAT_R8G8B8X8_UNORM)) + continue; + if (!allow_rgb10 && (mesa_formats[format] == MESA_FORMAT_B10G10R10A2_UNORM || mesa_formats[format] == MESA_FORMAT_B10G10R10X2_UNORM || @@ -251,6 +259,11 @@ dri_fill_in_modes(struct dri_screen *screen) mesa_formats[format] == MESA_FORMAT_R10G10B10X2_UNORM)) continue; + if (!allow_fp16 && + (mesa_formats[format] == MESA_FORMAT_RGBA_FLOAT16 || + mesa_formats[format] == MESA_FORMAT_RGBX_FLOAT16)) + continue; + if (!p_screen->is_format_supported(p_screen, pipe_formats[format], PIPE_TEXTURE_2D, 0, 0, PIPE_BIND_RENDER_TARGET | @@ -315,6 +328,17 @@ dri_fill_st_visual(struct st_visual *stvis, /* Deduce the color format. */ switch (mode->redMask) { + case 0: + /* Formats > 32 bpp */ + assert(mode->floatMode); + if (mode->alphaShift > -1) { + assert(mode->alphaShift == 48); + stvis->color_format = PIPE_FORMAT_R16G16B16A16_FLOAT; + } else { + stvis->color_format = PIPE_FORMAT_R16G16B16X16_FLOAT; + } + break; + case 0x3FF00000: if (mode->alphaMask) { assert(mode->alphaMask == 0xC0000000); diff --git a/src/gallium/state_trackers/va/postproc.c b/src/gallium/state_trackers/va/postproc.c index fbc55b7714b..3431b1b48c7 100644 --- a/src/gallium/state_trackers/va/postproc.c +++ b/src/gallium/state_trackers/va/postproc.c @@ -222,7 +222,7 @@ static VAStatus vlVaPostProcBlit(vlVaDriver *drv, vlVaContext *context, blit.filter = PIPE_TEX_MIPFILTER_LINEAR; if (drv->pipe->screen->get_param(drv->pipe->screen, - PIPE_CAP_PREFER_COMPUTE_BLIT_FOR_MULTIMEDIA)) + PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA)) util_compute_blit(drv->pipe, &blit, &context->blit_cs); else drv->pipe->blit(drv->pipe, &blit); diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c index b0b9bb12f2c..657c619ac42 100644 --- a/src/gallium/targets/d3dadapter9/drm.c +++ b/src/gallium/targets/d3dadapter9/drm.c @@ -243,8 +243,10 @@ drm_create_adapter( int fd, return D3DERR_DRIVERINTERNALERROR; } - ctx->base.throttling_value = - ctx->base.hal->get_param(ctx->base.hal, PIPE_CAP_MAX_FRAMES_IN_FLIGHT); + /* Previously was set to PIPE_CAP_MAX_FRAMES_IN_FLIGHT, + * but the change of value of this cap to 1 seems to cause + * regressions. 
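/* Sketch of the new ">32 bpp" deduction in dri_fill_st_visual above: the
 * half-float configs carry no channel masks (redMask is 0), so alpha
 * presence is read from alphaShift instead (the hunk asserts 48 when alpha
 * exists).  toy_float_color_format is a hypothetical helper; the
 * PIPE_FORMAT_* values are the ones used in the patch.
 */
#include <stdbool.h>
#include "pipe/p_format.h"

static enum pipe_format
toy_float_color_format(unsigned red_mask, bool float_mode, int alpha_shift)
{
   if (red_mask == 0 && float_mode) {
      return alpha_shift > -1 ? PIPE_FORMAT_R16G16B16A16_FLOAT
                              : PIPE_FORMAT_R16G16B16X16_FLOAT;
   }
   return PIPE_FORMAT_NONE;   /* defer to the existing mask-based cases */
}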
*/ + ctx->base.throttling_value = 2; ctx->base.throttling = ctx->base.throttling_value > 0; driParseOptionInfo(&defaultInitOptions, __driConfigOptionsNine); diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk index 6134251b5ca..10b27a80ef3 100644 --- a/src/gallium/targets/dri/Android.mk +++ b/src/gallium/targets/dri/Android.mk @@ -53,6 +53,10 @@ LOCAL_SHARED_LIBRARIES += \ libexpat endif +LOCAL_STATIC_LIBRARIES += \ + libfreedreno_drm \ + libfreedreno_ir3 + ifeq ($(USE_LIBBACKTRACE),true) LOCAL_SHARED_LIBRARIES += libbacktrace endif diff --git a/src/gallium/targets/dri/meson.build b/src/gallium/targets/dri/meson.build index dd40969a166..45daf647960 100644 --- a/src/gallium/targets/dri/meson.build +++ b/src/gallium/targets/dri/meson.build @@ -78,8 +78,8 @@ foreach d : [[with_gallium_kmsro, [ 'pl111_dri.so', 'repaper_dri.so', 'rockchip_dri.so', - 'st7586.so', - 'st7735r.so', + 'st7586_dri.so', + 'st7735r_dri.so', 'sun4i-drm_dri.so', ]], [with_gallium_radeonsi, 'radeonsi_dri.so'], diff --git a/src/gallium/targets/osmesa/meson.build b/src/gallium/targets/osmesa/meson.build index b4ae8f4b6ec..e873e311aa0 100644 --- a/src/gallium/targets/osmesa/meson.build +++ b/src/gallium/targets/osmesa/meson.build @@ -43,9 +43,9 @@ libosmesa = shared_library( inc_gallium_drivers, ], link_depends : osmesa_link_deps, - link_whole : [libosmesa_st], + link_whole : [libosmesa_st, libglapi_static], link_with : [ - libmesa_gallium, libgallium, libglapi_static, libws_null, osmesa_link_with, + libmesa_gallium, libgallium, libws_null, osmesa_link_with, ], dependencies : [ dep_selinux, dep_thread, dep_clock, dep_unwind, diff --git a/src/gallium/tests/trivial/meson.build b/src/gallium/tests/trivial/meson.build index bbb25519e12..1f912d5aa46 100644 --- a/src/gallium/tests/trivial/meson.build +++ b/src/gallium/tests/trivial/meson.build @@ -24,6 +24,7 @@ foreach t : ['compute', 'tri', 'quad-tex'] '@0@.c'.format(t), include_directories : inc_common, link_with : [libmesa_util, libgallium, libpipe_loader_dynamic], + dependencies : dep_thread, install : false, ) endforeach diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 4a2377f7e09..972030eaaa8 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -378,7 +378,8 @@ static bool amdgpu_cs_has_user_fence(struct amdgpu_cs_context *cs) cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCE && cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD_ENC && cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_DEC && - cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_ENC; + cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_ENC && + cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_JPEG; } static bool amdgpu_cs_has_chaining(struct amdgpu_cs *cs) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 490c246d6e0..2e0e79a3969 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -752,7 +752,9 @@ radeon_cs_create_fence(struct radeon_cmdbuf *rcs) /* Create a fence, which is a dummy BO. 
*/ fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, - RADEON_DOMAIN_GTT, RADEON_FLAG_NO_SUBALLOC); + RADEON_DOMAIN_GTT, + RADEON_FLAG_NO_SUBALLOC + | RADEON_FLAG_NO_INTERPROCESS_SHARING); if (!fence) return NULL; diff --git a/src/gallium/winsys/svga/drm/vmw_msg.c b/src/gallium/winsys/svga/drm/vmw_msg.c index 8cce2241f36..3e8ed2a0fb5 100644 --- a/src/gallium/winsys/svga/drm/vmw_msg.c +++ b/src/gallium/winsys/svga/drm/vmw_msg.c @@ -177,17 +177,23 @@ typedef uint64_t VMW_REG; typedef uint32_t VMW_REG; -/* In the 32-bit version of this macro, we use "m" because there is no - * more register left for bp +/* In the 32-bit version of this macro, we store bp in a memory location + * because we've ran out of registers. + * Now we can't reference that memory location while we've modified + * %esp or %ebp, so we first push it on the stack, just before we push + * %ebp, and then when we need it we read it from the stack where we + * just pushed it. */ #define VMW_PORT_HB_OUT(cmd, in_cx, in_si, in_di, \ port_num, magic, bp, \ ax, bx, cx, dx, si, di) \ ({ \ - __asm__ volatile ("push %%ebp;" \ - "mov %12, %%ebp;" \ + __asm__ volatile ("push %12;" \ + "push %%ebp;" \ + "mov 0x04(%%esp), %%ebp;" \ "rep outsb;" \ - "pop %%ebp;" : \ + "pop %%ebp;" \ + "add $0x04, %%esp;" : \ "=a"(ax), \ "=b"(bx), \ "=c"(cx), \ @@ -209,10 +215,12 @@ typedef uint32_t VMW_REG; port_num, magic, bp, \ ax, bx, cx, dx, si, di) \ ({ \ - __asm__ volatile ("push %%ebp;" \ - "mov %12, %%ebp;" \ + __asm__ volatile ("push %12;" \ + "push %%ebp;" \ + "mov 0x04(%%esp), %%ebp;" \ "rep insb;" \ - "pop %%ebp" : \ + "pop %%ebp;" \ + "add $0x04, %%esp;" : \ "=a"(ax), \ "=b"(bx), \ "=c"(cx), \ @@ -418,6 +426,7 @@ vmw_svga_winsys_host_log(struct svga_winsys_screen *sws, const char *log) struct rpc_channel channel; char *msg; int msg_len; + int ret; #ifdef MSG_NOT_IMPLEMENTED return; @@ -435,12 +444,14 @@ vmw_svga_winsys_host_log(struct svga_winsys_screen *sws, const char *log) util_sprintf(msg, "log %s", log); - if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM) || - vmw_send_msg(&channel, msg) || - vmw_close_channel(&channel)) { - debug_printf("Failed to send log\n"); + if (!(ret = vmw_open_channel(&channel, RPCI_PROTOCOL_NUM))) { + ret = vmw_send_msg(&channel, msg); + vmw_close_channel(&channel); } + if (ret) + debug_printf("Failed to send log\n"); + FREE(msg); return; diff --git a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c index 3b14f1d3513..e27d51013fb 100644 --- a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c +++ b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c @@ -210,6 +210,10 @@ vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws, SVGA3dMSQualityLevel qualityLevel, struct vmw_region **p_region) { + union { + union drm_vmw_gb_surface_create_ext_arg ext_arg; + union drm_vmw_gb_surface_create_arg arg; + } s_arg; struct drm_vmw_gb_surface_create_rep *rep; struct vmw_region *region = NULL; int ret; @@ -222,12 +226,11 @@ vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws, return SVGA3D_INVALID_ID; } - if (vws->ioctl.have_drm_2_15) { - union drm_vmw_gb_surface_create_ext_arg s_arg; - struct drm_vmw_gb_surface_create_ext_req *req = &s_arg.req; - rep = &s_arg.rep; + memset(&s_arg, 0, sizeof(s_arg)); - memset(&s_arg, 0, sizeof(s_arg)); + if (vws->ioctl.have_drm_2_15) { + struct drm_vmw_gb_surface_create_ext_req *req = &s_arg.ext_arg.req; + rep = &s_arg.ext_arg.rep; req->version = drm_vmw_gb_surface_v1; req->multisample_pattern = multisamplePattern; @@ -264,17 +267,15 @@ 
vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws, buffer_handle : SVGA3D_INVALID_ID; ret = drmCommandWriteRead(vws->ioctl.drm_fd, - DRM_VMW_GB_SURFACE_CREATE_EXT, &s_arg, - sizeof(s_arg)); + DRM_VMW_GB_SURFACE_CREATE_EXT, &s_arg.ext_arg, + sizeof(s_arg.ext_arg)); if (ret) goto out_fail_create; } else { - union drm_vmw_gb_surface_create_arg s_arg; - struct drm_vmw_gb_surface_create_req *req = &s_arg.req; - rep = &s_arg.rep; + struct drm_vmw_gb_surface_create_req *req = &s_arg.arg.req; + rep = &s_arg.arg.rep; - memset(&s_arg, 0, sizeof(s_arg)); req->svga3d_flags = (uint32_t) flags; req->format = (uint32_t) format; @@ -305,7 +306,7 @@ vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws, buffer_handle : SVGA3D_INVALID_ID; ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GB_SURFACE_CREATE, - &s_arg, sizeof(s_arg)); + &s_arg.arg, sizeof(s_arg.arg)); if (ret) goto out_fail_create; diff --git a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c index 6401f55bbd3..d9b417dc4da 100644 --- a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c +++ b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c @@ -183,7 +183,7 @@ kms_sw_displaytarget_create(struct sw_winsys *ws, kms_sw_dt->format = format; memset(&create_req, 0, sizeof(create_req)); - create_req.bpp = 32; + create_req.bpp = util_format_get_blocksizebits(format); create_req.width = width; create_req.height = height; ret = drmIoctl(kms_sw->fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_req); diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c index 58005df5595..42a58a6dcc1 100644 --- a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c +++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c @@ -125,7 +125,7 @@ static int virgl_vtest_send_init(struct virgl_vtest_winsys *vws) ret = os_get_process_name(cmdline, 63); if (ret == FALSE) strcpy(cmdline, nstr); -#if defined(__GLIBC__) || defined(__CYGWIN__) +#if defined(HAVE_PROGRAM_INVOCATION_NAME) if (!strcmp(cmdline, "shader_runner")) { const char *name; /* hack to get better testname */ diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c index 99396f658f2..a5586b107ad 100644 --- a/src/gbm/backends/dri/gbm_dri.c +++ b/src/gbm/backends/dri/gbm_dri.c @@ -110,6 +110,18 @@ dri_get_buffers_with_format(__DRIdrawable * driDrawable, count, out_count, surf->dri_private); } +static unsigned +dri_get_capability(void *loaderPrivate, enum dri_loader_cap cap) +{ + /* Note: loaderPrivate is _EGLDisplay* */ + switch (cap) { + case DRI_LOADER_CAP_FP16: + return 1; + default: + return 0; + } +} + static int image_get_buffers(__DRIdrawable *driDrawable, unsigned int format, @@ -207,11 +219,12 @@ static const __DRIimageLookupExtension image_lookup_extension = { }; static const __DRIdri2LoaderExtension dri2_loader_extension = { - .base = { __DRI_DRI2_LOADER, 3 }, + .base = { __DRI_DRI2_LOADER, 4 }, .getBuffers = dri_get_buffers, .flushFrontBuffer = dri_flush_front_buffer, .getBuffersWithFormat = dri_get_buffers_with_format, + .getCapability = dri_get_capability, }; static const __DRIimageLoaderExtension image_loader_extension = { @@ -478,51 +491,75 @@ dri_screen_create_sw(struct gbm_dri_device *dri) static const struct gbm_dri_visual gbm_dri_visuals_table[] = { { GBM_FORMAT_R8, __DRI_IMAGE_FORMAT_R8, - { 0x000000ff, 0x00000000, 0x00000000, 0x00000000 }, + { 0, -1, -1, -1 }, + { 8, 0, 0, 0 }, }, { GBM_FORMAT_GR88, __DRI_IMAGE_FORMAT_GR88, - { 0x000000ff, 0x0000ff00, 
0x00000000, 0x00000000 }, + { 0, 8, -1, -1 }, + { 8, 8, 0, 0 }, }, { GBM_FORMAT_ARGB1555, __DRI_IMAGE_FORMAT_ARGB1555, - { 0x00007c00, 0x000003e0, 0x0000001f, 0x00008000 }, + { 10, 5, 0, 11 }, + { 5, 5, 5, 1 }, }, { GBM_FORMAT_RGB565, __DRI_IMAGE_FORMAT_RGB565, - { 0x0000f800, 0x000007e0, 0x0000001f, 0x00000000 }, + { 11, 5, 0, -1 }, + { 5, 6, 5, 0 }, }, { GBM_FORMAT_XRGB8888, __DRI_IMAGE_FORMAT_XRGB8888, - { 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000 }, + { 16, 8, 0, -1 }, + { 8, 8, 8, 0 }, }, { GBM_FORMAT_ARGB8888, __DRI_IMAGE_FORMAT_ARGB8888, - { 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 }, + { 16, 8, 0, 24 }, + { 8, 8, 8, 8 }, }, { GBM_FORMAT_XBGR8888, __DRI_IMAGE_FORMAT_XBGR8888, - { 0x000000ff, 0x0000ff00, 0x00ff0000, 0x00000000 }, + { 0, 8, 16, -1 }, + { 8, 8, 8, 0 }, }, { GBM_FORMAT_ABGR8888, __DRI_IMAGE_FORMAT_ABGR8888, - { 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 }, + { 0, 8, 16, 24 }, + { 8, 8, 8, 8 }, }, { GBM_FORMAT_XRGB2101010, __DRI_IMAGE_FORMAT_XRGB2101010, - { 0x3ff00000, 0x000ffc00, 0x000003ff, 0x00000000 }, + { 20, 10, 0, -1 }, + { 10, 10, 10, 0 }, }, { GBM_FORMAT_ARGB2101010, __DRI_IMAGE_FORMAT_ARGB2101010, - { 0x3ff00000, 0x000ffc00, 0x000003ff, 0xc0000000 }, + { 20, 10, 0, 30 }, + { 10, 10, 10, 2 }, }, { GBM_FORMAT_XBGR2101010, __DRI_IMAGE_FORMAT_XBGR2101010, - { 0x000003ff, 0x000ffc00, 0x3ff00000, 0x00000000 }, + { 0, 10, 20, -1 }, + { 10, 10, 10, 0 }, }, { GBM_FORMAT_ABGR2101010, __DRI_IMAGE_FORMAT_ABGR2101010, - { 0x000003ff, 0x000ffc00, 0x3ff00000, 0xc0000000 }, + { 0, 10, 20, 30 }, + { 10, 10, 10, 2 }, + }, + { + GBM_FORMAT_XBGR16161616F, __DRI_IMAGE_FORMAT_XBGR16161616F, + { 0, 16, 32, -1 }, + { 16, 16, 16, 0 }, + true, + }, + { + GBM_FORMAT_ABGR16161616F, __DRI_IMAGE_FORMAT_ABGR16161616F, + { 0, 16, 32, 48 }, + { 16, 16, 16, 16 }, + true, }, }; diff --git a/src/gbm/backends/dri/gbm_driint.h b/src/gbm/backends/dri/gbm_driint.h index 8497be3e8f6..a8bfa39e522 100644 --- a/src/gbm/backends/dri/gbm_driint.h +++ b/src/gbm/backends/dri/gbm_driint.h @@ -44,11 +44,18 @@ struct gbm_dri_visual { uint32_t gbm_format; int dri_image_format; struct { - uint32_t red; - uint32_t green; - uint32_t blue; - uint32_t alpha; - } rgba_masks; + int red; + int green; + int blue; + int alpha; + } rgba_shifts; + struct { + unsigned int red; + unsigned int green; + unsigned int blue; + unsigned int alpha; + } rgba_sizes; + bool is_float; }; struct gbm_dri_device { diff --git a/src/gbm/main/gbm.c b/src/gbm/main/gbm.c index 38480ca966c..ceeed5082e9 100644 --- a/src/gbm/main/gbm.c +++ b/src/gbm/main/gbm.c @@ -271,6 +271,9 @@ gbm_bo_get_bpp(struct gbm_bo *bo) case GBM_FORMAT_RGBA1010102: case GBM_FORMAT_BGRA1010102: return 32; + case GBM_FORMAT_XBGR16161616F: + case GBM_FORMAT_ABGR16161616F: + return 64; } } diff --git a/src/gbm/main/gbm.h b/src/gbm/main/gbm.h index 9b5288710a5..4c6ab377699 100644 --- a/src/gbm/main/gbm.h +++ b/src/gbm/main/gbm.h @@ -150,6 +150,15 @@ enum gbm_bo_format { #define GBM_FORMAT_RGBA1010102 __gbm_fourcc_code('R', 'A', '3', '0') /* [31:0] R:G:B:A 10:10:10:2 little endian */ #define GBM_FORMAT_BGRA1010102 __gbm_fourcc_code('B', 'A', '3', '0') /* [31:0] B:G:R:A 10:10:10:2 little endian */ +/* + * Floating point 64bpp RGB + * IEEE 754-2008 binary16 half-precision float + * [15:0] sign:exponent:mantissa 1:5:10 + */ +#define GBM_FORMAT_XBGR16161616F __gbm_fourcc_code('X', 'B', '4', 'H') /* [63:0] x:B:G:R 16:16:16:16 little endian */ + +#define GBM_FORMAT_ABGR16161616F __gbm_fourcc_code('A', 'B', '4', 'H') /* [63:0] A:B:G:R 16:16:16:16 little endian */ + /* 
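/* Illustrative helper (hypothetical, not part of the patch's API) relating
 * the removed rgba_masks entries to the new rgba_shifts/rgba_sizes pairs: a
 * negative shift marks an absent channel, and the two 64 bpp half-float
 * formats are exactly the entries a 32-bit mask could not describe, which
 * is why the table switched representation.
 */
#include <stdint.h>

static uint64_t toy_channel_mask(int shift, unsigned size)
{
   if (shift < 0 || size == 0)
      return 0;                                  /* channel not present */
   return ((UINT64_C(1) << size) - 1) << shift;
}
/* toy_channel_mask(16, 8)  == 0x00ff0000  (old ARGB8888 red mask)
 * toy_channel_mask(20, 10) == 0x3ff00000  (old ARGB2101010 red mask)
 */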
packed YCbCr */ #define GBM_FORMAT_YUYV __gbm_fourcc_code('Y', 'U', 'Y', 'V') /* [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */ #define GBM_FORMAT_YVYU __gbm_fourcc_code('Y', 'V', 'Y', 'U') /* [31:0] Cb0:Y1:Cr0:Y0 8:8:8:8 little endian */ diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c index 2db29fd6abb..fc34d85d54a 100644 --- a/src/glx/dri_common.c +++ b/src/glx/dri_common.c @@ -166,6 +166,10 @@ static const struct __ATTRIB(__DRI_ATTRIB_GREEN_MASK, greenMask), __ATTRIB(__DRI_ATTRIB_BLUE_MASK, blueMask), __ATTRIB(__DRI_ATTRIB_ALPHA_MASK, alphaMask), + __ATTRIB(__DRI_ATTRIB_RED_SHIFT, redShift), + __ATTRIB(__DRI_ATTRIB_GREEN_SHIFT, greenShift), + __ATTRIB(__DRI_ATTRIB_BLUE_SHIFT, blueShift), + __ATTRIB(__DRI_ATTRIB_ALPHA_SHIFT, alphaShift), #endif __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_WIDTH, maxPbufferWidth), __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_HEIGHT, maxPbufferHeight), diff --git a/src/glx/g_glxglvnddispatchfuncs.c b/src/glx/g_glxglvnddispatchfuncs.c index 5b65afc8602..cec52c554e6 100644 --- a/src/glx/g_glxglvnddispatchfuncs.c +++ b/src/glx/g_glxglvnddispatchfuncs.c @@ -128,7 +128,7 @@ static void dispatch_BindTexImageEXT(Display *dpy, GLXDrawable drawable, static GLXFBConfigSGIX *dispatch_ChooseFBConfigSGIX(Display *dpy, int screen, - const int *attrib_list, + int *attrib_list, int *nelements) { PFNGLXCHOOSEFBCONFIGSGIXPROC pChooseFBConfigSGIX; @@ -220,7 +220,7 @@ static GLXPbuffer dispatch_CreateGLXPbufferSGIX(Display *dpy, GLXFBConfig config, unsigned int width, unsigned int height, - const int *attrib_list) + int *attrib_list) { PFNGLXCREATEGLXPBUFFERSGIXPROC pCreateGLXPbufferSGIX; __GLXvendorInfo *dd; diff --git a/src/glx/glx_error.c b/src/glx/glx_error.c index 712ecf8213d..653cbeb2d2a 100644 --- a/src/glx/glx_error.c +++ b/src/glx/glx_error.c @@ -54,7 +54,7 @@ __glXSendError(Display * dpy, int_fast8_t errorCode, uint_fast32_t resourceID, error.errorCode = glx_dpy->codes->first_error + errorCode; } - error.sequenceNumber = dpy->last_request_read; + error.sequenceNumber = dpy->request; error.resourceID = resourceID; error.minorCode = minorCode; error.majorCode = glx_dpy->majorOpcode; @@ -73,7 +73,7 @@ __glXSendErrorForXcb(Display * dpy, const xcb_generic_error_t *err) error.type = X_Error; error.errorCode = err->error_code; - error.sequenceNumber = dpy->last_request_read; + error.sequenceNumber = err->sequence; error.resourceID = err->resource_id; error.minorCode = err->minor_code; error.majorCode = err->major_code; diff --git a/src/glx/glxconfig.h b/src/glx/glxconfig.h index 2f1074ca5fa..13f5a4e292c 100644 --- a/src/glx/glxconfig.h +++ b/src/glx/glxconfig.h @@ -41,6 +41,7 @@ struct glx_config { GLint redBits, greenBits, blueBits, alphaBits; /* bits per comp */ GLuint redMask, greenMask, blueMask, alphaMask; + GLuint redShift, greenShift, blueShift, alphaShift; GLint rgbBits; /* total bits for rgb */ GLint indexBits; /* total bits for colorindex */ diff --git a/src/glx/glxglvnd.c b/src/glx/glxglvnd.c index b6b415114c9..bf5c2a06b0c 100644 --- a/src/glx/glxglvnd.c +++ b/src/glx/glxglvnd.c @@ -41,7 +41,7 @@ static void *__glXGLVNDGetDispatchAddress(const GLubyte *procName) { unsigned internalIndex = FindGLXFunction(procName); - return __glXDispatchFunctions[internalIndex]; + return (void*)__glXDispatchFunctions[internalIndex]; } static void __glXGLVNDSetDispatchIndex(const GLubyte *procName, int index) diff --git a/src/glx/meson.build b/src/glx/meson.build index 0e3245a254f..d7b4d086642 100644 --- a/src/glx/meson.build +++ b/src/glx/meson.build @@ -1,4 +1,4 @@ -# Copyright © 2017 Intel 
Corporation +# Copyright © 2017-2019 Intel Corporation # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -70,7 +70,7 @@ extra_libs_libglx = [] extra_deps_libgl = [] extra_ld_args_libgl = [] -if with_dri +if with_glx == 'dri' files_libglx += files( 'dri_common.c', 'dri_common.h', @@ -83,7 +83,7 @@ if with_dri endif # dri2 -if with_dri and with_dri_platform == 'drm' and dep_libdrm.found() +if with_glx == 'dri' and with_dri_platform == 'drm' and dep_libdrm.found() files_libglx += files( 'dri2.c', 'dri2_glx.c', diff --git a/src/intel/Android.compiler.mk b/src/intel/Android.compiler.mk index c2b01221dfc..41af7b20b9c 100644 --- a/src/intel/Android.compiler.mk +++ b/src/intel/Android.compiler.mk @@ -28,7 +28,7 @@ # --------------------------------------- include $(CLEAR_VARS) - +LOCAL_CFLAGS += -Wno-error LOCAL_MODULE := libmesa_intel_compiler LOCAL_MODULE_CLASS := STATIC_LIBRARIES diff --git a/src/intel/Android.dev.mk b/src/intel/Android.dev.mk index 4f14b0362d7..e4a5058bbf7 100644 --- a/src/intel/Android.dev.mk +++ b/src/intel/Android.dev.mk @@ -33,5 +33,8 @@ LOCAL_C_INCLUDES := $(MESA_TOP)/include LOCAL_SRC_FILES := $(DEV_FILES) +LOCAL_CFLAGS := \ + -Wno-gnu-variable-sized-type-not-at-end + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) diff --git a/src/intel/Android.vulkan.mk b/src/intel/Android.vulkan.mk index 05922afdbe0..53e3d18e962 100644 --- a/src/intel/Android.vulkan.mk +++ b/src/intel/Android.vulkan.mk @@ -71,6 +71,7 @@ LOCAL_C_INCLUDES := \ LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_entrypoints.h LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/dummy.c +LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_extensions.h $(intermediates)/vulkan/dummy.c: @mkdir -p $(dir $@) @@ -85,6 +86,14 @@ $(intermediates)/vulkan/anv_entrypoints.h: $(intermediates)/vulkan/dummy.c \ --outdir $(dir $@) \ --xml $(VULKAN_API_XML) +$(intermediates)/vulkan/anv_extensions.h: $(ANV_ENTRYPOINTS_GEN_SCRIPT) \ + $(ANV_EXTENSIONS_SCRIPT) \ + $(VULKAN_API_XML) + @mkdir -p $(dir $@) + $(MESA_PYTHON2) $(ANV_EXTENSIONS_GEN_SCRIPT) \ + --xml $(VULKAN_API_XML) \ + --out-h $@ + LOCAL_EXPORT_C_INCLUDE_DIRS := \ $(intermediates) @@ -239,7 +248,7 @@ include $(BUILD_STATIC_LIBRARY) include $(CLEAR_VARS) LOCAL_MODULE := libmesa_vulkan_common LOCAL_MODULE_CLASS := STATIC_LIBRARIES - +LOCAL_CFLAGS += -Wno-error intermediates := $(call local-generated-sources-dir) LOCAL_SRC_FILES := $(VULKAN_FILES) @@ -261,7 +270,6 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \ LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_entrypoints.c LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_extensions.c -LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_extensions.h $(intermediates)/vulkan/anv_entrypoints.c: $(ANV_ENTRYPOINTS_GEN_SCRIPT) \ $(ANV_EXTENSIONS_SCRIPT) \ @@ -279,14 +287,6 @@ $(intermediates)/vulkan/anv_extensions.c: $(ANV_EXTENSIONS_GEN_SCRIPT) \ --xml $(VULKAN_API_XML) \ --out-c $@ -$(intermediates)/vulkan/anv_extensions.h: $(ANV_EXTENSIONS_GEN_SCRIPT) \ - $(ANV_EXTENSIONS_SCRIPT) \ - $(VULKAN_API_XML) - @mkdir -p $(dir $@) - $(MESA_PYTHON2) $(ANV_EXTENSIONS_GEN_SCRIPT) \ - --xml $(VULKAN_API_XML) \ - --out-h $@ - LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) LOCAL_HEADER_LIBRARIES += $(VULKAN_COMMON_HEADER_LIBRARIES) diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index 9e964d02f36..222a7bcf4b2 100644 --- a/src/intel/blorp/blorp_blit.c +++ 
b/src/intel/blorp/blorp_blit.c @@ -2506,15 +2506,8 @@ blorp_surf_convert_to_uncompressed(const struct isl_device *isl_dev, *y /= fmtl->bh; } - info->surf.logical_level0_px.width = - DIV_ROUND_UP(info->surf.logical_level0_px.width, fmtl->bw); - info->surf.logical_level0_px.height = - DIV_ROUND_UP(info->surf.logical_level0_px.height, fmtl->bh); - - assert(info->surf.phys_level0_sa.width % fmtl->bw == 0); - assert(info->surf.phys_level0_sa.height % fmtl->bh == 0); - info->surf.phys_level0_sa.width /= fmtl->bw; - info->surf.phys_level0_sa.height /= fmtl->bh; + info->surf.logical_level0_px = isl_surf_get_logical_level0_el(&info->surf); + info->surf.phys_level0_sa = isl_surf_get_phys_level0_el(&info->surf); assert(info->tile_x_sa % fmtl->bw == 0); assert(info->tile_y_sa % fmtl->bh == 0); diff --git a/src/intel/compiler/brw_cfg.cpp b/src/intel/compiler/brw_cfg.cpp index 600b428a492..6c40889088d 100644 --- a/src/intel/compiler/brw_cfg.cpp +++ b/src/intel/compiler/brw_cfg.cpp @@ -128,9 +128,6 @@ void bblock_t::combine_with(bblock_t *that) { assert(this->can_combine_with(that)); - foreach_list_typed (bblock_link, link, link, &this->children) { - assert(link->block == that); - } foreach_list_typed (bblock_link, link, link, &that->parents) { assert(link->block == this); } diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index 44296083711..0c18efa67f8 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -141,7 +141,8 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo) nir_lower_ineg64 | nir_lower_logic64 | nir_lower_minmax64 | - nir_lower_shift64; + nir_lower_shift64 | + nir_lower_extract64; fp64_options |= nir_lower_fp64_full_software; } @@ -187,6 +188,8 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo) /* Prior to Gen6, there are no three source operations. 
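/* Sketch of the pixel-to-element conversion that the open-coded math
 * removed above in blorp_surf_convert_to_uncompressed performed, and that
 * the isl_surf_get_*_level0_el helpers now take care of (bw/bh are the
 * compression block dimensions; the names here are hypothetical).
 */
#include <stdint.h>

static void toy_px_to_el(uint32_t w_px, uint32_t h_px,
                         uint32_t bw, uint32_t bh,
                         uint32_t *w_el, uint32_t *h_el)
{
   *w_el = (w_px + bw - 1) / bw;   /* DIV_ROUND_UP */
   *h_el = (h_px + bh - 1) / bh;
}
/* e.g. a 130x130 surface with 4x4 compression blocks becomes 33x33 elements */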
*/ nir_options->lower_ffma = devinfo->gen < 6; + nir_options->lower_bitfield_reverse = devinfo->gen < 7; + nir_options->lower_int64_options = int64_options; nir_options->lower_doubles_options = fp64_options; compiler->glsl_compiler_options[i].NirOptions = nir_options; diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index d8532a0ff6e..29965e60a7f 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -1113,7 +1113,9 @@ brw_untyped_surface_write(struct brw_codegen *p, void brw_memory_fence(struct brw_codegen *p, struct brw_reg dst, - enum opcode send_op); + struct brw_reg src, + enum opcode send_op, + bool stall); void brw_pixel_interpolator_query(struct brw_codegen *p, diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index a271621393d..ad209a5a535 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -707,9 +707,9 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, gen7_convert_mrf_to_grf(p, &dest); assert(dest.nr < 128); - assert(src0.file != BRW_IMMEDIATE_VALUE || src0.nr < 128); - assert(src1.file != BRW_IMMEDIATE_VALUE || src1.nr < 128); - assert(src2.file != BRW_IMMEDIATE_VALUE || src2.nr < 128); + assert(src0.file == BRW_IMMEDIATE_VALUE || src0.nr < 128); + assert(src1.file != BRW_IMMEDIATE_VALUE && src1.nr < 128); + assert(src2.file == BRW_IMMEDIATE_VALUE || src2.nr < 128); assert(dest.address_mode == BRW_ADDRESS_DIRECT); assert(src0.address_mode == BRW_ADDRESS_DIRECT); assert(src1.address_mode == BRW_ADDRESS_DIRECT); @@ -2523,8 +2523,8 @@ brw_send_indirect_message(struct brw_codegen *p, if (desc.file == BRW_IMMEDIATE_VALUE) { send = next_insn(p, BRW_OPCODE_SEND); + brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); brw_set_desc(p, send, desc.ud | desc_imm); - } else { struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); @@ -2543,11 +2543,11 @@ brw_send_indirect_message(struct brw_codegen *p, brw_pop_insn_state(p); send = next_insn(p, BRW_OPCODE_SEND); + brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); brw_set_src1(p, send, addr); } brw_set_dest(p, send, dst); - brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); brw_inst_set_sfid(devinfo, send, sfid); brw_inst_set_eot(devinfo, send, eot); } @@ -3037,10 +3037,12 @@ brw_set_memory_fence_message(struct brw_codegen *p, void brw_memory_fence(struct brw_codegen *p, struct brw_reg dst, - enum opcode send_op) + struct brw_reg src, + enum opcode send_op, + bool stall) { const struct gen_device_info *devinfo = p->devinfo; - const bool commit_enable = + const bool commit_enable = stall || devinfo->gen >= 10 || /* HSD ES # 1404612949 */ (devinfo->gen == 7 && !devinfo->is_haswell); struct brw_inst *insn; @@ -3048,15 +3050,15 @@ brw_memory_fence(struct brw_codegen *p, brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_exec_size(p, BRW_EXECUTE_1); - dst = vec1(dst); + dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW); + src = retype(vec1(src), BRW_REGISTER_TYPE_UD); /* Set dst as destination for dependency tracking, the MEMORY_FENCE * message doesn't write anything back. 
*/ insn = next_insn(p, send_op); - dst = retype(dst, BRW_REGISTER_TYPE_UW); brw_set_dest(p, insn, dst); - brw_set_src0(p, insn, dst); + brw_set_src0(p, insn, src); brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, commit_enable); @@ -3067,7 +3069,7 @@ brw_memory_fence(struct brw_codegen *p, */ insn = next_insn(p, send_op); brw_set_dest(p, insn, offset(dst, 1)); - brw_set_src0(p, insn, offset(dst, 1)); + brw_set_src0(p, insn, src); brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE, commit_enable); @@ -3079,6 +3081,9 @@ brw_memory_fence(struct brw_codegen *p, brw_MOV(p, dst, offset(dst, 1)); } + if (stall) + brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), dst); + brw_pop_insn_state(p); } diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c index 943f724e60f..203280570aa 100644 --- a/src/intel/compiler/brw_eu_validate.c +++ b/src/intel/compiler/brw_eu_validate.c @@ -289,6 +289,18 @@ sources_not_null(const struct gen_device_info *devinfo, return error_msg; } +static struct string +alignment_supported(const struct gen_device_info *devinfo, + const brw_inst *inst) +{ + struct string error_msg = { .str = NULL, .len = 0 }; + + ERROR_IF(devinfo->gen >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16, + "Align16 not supported"); + + return error_msg; +} + static bool inst_uses_src_acc(const struct gen_device_info *devinfo, const brw_inst *inst) { @@ -600,17 +612,31 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); struct string error_msg = { .str = NULL, .len = 0 }; + if (devinfo->gen >= 11) { + if (num_sources == 3) { + ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 || + brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1, + "Byte data type is not supported for src1/2 register regioning. This includes " + "byte broadcast as well."); + } + if (num_sources == 2) { + ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1, + "Byte data type is not supported for src1 register regioning. 
This includes " + "byte broadcast as well."); + } + } + if (num_sources == 3) - return (struct string){}; + return error_msg; if (inst_is_send(devinfo, inst)) - return (struct string){}; + return error_msg; if (exec_size == 1) - return (struct string){}; + return error_msg; if (desc->ndst == 0) - return (struct string){}; + return error_msg; /* The PRMs say: * @@ -635,12 +661,9 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf if (dst_type_is_byte) { if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) { - if (!inst_is_raw_move(devinfo, inst)) { + if (!inst_is_raw_move(devinfo, inst)) ERROR("Only raw MOV supports a packed-byte destination"); - return error_msg; - } else { - return (struct string){}; - } + return error_msg; } } @@ -1823,6 +1846,7 @@ brw_validate_instructions(const struct gen_device_info *devinfo, } else { CHECK(sources_not_null); CHECK(send_restrictions); + CHECK(alignment_supported); CHECK(general_restrictions_based_on_operand_types); CHECK(general_restrictions_on_region_parameters); CHECK(special_restrictions_for_mixed_float_mode); diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 335eaa0e934..f7e37d57b22 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1890,8 +1890,8 @@ fs_visitor::split_virtual_grfs() * destination), we mark the used slots as inseparable. Then we go * through and split the registers into the smallest pieces we can. */ - bool split_points[reg_count]; - memset(split_points, 0, sizeof(split_points)); + bool *split_points = new bool[reg_count]; + memset(split_points, 0, reg_count * sizeof(*split_points)); /* Mark all used registers as fully splittable */ foreach_block_and_inst(block, fs_inst, inst, cfg) { @@ -1925,8 +1925,8 @@ fs_visitor::split_virtual_grfs() } } - int new_virtual_grf[reg_count]; - int new_reg_offset[reg_count]; + int *new_virtual_grf = new int[reg_count]; + int *new_reg_offset = new int[reg_count]; int reg = 0; for (int i = 0; i < num_vars; i++) { @@ -1982,6 +1982,10 @@ fs_visitor::split_virtual_grfs() } } invalidate_live_intervals(); + + delete[] split_points; + delete[] new_virtual_grf; + delete[] new_reg_offset; } /** @@ -1997,8 +2001,8 @@ bool fs_visitor::compact_virtual_grfs() { bool progress = false; - int remap_table[this->alloc.count]; - memset(remap_table, -1, sizeof(remap_table)); + int *remap_table = new int[this->alloc.count]; + memset(remap_table, -1, this->alloc.count * sizeof(int)); /* Mark which virtual GRFs are used. */ foreach_block_and_inst(block, const fs_inst, inst, cfg) { @@ -2054,6 +2058,8 @@ fs_visitor::compact_virtual_grfs() } } + delete[] remap_table; + return progress; } @@ -6124,9 +6130,6 @@ get_lowered_simd_width(const struct gen_device_info *devinfo, case FS_OPCODE_LINTERP: case SHADER_OPCODE_GET_BUFFER_SIZE: - case FS_OPCODE_DDX_COARSE: - case FS_OPCODE_DDX_FINE: - case FS_OPCODE_DDY_COARSE: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: case FS_OPCODE_PACK_HALF_2x16_SPLIT: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: @@ -6143,6 +6146,9 @@ get_lowered_simd_width(const struct gen_device_info *devinfo, */ return (devinfo->gen == 4 ? 
16 : MIN2(16, inst->exec_size)); + case FS_OPCODE_DDX_COARSE: + case FS_OPCODE_DDX_FINE: + case FS_OPCODE_DDY_COARSE: case FS_OPCODE_DDY_FINE: /* The implementation of this virtual opcode may require emitting * compressed Align16 instructions, which are severely limited on some @@ -8303,7 +8309,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, if (!v32->run_cs(min_dispatch_width)) { compiler->shader_perf_log(log_data, "SIMD32 shader failed to compile: %s", - v16->fail_msg); + v32->fail_msg); if (!cfg) { fail_msg = "Couldn't generate SIMD32 program and not " diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index a69e3c6ae80..b7e8de85405 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -322,10 +322,11 @@ namespace brw { case SHADER_OPCODE_INT_REMAINDER: return emit(instruction(opcode, dispatch_width(), dst, fix_math_operand(src0), - fix_math_operand(src1))); + fix_math_operand(fix_byte_src(src1)))); default: - return emit(instruction(opcode, dispatch_width(), dst, src0, src1)); + return emit(instruction(opcode, dispatch_width(), dst, + src0, fix_byte_src(src1))); } } @@ -344,12 +345,12 @@ namespace brw { case BRW_OPCODE_LRP: return emit(instruction(opcode, dispatch_width(), dst, fix_3src_operand(src0), - fix_3src_operand(src1), - fix_3src_operand(src2))); + fix_3src_operand(fix_byte_src(src1)), + fix_3src_operand(fix_byte_src(src2)))); default: return emit(instruction(opcode, dispatch_width(), dst, - src0, src1, src2)); + src0, fix_byte_src(src1), fix_byte_src(src2))); } } @@ -399,8 +400,11 @@ namespace brw { { assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L); - return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), - fix_unsigned_negate(src1))); + /* In some cases we can't have bytes as operand for src1, so use the + * same type for both operand. + */ + return set_condmod(mod, SEL(dst, fix_unsigned_negate(fix_byte_src(src0)), + fix_unsigned_negate(fix_byte_src(src1)))); } /** @@ -657,8 +661,8 @@ namespace brw { emit(BRW_OPCODE_CSEL, retype(dst, BRW_REGISTER_TYPE_F), retype(src0, BRW_REGISTER_TYPE_F), - retype(src1, BRW_REGISTER_TYPE_F), - src2)); + retype(fix_byte_src(src1), BRW_REGISTER_TYPE_F), + fix_byte_src(src2))); } /** @@ -708,6 +712,22 @@ namespace brw { backend_shader *shader; + /** + * Byte sized operands are not supported for src1 on Gen11+. + */ + src_reg + fix_byte_src(const src_reg &src) const + { + if ((shader->devinfo->gen < 11 && !shader->devinfo->is_geminilake) || + type_sz(src.type) != 1) + return src; + + dst_reg temp = vgrf(src.type == BRW_REGISTER_TYPE_UB ? + BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_D); + MOV(temp, src); + return src_reg(temp); + } + private: /** * Workaround for negation of UD registers. 
See comment in diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp b/src/intel/compiler/brw_fs_combine_constants.cpp index 8d16be4c4bb..c12e0d62293 100644 --- a/src/intel/compiler/brw_fs_combine_constants.cpp +++ b/src/intel/compiler/brw_fs_combine_constants.cpp @@ -289,7 +289,7 @@ get_alignment_for_imm(const struct imm *imm) } static bool -needs_negate(const struct fs_reg *reg, const struct imm *imm) +needs_negate(const fs_reg *reg, const struct imm *imm) { switch (reg->type) { case BRW_REGISTER_TYPE_DF: diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index af8350aed6c..67740c783f1 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1206,27 +1206,50 @@ fs_generator::generate_ddx(const fs_inst *inst, { unsigned vstride, width; - if (inst->opcode == FS_OPCODE_DDX_FINE) { - /* produce accurate derivatives */ - vstride = BRW_VERTICAL_STRIDE_2; - width = BRW_WIDTH_2; - } else { - /* replicate the derivative at the top-left pixel to other pixels */ - vstride = BRW_VERTICAL_STRIDE_4; - width = BRW_WIDTH_4; - } + if (devinfo->gen >= 8) { + if (inst->opcode == FS_OPCODE_DDX_FINE) { + /* produce accurate derivatives */ + vstride = BRW_VERTICAL_STRIDE_2; + width = BRW_WIDTH_2; + } else { + /* replicate the derivative at the top-left pixel to other pixels */ + vstride = BRW_VERTICAL_STRIDE_4; + width = BRW_WIDTH_4; + } + + struct brw_reg src0 = byte_offset(src, type_sz(src.type));; + struct brw_reg src1 = src; - struct brw_reg src0 = byte_offset(src, type_sz(src.type));; - struct brw_reg src1 = src; + src0.vstride = vstride; + src0.width = width; + src0.hstride = BRW_HORIZONTAL_STRIDE_0; + src1.vstride = vstride; + src1.width = width; + src1.hstride = BRW_HORIZONTAL_STRIDE_0; - src0.vstride = vstride; - src0.width = width; - src0.hstride = BRW_HORIZONTAL_STRIDE_0; - src1.vstride = vstride; - src1.width = width; - src1.hstride = BRW_HORIZONTAL_STRIDE_0; + brw_ADD(p, dst, src0, negate(src1)); + } else { + /* On Haswell and earlier, the region used above appears to not work + * correctly for compressed instructions. At least on Haswell and + * Iron Lake, compressed ALIGN16 instructions do work. Since we + * would have to split to SIMD8 no matter which method we choose, we + * may as well use ALIGN16 on all platforms gen7 and earlier. 
+ */ + struct brw_reg src0 = stride(src, 4, 4, 1); + struct brw_reg src1 = stride(src, 4, 4, 1); + if (inst->opcode == FS_OPCODE_DDX_FINE) { + src0.swizzle = BRW_SWIZZLE_XXZZ; + src1.swizzle = BRW_SWIZZLE_YYWW; + } else { + src0.swizzle = BRW_SWIZZLE_XXXX; + src1.swizzle = BRW_SWIZZLE_YYYY; + } - brw_ADD(p, dst, src0, negate(src1)); + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, dst, negate(src0), src1); + brw_pop_insn_state(p); + } } /* The negate_value boolean is used to negate the derivative computation for @@ -1256,31 +1279,15 @@ fs_generator::generate_ddy(const fs_inst *inst, if (devinfo->gen >= 11 || (devinfo->is_broadwell && src.type == BRW_REGISTER_TYPE_HF)) { src = stride(src, 0, 2, 1); - struct brw_reg src_0 = byte_offset(src, 0 * type_size); - struct brw_reg src_2 = byte_offset(src, 2 * type_size); - struct brw_reg src_4 = byte_offset(src, 4 * type_size); - struct brw_reg src_6 = byte_offset(src, 6 * type_size); - struct brw_reg src_8 = byte_offset(src, 8 * type_size); - struct brw_reg src_10 = byte_offset(src, 10 * type_size); - struct brw_reg src_12 = byte_offset(src, 12 * type_size); - struct brw_reg src_14 = byte_offset(src, 14 * type_size); - - struct brw_reg dst_0 = byte_offset(dst, 0 * type_size); - struct brw_reg dst_4 = byte_offset(dst, 4 * type_size); - struct brw_reg dst_8 = byte_offset(dst, 8 * type_size); - struct brw_reg dst_12 = byte_offset(dst, 12 * type_size); brw_push_insn_state(p); brw_set_default_exec_size(p, BRW_EXECUTE_4); - - brw_ADD(p, dst_0, negate(src_0), src_2); - brw_ADD(p, dst_4, negate(src_4), src_6); - - if (inst->exec_size == 16) { - brw_ADD(p, dst_8, negate(src_8), src_10); - brw_ADD(p, dst_12, negate(src_12), src_14); + for (uint32_t g = 0; g < inst->exec_size; g += 4) { + brw_set_default_group(p, inst->group + g); + brw_ADD(p, byte_offset(dst, g * type_size), + negate(byte_offset(src, g * type_size)), + byte_offset(src, (g + 2) * type_size)); } - brw_pop_insn_state(p); } else { struct brw_reg src0 = stride(src, 4, 4, 1); @@ -1295,10 +1302,28 @@ fs_generator::generate_ddy(const fs_inst *inst, } } else { /* replicate the derivative at the top-left pixel to other pixels */ - struct brw_reg src0 = byte_offset(stride(src, 4, 4, 0), 0 * type_size); - struct brw_reg src1 = byte_offset(stride(src, 4, 4, 0), 2 * type_size); + if (devinfo->gen >= 8) { + struct brw_reg src0 = byte_offset(stride(src, 4, 4, 0), 0 * type_size); + struct brw_reg src1 = byte_offset(stride(src, 4, 4, 0), 2 * type_size); - brw_ADD(p, dst, negate(src0), src1); + brw_ADD(p, dst, negate(src0), src1); + } else { + /* On Haswell and earlier, the region used above appears to not work + * correctly for compressed instructions. At least on Haswell and + * Iron Lake, compressed ALIGN16 instructions do work. Since we + * would have to split to SIMD8 no matter which method we choose, we + * may as well use ALIGN16 on all platforms gen7 and earlier. 
+ */ + struct brw_reg src0 = stride(src, 4, 4, 1); + struct brw_reg src1 = stride(src, 4, 4, 1); + src0.swizzle = BRW_SWIZZLE_XXXX; + src1.swizzle = BRW_SWIZZLE_ZZZZ; + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, dst, negate(src0), src1); + brw_pop_insn_state(p); + } } } @@ -2070,13 +2095,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case SHADER_OPCODE_MEMORY_FENCE: - brw_memory_fence(p, dst, BRW_OPCODE_SEND); + assert(src[1].file == BRW_IMMEDIATE_VALUE); + brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, src[1].ud); break; case SHADER_OPCODE_INTERLOCK: assert(devinfo->gen >= 9); /* The interlock is basically a memory fence issued via sendc */ - brw_memory_fence(p, dst, BRW_OPCODE_SENDC); + brw_memory_fence(p, dst, src[0], BRW_OPCODE_SENDC, false); break; case SHADER_OPCODE_FIND_LIVE_CHANNEL: { diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index cdd3f7bccaa..2505c6e885c 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1318,9 +1318,16 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_ine32: { fs_reg dest = result; + /* On Gen11 we have an additional issue being that src1 cannot be a byte + * type. So we convert both operands for the comparison. + */ + fs_reg temp_op[2]; + temp_op[0] = bld.fix_byte_src(op[0]); + temp_op[1] = bld.fix_byte_src(op[1]); + const uint32_t bit_size = nir_src_bit_size(instr->src[0].src); if (bit_size != 32) - dest = bld.vgrf(op[0].type, 1); + dest = bld.vgrf(temp_op[0].type, 1); brw_conditional_mod cond; switch (instr->op) { @@ -1341,7 +1348,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) default: unreachable("bad opcode"); } - bld.CMP(dest, op[0], op[1], cond); + bld.CMP(dest, temp_op[0], temp_op[1], cond); if (bit_size > 32) { bld.MOV(result, subscript(dest, BRW_REGISTER_TYPE_UD, 0)); @@ -4169,7 +4176,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_memory_barrier: { const fs_builder ubld = bld.group(8, 0); const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); - ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp) + ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp, + brw_vec8_grf(0, 0), brw_imm_ud(0)) ->size_written = 2 * REG_SIZE; break; } @@ -4821,16 +4829,29 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_quad_swap_horizontal: { const fs_reg value = get_nir_src(instr->src[0]); const fs_reg tmp = bld.vgrf(value.type); - const fs_builder ubld = bld.exec_all().group(dispatch_width / 2, 0); + if (devinfo->gen <= 7) { + /* The hardware doesn't seem to support these crazy regions with + * compressed instructions on gen7 and earlier so we fall back to + * using quad swizzles. Fortunately, we don't support 64-bit + * anything in Vulkan on gen7. 
+ */ + assert(nir_src_bit_size(instr->src[0]) == 32); + const fs_builder ubld = bld.exec_all(); + ubld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp, value, + brw_imm_ud(BRW_SWIZZLE4(1,0,3,2))); + bld.MOV(retype(dest, value.type), tmp); + } else { + const fs_builder ubld = bld.exec_all().group(dispatch_width / 2, 0); - const fs_reg src_left = horiz_stride(value, 2); - const fs_reg src_right = horiz_stride(horiz_offset(value, 1), 2); - const fs_reg tmp_left = horiz_stride(tmp, 2); - const fs_reg tmp_right = horiz_stride(horiz_offset(tmp, 1), 2); + const fs_reg src_left = horiz_stride(value, 2); + const fs_reg src_right = horiz_stride(horiz_offset(value, 1), 2); + const fs_reg tmp_left = horiz_stride(tmp, 2); + const fs_reg tmp_right = horiz_stride(horiz_offset(tmp, 1), 2); - ubld.MOV(tmp_left, src_right); - ubld.MOV(tmp_right, src_left); + ubld.MOV(tmp_left, src_right); + ubld.MOV(tmp_right, src_left); + } bld.MOV(retype(dest, value.type), tmp); break; } @@ -4970,14 +4991,26 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr const fs_builder ubld = bld.group(8, 0); const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); - ubld.emit(SHADER_OPCODE_INTERLOCK, tmp)->size_written = 2 * - REG_SIZE; - + ubld.emit(SHADER_OPCODE_INTERLOCK, tmp, brw_vec8_grf(0, 0)) + ->size_written = 2 * REG_SIZE; break; } case nir_intrinsic_end_invocation_interlock: { - /* We don't need to do anything here */ + /* For endInvocationInterlock(), we need to insert a memory fence which + * stalls in the shader until the memory transactions prior to that + * fence are complete. This ensures that the shader does not end before + * any writes from its critical section have landed. Otherwise, you can + * end up with a case where the next invocation on that pixel properly + * stalls for previous FS invocation on its pixel to complete but + * doesn't actually wait for the dataport memory transactions from that + * thread to land before submitting its own. 
+ */ + const fs_builder ubld = bld.group(8, 0); + const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); + ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp, + brw_vec8_grf(0, 0), brw_imm_ud(1)) + ->size_written = 2 * REG_SIZE; break; } diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp index 17a9dc8e9c4..35903c4030e 100644 --- a/src/intel/compiler/brw_fs_reg_allocate.cpp +++ b/src/intel/compiler/brw_fs_reg_allocate.cpp @@ -591,7 +591,7 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all) */ foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->dst.file == VGRF && inst->has_source_and_destination_hazard()) { - for (unsigned i = 0; i < 3; i++) { + for (unsigned i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) { ra_add_node_interference(g, inst->dst.nr, inst->src[i].nr); } @@ -710,14 +710,9 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all) if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 && inst->src[2].file == VGRF && inst->src[3].file == VGRF && - inst->src[2].nr != inst->src[3].nr) { - for (unsigned i = 0; i < inst->mlen; i++) { - for (unsigned j = 0; j < inst->ex_mlen; j++) { - ra_add_node_interference(g, inst->src[2].nr + i, - inst->src[3].nr + j); - } - } - } + inst->src[2].nr != inst->src[3].nr) + ra_add_node_interference(g, inst->src[2].nr, + inst->src[3].nr); } } diff --git a/src/intel/compiler/brw_predicated_break.cpp b/src/intel/compiler/brw_predicated_break.cpp index 607715dace4..e60052f3608 100644 --- a/src/intel/compiler/brw_predicated_break.cpp +++ b/src/intel/compiler/brw_predicated_break.cpp @@ -128,14 +128,8 @@ opt_predicated_break(backend_shader *s) while_inst->predicate = jump_inst->predicate; while_inst->predicate_inverse = !jump_inst->predicate_inverse; - earlier_block->children.make_empty(); - earlier_block->add_successor(s->cfg->mem_ctx, while_block); - assert(earlier_block->can_combine_with(while_block)); earlier_block->combine_with(while_block); - - earlier_block->next()->parents.make_empty(); - earlier_block->add_successor(s->cfg->mem_ctx, earlier_block->next()); } progress = true; diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index 7d60665b621..6308b280ee7 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -1204,9 +1204,31 @@ vec4_instruction::reswizzle(int dst_writemask, int swizzle) opcode != BRW_OPCODE_DP3 && opcode != BRW_OPCODE_DP2 && opcode != VEC4_OPCODE_PACK_BYTES) { for (int i = 0; i < 3; i++) { - if (src[i].file == BAD_FILE || src[i].file == IMM) + if (src[i].file == BAD_FILE) continue; + if (src[i].file == IMM) { + assert(src[i].type != BRW_REGISTER_TYPE_V && + src[i].type != BRW_REGISTER_TYPE_UV); + + /* Vector immediate types need to be reswizzled. 
*/ + if (src[i].type == BRW_REGISTER_TYPE_VF) { + const unsigned imm[] = { + (src[i].ud >> 0) & 0x0ff, + (src[i].ud >> 8) & 0x0ff, + (src[i].ud >> 16) & 0x0ff, + (src[i].ud >> 24) & 0x0ff, + }; + + src[i] = brw_imm_vf4(imm[BRW_GET_SWZ(swizzle, 0)], + imm[BRW_GET_SWZ(swizzle, 1)], + imm[BRW_GET_SWZ(swizzle, 2)], + imm[BRW_GET_SWZ(swizzle, 3)]); + } + + continue; + } + src[i].swizzle = brw_compose_swizzle(swizzle, src[i].swizzle); } } diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp index 05e6f50ebb6..013b7fb0874 100644 --- a/src/intel/compiler/brw_vec4_generator.cpp +++ b/src/intel/compiler/brw_vec4_generator.cpp @@ -1883,7 +1883,7 @@ generate_code(struct brw_codegen *p, break; case SHADER_OPCODE_MEMORY_FENCE: - brw_memory_fence(p, dst, BRW_OPCODE_SEND); + brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, false); break; case SHADER_OPCODE_FIND_LIVE_CHANNEL: { diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp index 7a8ae8158a3..4909aa32a4b 100644 --- a/src/intel/compiler/brw_vec4_nir.cpp +++ b/src/intel/compiler/brw_vec4_nir.cpp @@ -760,7 +760,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) const vec4_builder bld = vec4_builder(this).at_end().annotate(current_annotation, base_ir); const dst_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); - bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp) + bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp, brw_vec8_grf(0, 0)) ->size_written = 2 * REG_SIZE; break; } diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp index 65326416064..efdae4fd79b 100644 --- a/src/intel/compiler/test_eu_validate.cpp +++ b/src/intel/compiler/test_eu_validate.cpp @@ -2372,3 +2372,124 @@ TEST_P(validation_test, qword_low_power_no_depctrl) clear_instructions(p); } } + +TEST_P(validation_test, gen11_no_byte_src_1_2) +{ + static const struct { + enum opcode opcode; + unsigned access_mode; + + enum brw_reg_type dst_type; + struct { + enum brw_reg_type type; + unsigned vstride; + unsigned width; + unsigned hstride; + } srcs[3]; + + int gen; + bool expected_result; + } inst[] = { +#define INST(opcode, access_mode, dst_type, \ + src0_type, src0_vstride, src0_width, src0_hstride, \ + src1_type, src1_vstride, src1_width, src1_hstride, \ + src2_type, \ + gen, expected_result) \ + { \ + BRW_OPCODE_##opcode, \ + BRW_ALIGN_##access_mode, \ + BRW_REGISTER_TYPE_##dst_type, \ + { \ + { \ + BRW_REGISTER_TYPE_##src0_type, \ + BRW_VERTICAL_STRIDE_##src0_vstride, \ + BRW_WIDTH_##src0_width, \ + BRW_HORIZONTAL_STRIDE_##src0_hstride, \ + }, \ + { \ + BRW_REGISTER_TYPE_##src1_type, \ + BRW_VERTICAL_STRIDE_##src1_vstride, \ + BRW_WIDTH_##src1_width, \ + BRW_HORIZONTAL_STRIDE_##src1_hstride, \ + }, \ + { \ + BRW_REGISTER_TYPE_##src2_type, \ + }, \ + }, \ + gen, \ + expected_result, \ + } + + /* Passes on < 11 */ + INST(MOV, 16, F, B, 2, 4, 0, UD, 0, 4, 0, D, 8, true ), + INST(ADD, 16, UD, F, 0, 4, 0, UB, 0, 1, 0, D, 7, true ), + INST(MAD, 16, D, B, 0, 4, 0, UB, 0, 1, 0, B, 10, true ), + + /* Fails on 11+ */ + INST(MAD, 1, UB, W, 1, 1, 0, D, 0, 4, 0, B, 11, false ), + INST(MAD, 1, UB, W, 1, 1, 1, UB, 1, 1, 0, W, 11, false ), + INST(ADD, 1, W, W, 1, 4, 1, B, 1, 1, 0, D, 11, false ), + + /* Passes on 11+ */ + INST(MOV, 1, W, B, 8, 8, 1, D, 8, 8, 1, D, 11, true ), + INST(ADD, 1, UD, B, 8, 8, 1, W, 8, 8, 1, D, 11, true ), + INST(MAD, 1, B, B, 0, 1, 0, D, 0, 4, 0, W, 11, true ), + +#undef INST + }; + + + for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { + /* Skip instruction not 
meant for this gen. */ + if (devinfo.gen != inst[i].gen) + continue; + + brw_push_insn_state(p); + + brw_set_default_exec_size(p, BRW_EXECUTE_8); + brw_set_default_access_mode(p, inst[i].access_mode); + + switch (inst[i].opcode) { + case BRW_OPCODE_MOV: + brw_MOV(p, retype(g0, inst[i].dst_type), + retype(g0, inst[i].srcs[0].type)); + brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); + brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); + break; + case BRW_OPCODE_ADD: + brw_ADD(p, retype(g0, inst[i].dst_type), + retype(g0, inst[i].srcs[0].type), + retype(g0, inst[i].srcs[1].type)); + brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); + brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width); + brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); + brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].srcs[1].vstride); + brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width); + brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].srcs[1].hstride); + break; + case BRW_OPCODE_MAD: + brw_MAD(p, retype(g0, inst[i].dst_type), + retype(g0, inst[i].srcs[0].type), + retype(g0, inst[i].srcs[1].type), + retype(g0, inst[i].srcs[2].type)); + brw_inst_set_3src_a1_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); + brw_inst_set_3src_a1_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); + brw_inst_set_3src_a1_src1_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); + brw_inst_set_3src_a1_src1_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); + break; + default: + unreachable("invalid opcode"); + } + + brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); + + brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width); + brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width); + + brw_pop_insn_state(p); + + EXPECT_EQ(inst[i].expected_result, validate(p)); + + clear_instructions(p); + } +} diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index acfed5119ba..c1b5178ce8e 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -717,7 +717,7 @@ isl_surf_choose_dim_layout(const struct isl_device *dev, /** * Calculate the physical extent of the surface's first level, in units of - * surface samples. The result is aligned to the format's compression block. + * surface samples. 
*/ static void isl_calc_phys_level0_extent_sa(const struct isl_device *dev, @@ -746,8 +746,8 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, case ISL_DIM_LAYOUT_GEN4_2D: case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: *phys_level0_sa = (struct isl_extent4d) { - .w = isl_align_npot(info->width, fmtl->bw), - .h = fmtl->bh, + .w = info->width, + .h = 1, .d = 1, .a = info->array_len, }; @@ -771,8 +771,8 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, assert(info->samples == 1); *phys_level0_sa = (struct isl_extent4d) { - .w = isl_align_npot(info->width, fmtl->bw), - .h = isl_align_npot(info->height, fmtl->bh), + .w = info->width, + .h = info->height, .d = 1, .a = info->array_len, }; @@ -807,9 +807,6 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, isl_msaa_interleaved_scale_px_to_sa(info->samples, &phys_level0_sa->w, &phys_level0_sa->h); - - phys_level0_sa->w = isl_align(phys_level0_sa->w, fmtl->bw); - phys_level0_sa->h = isl_align(phys_level0_sa->h, fmtl->bh); break; } break; @@ -832,8 +829,8 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, assert(ISL_DEV_GEN(dev) >= 9); *phys_level0_sa = (struct isl_extent4d) { - .w = isl_align_npot(info->width, fmtl->bw), - .h = isl_align_npot(info->height, fmtl->bh), + .w = info->width, + .h = info->height, .d = 1, .a = info->depth, }; @@ -842,8 +839,8 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, case ISL_DIM_LAYOUT_GEN4_3D: assert(ISL_DEV_GEN(dev) < 9); *phys_level0_sa = (struct isl_extent4d) { - .w = isl_align(info->width, fmtl->bw), - .h = isl_align(info->height, fmtl->bh), + .w = info->width, + .h = info->height, .d = info->depth, .a = 1, }; @@ -968,13 +965,10 @@ isl_calc_phys_slice0_extent_sa_gen4_2d( const struct isl_extent4d *phys_level0_sa, struct isl_extent2d *phys_slice0_sa) { - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - assert(phys_level0_sa->depth == 1); if (info->levels == 1) { - /* Do not pad the surface to the image alignment. Instead, pad it only - * to the pixel format's block alignment. + /* Do not pad the surface to the image alignment. * * For tiled surfaces, using a reduced alignment here avoids wasting CPU * cycles on the below mipmap layout caluclations. Reducing the @@ -989,8 +983,8 @@ isl_calc_phys_slice0_extent_sa_gen4_2d( * VkBufferImageCopy::bufferRowLength. 
*/ *phys_slice0_sa = (struct isl_extent2d) { - .w = isl_align_npot(phys_level0_sa->w, fmtl->bw), - .h = isl_align_npot(phys_level0_sa->h, fmtl->bh), + .w = phys_level0_sa->w, + .h = phys_level0_sa->h, }; return; } @@ -1055,9 +1049,9 @@ isl_calc_phys_total_extent_el_gen4_2d( array_pitch_span, &phys_slice0_sa); *total_extent_el = (struct isl_extent2d) { - .w = isl_assert_div(phys_slice0_sa.w, fmtl->bw), + .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw), .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) + - isl_assert_div(phys_slice0_sa.h, fmtl->bh), + isl_align_div_npot(phys_slice0_sa.h, fmtl->bh), }; } @@ -1201,7 +1195,7 @@ isl_calc_phys_total_extent_el_gen9_1d( { MAYBE_UNUSED const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - assert(phys_level0_sa->height / fmtl->bh == 1); + assert(phys_level0_sa->height == 1); assert(phys_level0_sa->depth == 1); assert(info->samples == 1); assert(image_align_sa->w >= fmtl->bw); @@ -1478,8 +1472,6 @@ isl_surf_init_s(const struct isl_device *dev, struct isl_extent4d phys_level0_sa; isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout, &phys_level0_sa); - assert(phys_level0_sa.w % fmtl->bw == 0); - assert(phys_level0_sa.h % fmtl->bh == 0); enum isl_array_pitch_span array_pitch_span = isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa); diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 6790ba002ad..ae21fef3b35 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -1176,7 +1176,7 @@ struct isl_surf { /** * Physical extent of the surface's base level, in units of physical - * surface samples and aligned to the format's compression block. + * surface samples. * * Consider isl_dim_layout as an operator that transforms a logical surface * layout to a physical surface layout. Then @@ -1885,6 +1885,34 @@ isl_surf_get_image_alignment_sa(const struct isl_surf *surf) fmtl->bd * surf->image_alignment_el.d); } +/** + * Logical extent of level 0 in units of surface elements. + */ +static inline struct isl_extent4d +isl_surf_get_logical_level0_el(const struct isl_surf *surf) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + return isl_extent4d(DIV_ROUND_UP(surf->logical_level0_px.w, fmtl->bw), + DIV_ROUND_UP(surf->logical_level0_px.h, fmtl->bh), + DIV_ROUND_UP(surf->logical_level0_px.d, fmtl->bd), + surf->logical_level0_px.a); +} + +/** + * Physical extent of level 0 in units of surface elements. + */ +static inline struct isl_extent4d +isl_surf_get_phys_level0_el(const struct isl_surf *surf) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + return isl_extent4d(DIV_ROUND_UP(surf->phys_level0_sa.w, fmtl->bw), + DIV_ROUND_UP(surf->phys_level0_sa.h, fmtl->bh), + DIV_ROUND_UP(surf->phys_level0_sa.d, fmtl->bd), + surf->phys_level0_sa.a); +} + /** * Pitch between vertically adjacent surface elements, in bytes. 
*/ diff --git a/src/intel/isl/isl_format.c b/src/intel/isl/isl_format.c index b429e7779a4..9847f231923 100644 --- a/src/intel/isl/isl_format.c +++ b/src/intel/isl/isl_format.c @@ -294,7 +294,11 @@ static const struct surface_format_info format_info[] = { SF( 70, 70, x, x, x, x, x, x, x, x, x, x, BC7_UNORM_SRGB) SF( 70, 70, x, x, x, x, x, x, x, x, x, x, BC6H_UF16) SF( x, x, x, x, x, x, x, x, x, x, x, x, PLANAR_420_8) - SF( 75, 75, x, x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB) + /* The format enum for R8G8B8_UNORM_SRGB first shows up in the HSW PRM but + * empirical testing indicates that it doesn't actually sRGB decode and + * acts identical to R8G8B8_UNORM. It does work on gen8+. + */ + SF( 80, 80, x, x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB) SF( 80, 80, x, x, x, x, x, x, x, x, x, x, ETC1_RGB8) SF( 80, 80, x, x, x, x, x, x, x, x, x, x, ETC2_RGB8) SF( 80, 80, x, x, x, x, x, x, x, x, x, x, EAC_R11) diff --git a/src/intel/perf/gen_perf.c b/src/intel/perf/gen_perf.c index 66497e457d9..648ec460afc 100644 --- a/src/intel/perf/gen_perf.c +++ b/src/intel/perf/gen_perf.c @@ -212,22 +212,10 @@ enumerate_sysfs_metrics(struct gen_perf *perf) static bool kernel_has_dynamic_config_support(struct gen_perf *perf, int fd) { - hash_table_foreach(perf->oa_metrics_table, entry) { - struct gen_perf_query_info *query = entry->data; - char config_path[280]; - uint64_t config_id; + uint64_t invalid_config_id = UINT64_MAX; - snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id", - perf->sysfs_dev_dir, query->guid); - - /* Look for the test config, which we know we can't replace. */ - if (read_file_uint64(config_path, &config_id) && config_id == 1) { - return perf->ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, - &config_id) < 0 && errno == ENOENT; - } - } - - return false; + return perf->ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, + &invalid_config_id) < 0 && errno == ENOENT; } bool @@ -298,8 +286,7 @@ compute_topology_builtins(struct gen_perf *perf, for (int i = 0; i < sizeof(devinfo->eu_masks); i++) perf->sys_vars.n_eus += __builtin_popcount(devinfo->eu_masks[i]); - perf->sys_vars.eu_threads_count = - perf->sys_vars.n_eus * devinfo->num_thread_per_eu; + perf->sys_vars.eu_threads_count = devinfo->num_thread_per_eu; /* The subslice mask builtin contains bits for all slices. Prior to Gen11 * it had groups of 3bits for each slice, on Gen11 it's 8bits for each diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h index 64db89ed6aa..8a09c94e760 100644 --- a/src/intel/perf/gen_perf.h +++ b/src/intel/perf/gen_perf.h @@ -28,7 +28,11 @@ #include #include +#if defined(MAJOR_IN_SYSMACROS) #include +#elif defined(MAJOR_IN_MKDEV) +#include +#endif #include "util/hash_table.h" #include "util/ralloc.h" @@ -128,6 +132,7 @@ struct gen_perf_query_info { GEN_PERF_QUERY_TYPE_OA, GEN_PERF_QUERY_TYPE_RAW, GEN_PERF_QUERY_TYPE_PIPELINE, + GEN_PERF_QUERY_TYPE_NULL, } kind; const char *name; const char *guid; diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c index 23112cadc36..74a892c358d 100644 --- a/src/intel/tools/intel_dump_gpu.c +++ b/src/intel/tools/intel_dump_gpu.c @@ -567,7 +567,9 @@ ioctl_init_helper(int fd, unsigned long request, ...) 
static void __attribute__ ((destructor)) fini(void) { - free(output_filename); - aub_file_finish(&aub_file); - free(bos); + if (devinfo.gen != 0) { + free(output_filename); + aub_file_finish(&aub_file); + free(bos); + } } diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 109f5f71bc4..48d41891cfb 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -165,7 +165,7 @@ anv_state_table_init(struct anv_state_table *table, goto fail_fd; } - if (!u_vector_init(&table->mmap_cleanups, + if (!u_vector_init(&table->cleanups, round_to_power_of_two(sizeof(struct anv_state_table_cleanup)), 128)) { result = vk_error(VK_ERROR_INITIALIZATION_FAILED); @@ -179,12 +179,12 @@ anv_state_table_init(struct anv_state_table *table, uint32_t initial_size = initial_entries * ANV_STATE_ENTRY_SIZE; result = anv_state_table_expand_range(table, initial_size); if (result != VK_SUCCESS) - goto fail_mmap_cleanups; + goto fail_cleanups; return VK_SUCCESS; - fail_mmap_cleanups: - u_vector_finish(&table->mmap_cleanups); + fail_cleanups: + u_vector_finish(&table->cleanups); fail_fd: close(table->fd); @@ -195,7 +195,7 @@ static VkResult anv_state_table_expand_range(struct anv_state_table *table, uint32_t size) { void *map; - struct anv_mmap_cleanup *cleanup; + struct anv_state_table_cleanup *cleanup; /* Assert that we only ever grow the pool */ assert(size >= table->state.end); @@ -204,11 +204,11 @@ anv_state_table_expand_range(struct anv_state_table *table, uint32_t size) if (size > BLOCK_POOL_MEMFD_SIZE) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - cleanup = u_vector_add(&table->mmap_cleanups); + cleanup = u_vector_add(&table->cleanups); if (!cleanup) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - *cleanup = ANV_MMAP_CLEANUP_INIT; + *cleanup = ANV_STATE_TABLE_CLEANUP_INIT; /* Just leak the old map until we destroy the pool. We can't munmap it * without races or imposing locking on the block allocate fast path. On @@ -272,12 +272,12 @@ anv_state_table_finish(struct anv_state_table *table) { struct anv_state_table_cleanup *cleanup; - u_vector_foreach(cleanup, &table->mmap_cleanups) { + u_vector_foreach(cleanup, &table->cleanups) { if (cleanup->map) munmap(cleanup->map, cleanup->size); } - u_vector_finish(&table->mmap_cleanups); + u_vector_finish(&table->cleanups); close(table->fd); } @@ -478,6 +478,11 @@ anv_block_pool_init(struct anv_block_pool *pool, if (result != VK_SUCCESS) goto fail_mmap_cleanups; + /* Make the entire pool available in the front of the pool. If back + * allocation needs to use this space, the "ends" will be re-arranged. + */ + pool->state.end = pool->size; + return VK_SUCCESS; fail_mmap_cleanups: diff --git a/src/intel/vulkan/anv_android.c b/src/intel/vulkan/anv_android.c index aee11ccacd2..8c785323d36 100644 --- a/src/intel/vulkan/anv_android.c +++ b/src/intel/vulkan/anv_android.c @@ -126,7 +126,7 @@ get_ahw_buffer_format_properties( /* Fill properties fields based on description. 
*/ VkAndroidHardwareBufferFormatPropertiesANDROID *p = pProperties; - p->format = vk_format_from_android(desc.format); + p->format = vk_format_from_android(desc.format, desc.usage); const struct anv_format *anv_format = anv_get_format(p->format); p->externalFormat = (uint64_t) (uintptr_t) anv_format; diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 0d3d3f948e6..96ee66f0655 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -1075,11 +1075,11 @@ clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer, { static const union isl_color_value color_value = { .u32 = { 0, } }; const struct anv_subpass *subpass = cmd_buffer->state.subpass; - const uint32_t att_idx = subpass->depth_stencil_attachment->attachment; - - if (att_idx == VK_ATTACHMENT_UNUSED) + if (!subpass->depth_stencil_attachment) return; + const uint32_t att_idx = subpass->depth_stencil_attachment->attachment; + assert(att_idx != VK_ATTACHMENT_UNUSED); struct anv_render_pass_attachment *pass_att = &cmd_buffer->state.pass->attachments[att_idx]; diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 348764c61f5..b0ce00f6daf 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -146,9 +146,6 @@ anv_cmd_state_finish(struct anv_cmd_buffer *cmd_buffer) anv_cmd_pipeline_state_finish(cmd_buffer, &state->gfx.base); anv_cmd_pipeline_state_finish(cmd_buffer, &state->compute.base); - for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) - vk_free(&cmd_buffer->pool->alloc, state->push_constants[i]); - vk_free(&cmd_buffer->pool->alloc, state->attachments); } @@ -159,47 +156,6 @@ anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer) anv_cmd_state_init(cmd_buffer); } -/** - * This function updates the size of the push constant buffer we need to emit. - * This is called in various parts of the driver to ensure that different - * pieces of push constant data get emitted as needed. However, it is important - * that we never shrink the size of the buffer. For example, a compute shader - * dispatch will always call this for the base group id, which has an - * offset in the push constant buffer that is smaller than the offset for - * storage image data. If the compute shader has storage images, we will call - * this again with a larger size during binding table emission. However, - * if we dispatch the compute shader again without dirtying our descriptors, - * we would still call this function with a smaller size for the base group - * id, and not for the images, which would incorrectly shrink the size of the - * push constant data we emit with that dispatch, making us drop the image data. 
- */ -VkResult -anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, uint32_t size) -{ - struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage]; - - if (*ptr == NULL) { - *ptr = vk_alloc(&cmd_buffer->pool->alloc, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (*ptr == NULL) { - anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - (*ptr)->size = size; - } else if ((*ptr)->size < size) { - *ptr = vk_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (*ptr == NULL) { - anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - (*ptr)->size = size; - } - - return VK_SUCCESS; -} - static VkResult anv_create_cmd_buffer( struct anv_device * device, struct anv_cmd_pool * pool, @@ -766,7 +722,7 @@ anv_push_constant_value(const struct anv_cmd_pipeline_state *state, } else if (ANV_PARAM_IS_PUSH(param)) { uint32_t offset = ANV_PARAM_PUSH_OFFSET(param); assert(offset % sizeof(uint32_t) == 0); - if (offset < data->size) + if (offset < sizeof(data->client_data)) return *(uint32_t *)((uint8_t *)data + offset); else return 0; @@ -792,12 +748,12 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, return (struct anv_state) { .offset = 0 }; struct anv_push_constants *data = - cmd_buffer->state.push_constants[stage]; + &cmd_buffer->state.push_constants[stage]; const struct brw_stage_prog_data *prog_data = pipeline->shaders[stage]->prog_data; /* If we don't actually have any push constants, bail. */ - if (prog_data == NULL || prog_data->nr_params == 0 || data == NULL) + if (prog_data == NULL || prog_data->nr_params == 0) return (struct anv_state) { .offset = 0 }; struct anv_state state = @@ -820,7 +776,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) { struct anv_cmd_pipeline_state *pipeline_state = &cmd_buffer->state.compute.base; struct anv_push_constants *data = - cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; + &cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline; const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; @@ -882,13 +838,7 @@ void anv_CmdPushConstants( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); anv_foreach_stage(stage, stageFlags) { - VkResult result = - anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, - stage, client_data); - if (result != VK_SUCCESS) - return; - - memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset, + memcpy(cmd_buffer->state.push_constants[stage].client_data + offset, pValues, size); } diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index dc33cc6d9a8..f4e00667334 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -103,6 +103,16 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device, type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) data |= ANV_DESCRIPTOR_ADDRESS_RANGE; + /* On Ivy Bridge and Bay Trail, we need swizzles textures in the shader + * Do not handle VK_DESCRIPTOR_TYPE_STORAGE_IMAGE and + * VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT because they already must + * have identity swizzle. 
+ */ + if (device->info.gen == 7 && !device->info.is_haswell && + (type == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE || + type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)) + data |= ANV_DESCRIPTOR_TEXTURE_SWIZZLE; + return data; } @@ -123,6 +133,9 @@ anv_descriptor_data_size(enum anv_descriptor_data data) if (data & ANV_DESCRIPTOR_ADDRESS_RANGE) size += sizeof(struct anv_address_range_descriptor); + if (data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) + size += sizeof(struct anv_texture_swizzle_descriptor); + return size; } @@ -1184,6 +1197,26 @@ anv_descriptor_set_write_image_view(struct anv_device *device, anv_descriptor_set_write_image_param(desc_map, image_param); } + + if (image_view && (bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE)) { + assert(!(bind_layout->data & ANV_DESCRIPTOR_SAMPLED_IMAGE)); + assert(image_view); + struct anv_texture_swizzle_descriptor desc_data[3]; + memset(desc_data, 0, sizeof(desc_data)); + + for (unsigned p = 0; p < image_view->n_planes; p++) { + desc_data[p] = (struct anv_texture_swizzle_descriptor) { + .swizzle = { + (uint8_t)image_view->planes[p].isl.swizzle.r, + (uint8_t)image_view->planes[p].isl.swizzle.g, + (uint8_t)image_view->planes[p].isl.swizzle.b, + (uint8_t)image_view->planes[p].isl.swizzle.a, + }, + }; + } + memcpy(desc_map, desc_data, + MAX2(1, bind_layout->max_plane_count) * sizeof(desc_data[0])); + } } void diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index d74116bd9c9..ab8dee46a86 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1170,6 +1170,11 @@ void anv_GetPhysicalDeviceFeatures2( } } +#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS 64 + +#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64 +#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS 256 + void anv_GetPhysicalDeviceProperties( VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties) @@ -1215,20 +1220,20 @@ void anv_GetPhysicalDeviceProperties( .sparseAddressSpaceSize = 0, .maxBoundDescriptorSets = MAX_SETS, .maxPerStageDescriptorSamplers = max_samplers, - .maxPerStageDescriptorUniformBuffers = 64, + .maxPerStageDescriptorUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, .maxPerStageDescriptorStorageBuffers = max_ssbos, .maxPerStageDescriptorSampledImages = max_textures, .maxPerStageDescriptorStorageImages = max_images, - .maxPerStageDescriptorInputAttachments = 64, + .maxPerStageDescriptorInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS, .maxPerStageResources = max_per_stage, .maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */ - .maxDescriptorSetUniformBuffers = 6 * 64, /* number of stages * maxPerStageDescriptorUniformBuffers */ + .maxDescriptorSetUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, /* number of stages * maxPerStageDescriptorUniformBuffers */ .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2, .maxDescriptorSetStorageBuffers = 6 * max_ssbos, /* number of stages * maxPerStageDescriptorStorageBuffers */ .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2, .maxDescriptorSetSampledImages = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */ .maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */ - .maxDescriptorSetInputAttachments = 256, + .maxDescriptorSetInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS, .maxVertexInputAttributes = MAX_VBS, .maxVertexInputBindings = MAX_VBS, .maxVertexInputAttributeOffset = 
2047, @@ -1296,7 +1301,7 @@ void anv_GetPhysicalDeviceProperties( .sampledImageStencilSampleCounts = sample_counts, .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, .maxSampleMaskWords = 1, - .timestampComputeAndGraphics = false, + .timestampComputeAndGraphics = true, .timestampPeriod = 1000000000.0 / devinfo->timestamp_frequency, .maxClipDistances = 8, .maxCullDistances = 8, @@ -1393,20 +1398,20 @@ void anv_GetPhysicalDeviceProperties2( props->robustBufferAccessUpdateAfterBind = true; props->quadDivergentImplicitLod = false; props->maxPerStageDescriptorUpdateAfterBindSamplers = max_bindless_views; - props->maxPerStageDescriptorUpdateAfterBindUniformBuffers = 0; + props->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS; props->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX; props->maxPerStageDescriptorUpdateAfterBindSampledImages = max_bindless_views; props->maxPerStageDescriptorUpdateAfterBindStorageImages = max_bindless_views; - props->maxPerStageDescriptorUpdateAfterBindInputAttachments = 0; + props->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS; props->maxPerStageUpdateAfterBindResources = UINT32_MAX; props->maxDescriptorSetUpdateAfterBindSamplers = max_bindless_views; - props->maxDescriptorSetUpdateAfterBindUniformBuffers = 0; - props->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = 0; + props->maxDescriptorSetUpdateAfterBindUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS; + props->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2; props->maxDescriptorSetUpdateAfterBindStorageBuffers = UINT32_MAX; props->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2; props->maxDescriptorSetUpdateAfterBindSampledImages = max_bindless_views; props->maxDescriptorSetUpdateAfterBindStorageImages = max_bindless_views; - props->maxDescriptorSetUpdateAfterBindInputAttachments = 0; + props->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS; break; } @@ -2146,16 +2151,18 @@ VkResult anv_CreateDevice( if (!device) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - const unsigned decode_flags = - GEN_BATCH_DECODE_FULL | - ((INTEL_DEBUG & DEBUG_COLOR) ? GEN_BATCH_DECODE_IN_COLOR : 0) | - GEN_BATCH_DECODE_OFFSETS | - GEN_BATCH_DECODE_FLOATS; + if (INTEL_DEBUG & DEBUG_BATCH) { + const unsigned decode_flags = + GEN_BATCH_DECODE_FULL | + ((INTEL_DEBUG & DEBUG_COLOR) ? 
GEN_BATCH_DECODE_IN_COLOR : 0) | + GEN_BATCH_DECODE_OFFSETS | + GEN_BATCH_DECODE_FLOATS; - gen_batch_decode_ctx_init(&device->decoder_ctx, - &physical_device->info, - stderr, decode_flags, NULL, - decode_get_bo, NULL, device); + gen_batch_decode_ctx_init(&device->decoder_ctx, + &physical_device->info, + stderr, decode_flags, NULL, + decode_get_bo, NULL, device); + } device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; device->instance = physical_device->instance; @@ -2438,7 +2445,8 @@ void anv_DestroyDevice( anv_gem_destroy_context(device, device->context_id); - gen_batch_decode_ctx_finish(&device->decoder_ctx); + if (INTEL_DEBUG & DEBUG_BATCH) + gen_batch_decode_ctx_finish(&device->decoder_ctx); close(device->fd); @@ -2995,6 +3003,9 @@ void anv_FreeMemory( if (mem->map) anv_UnmapMemory(_device, _mem); + p_atomic_add(&pdevice->memory.heaps[mem->type->heapIndex].used, + -mem->bo->size); + anv_bo_cache_release(device, &device->bo_cache, mem->bo); #if defined(ANDROID) && ANDROID_API_LEVEL >= 26 @@ -3002,9 +3013,6 @@ void anv_FreeMemory( AHardwareBuffer_release(mem->ahw); #endif - p_atomic_add(&pdevice->memory.heaps[mem->type->heapIndex].used, - -mem->bo->size); - vk_free2(&device->alloc, pAllocator, mem); } diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py index 962ebdbc58d..1f103e86364 100644 --- a/src/intel/vulkan/anv_extensions.py +++ b/src/intel/vulkan/anv_extensions.py @@ -67,18 +67,18 @@ def __init__(self, version, enable): # the those extension strings, then tests dEQP-VK.api.info.instance.extensions # and dEQP-VK.api.info.device fail due to the duplicated strings. EXTENSIONS = [ - Extension('VK_KHR_8bit_storage', 1, 'device->info.gen >= 8'), + Extension('VK_KHR_8bit_storage', 1, 'device->info.gen >= 8 && !ANDROID'), Extension('VK_KHR_16bit_storage', 1, 'device->info.gen >= 8'), Extension('VK_KHR_bind_memory2', 1, True), - Extension('VK_KHR_create_renderpass2', 1, True), + Extension('VK_KHR_create_renderpass2', 1, '!ANDROID'), Extension('VK_KHR_dedicated_allocation', 1, True), - Extension('VK_KHR_depth_stencil_resolve', 1, True), + Extension('VK_KHR_depth_stencil_resolve', 1, '!ANDROID'), Extension('VK_KHR_descriptor_update_template', 1, True), Extension('VK_KHR_device_group', 1, True), Extension('VK_KHR_device_group_creation', 1, True), Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'), Extension('VK_KHR_draw_indirect_count', 1, True), - Extension('VK_KHR_driver_properties', 1, True), + Extension('VK_KHR_driver_properties', 1, '!ANDROID'), Extension('VK_KHR_external_fence', 1, 'device->has_syncobj_wait'), Extension('VK_KHR_external_fence_capabilities', 1, True), @@ -105,9 +105,9 @@ def __init__(self, version, enable): Extension('VK_KHR_sampler_mirror_clamp_to_edge', 1, True), Extension('VK_KHR_sampler_ycbcr_conversion', 1, True), Extension('VK_KHR_shader_atomic_int64', 1, - 'device->info.gen >= 9 && device->use_softpin'), + 'device->info.gen >= 9 && device->use_softpin && !ANDROID'), Extension('VK_KHR_shader_draw_parameters', 1, True), - Extension('VK_KHR_shader_float16_int8', 1, 'device->info.gen >= 8'), + Extension('VK_KHR_shader_float16_int8', 1, 'device->info.gen >= 8 && !ANDROID'), Extension('VK_KHR_storage_buffer_storage_class', 1, True), Extension('VK_KHR_surface', 25, 'ANV_HAS_SURFACE'), Extension('VK_KHR_surface_protected_capabilities', 1, 'ANV_HAS_SURFACE'), @@ -142,13 +142,13 @@ def __init__(self, version, enable): Extension('VK_EXT_scalar_block_layout', 1, True), Extension('VK_EXT_shader_stencil_export', 1, 'device->info.gen 
>= 9'), Extension('VK_EXT_shader_viewport_index_layer', 1, True), - Extension('VK_EXT_transform_feedback', 1, True), + Extension('VK_EXT_transform_feedback', 1, 'device->info.gen >= 8'), Extension('VK_EXT_vertex_attribute_divisor', 3, True), Extension('VK_EXT_ycbcr_image_arrays', 1, True), Extension('VK_ANDROID_external_memory_android_hardware_buffer', 3, 'ANDROID'), Extension('VK_ANDROID_native_buffer', 5, 'ANDROID'), - Extension('VK_GOOGLE_decorate_string', 1, True), - Extension('VK_GOOGLE_hlsl_functionality1', 1, True), + Extension('VK_GOOGLE_decorate_string', 1, '!ANDROID'), + Extension('VK_GOOGLE_hlsl_functionality1', 1, '!ANDROID'), Extension('VK_NV_compute_shader_derivatives', 1, True), ] diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index 4e15e626eed..5be02b3e86e 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -69,6 +69,7 @@ .aspect = VK_IMAGE_ASPECT_DEPTH_BIT, \ }, \ }, \ + .vk_format = __vk_fmt, \ .n_planes = 1, \ } @@ -80,6 +81,7 @@ .aspect = VK_IMAGE_ASPECT_STENCIL_BIT, \ }, \ }, \ + .vk_format = __vk_fmt, \ .n_planes = 1, \ } @@ -465,6 +467,14 @@ anv_get_format_plane(const struct gen_device_info *devinfo, VkFormat vk_format, const struct isl_format_layout *isl_layout = isl_format_get_layout(plane_format.isl_format); + /* On Ivy Bridge we don't even have enough 24 and 48-bit formats that we + * can reliably do texture upload with BLORP so just don't claim support + * for any of them. + */ + if (devinfo->gen == 7 && !devinfo->is_haswell && + (isl_layout->bpb == 24 || isl_layout->bpb == 48)) + return unsupported; + if (tiling == VK_IMAGE_TILING_OPTIMAL && !util_is_power_of_two_or_zero(isl_layout->bpb)) { /* Tiled formats *must* be power-of-two because we need up upload @@ -798,6 +808,7 @@ anv_get_image_format_properties( if (format == NULL) goto unsupported; + assert(format->vk_format == info->format); format_feature_flags = anv_get_image_format_features(devinfo, info->format, format, info->tiling); @@ -977,6 +988,13 @@ static const VkExternalMemoryProperties prime_fd_props = { VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, }; +static const VkExternalMemoryProperties userptr_props = { + .externalMemoryFeatures = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT, + .exportFromImportedHandleTypes = 0, + .compatibleHandleTypes = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, +}; + static const VkExternalMemoryProperties android_buffer_props = { .externalMemoryFeatures = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT, @@ -1069,6 +1087,10 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties2( if (external_props) external_props->externalMemoryProperties = prime_fd_props; break; + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: + if (external_props) + external_props->externalMemoryProperties = userptr_props; + break; case VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID: if (ahw_supported && external_props) { external_props->externalMemoryProperties = android_image_props; @@ -1159,6 +1181,9 @@ void anv_GetPhysicalDeviceExternalBufferProperties( case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: pExternalBufferProperties->externalMemoryProperties = prime_fd_props; return; + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: + pExternalBufferProperties->externalMemoryProperties = userptr_props; + return; case VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID: if 
(physical_device->supported_extensions.ANDROID_external_memory_android_hardware_buffer) { pExternalBufferProperties->externalMemoryProperties = android_buffer_props; @@ -1170,8 +1195,14 @@ void anv_GetPhysicalDeviceExternalBufferProperties( } unsupported: + /* From the Vulkan 1.1.113 spec: + * + * compatibleHandleTypes must include at least handleType. + */ pExternalBufferProperties->externalMemoryProperties = - (VkExternalMemoryProperties) {0}; + (VkExternalMemoryProperties) { + .compatibleHandleTypes = pExternalBufferInfo->handleType, + }; } VkResult anv_CreateSamplerYcbcrConversion( diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 3841234df14..10885d8451d 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -816,12 +816,12 @@ resolve_ahw_image(struct anv_device *device, vk_tiling == VK_IMAGE_TILING_OPTIMAL); /* Check format. */ - VkFormat vk_format = vk_format_from_android(desc.format); + VkFormat vk_format = vk_format_from_android(desc.format, desc.usage); enum isl_format isl_fmt = anv_get_isl_format(&device->info, vk_format, VK_IMAGE_ASPECT_COLOR_BIT, vk_tiling); - assert(format != ISL_FORMAT_UNSUPPORTED); + assert(isl_fmt != ISL_FORMAT_UNSUPPORTED); /* Handle RGB(X)->RGBA fallback. */ switch (desc.format) { @@ -1278,6 +1278,10 @@ anv_image_fill_surface_state(struct anv_device *device, if (view_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT) view.swizzle = anv_swizzle_for_render(view.swizzle); + /* On Ivy Bridge and Bay Trail we do the swizzle in the shader */ + if (device->info.gen == 7 && !device->info.is_haswell) + view.swizzle = ISL_SWIZZLE_IDENTITY; + /* If this is a HiZ buffer we can sample from with a programmable clear * value (SKL+), define the clear value to the optimal constant. */ @@ -1355,13 +1359,10 @@ anv_image_fill_surface_state(struct anv_device *device, */ const struct isl_format_layout *fmtl = isl_format_get_layout(surface->isl.format); + tmp_surf.logical_level0_px = + isl_surf_get_logical_level0_el(&tmp_surf); + tmp_surf.phys_level0_sa = isl_surf_get_phys_level0_el(&tmp_surf); tmp_surf.format = view.format; - tmp_surf.logical_level0_px.width = - DIV_ROUND_UP(tmp_surf.logical_level0_px.width, fmtl->bw); - tmp_surf.logical_level0_px.height = - DIV_ROUND_UP(tmp_surf.logical_level0_px.height, fmtl->bh); - tmp_surf.phys_level0_sa.width /= fmtl->bw; - tmp_surf.phys_level0_sa.height /= fmtl->bh; tile_x_sa /= fmtl->bw; tile_y_sa /= fmtl->bh; diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c index 08bff9585bc..f6b9584b410 100644 --- a/src/intel/vulkan/anv_intel.c +++ b/src/intel/vulkan/anv_intel.c @@ -64,7 +64,8 @@ VkResult anv_CreateDmaBufImageINTEL( .samples = 1, /* FIXME: Need a way to use X tiling to allow scanout */ .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT, .flags = 0, }}, pAllocator, &image_h); diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index 3d9ba5c3ecd..94ec56252ba 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -725,6 +725,10 @@ lower_image_intrinsic(nir_intrinsic_instr *intrin, nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); nir_variable *var = nir_deref_instr_get_variable(deref); + unsigned set = var->data.descriptor_set; + unsigned binding = var->data.binding; + unsigned binding_offset = 
state->set[set].surface_offsets[binding]; + nir_builder *b = &state->builder; b->cursor = nir_before_instr(&intrin->instr); @@ -742,7 +746,7 @@ lower_image_intrinsic(nir_intrinsic_instr *intrin, intrin->dest.ssa.bit_size, state); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc)); - } else if (use_bindless) { + } else if (binding_offset > MAX_BINDING_TABLE_SIZE) { const bool write_only = (var->data.image.access & ACCESS_NON_READABLE) != 0; nir_ssa_def *desc = @@ -750,9 +754,6 @@ lower_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *handle = nir_channel(b, desc, write_only ? 1 : 0); nir_rewrite_image_intrinsic(intrin, handle, true); } else { - unsigned set = var->data.descriptor_set; - unsigned binding = var->data.binding; - unsigned binding_offset = state->set[set].surface_offsets[binding]; unsigned array_size = state->layout->set[set].layout->binding[binding].array_size; @@ -856,8 +857,21 @@ lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type, assert(deref->deref_type == nir_deref_type_array); if (nir_src_is_const(deref->arr.index)) { - unsigned arr_index = nir_src_as_uint(deref->arr.index); - *base_index += MIN2(arr_index, array_size - 1); + unsigned arr_index = MIN2(nir_src_as_uint(deref->arr.index), array_size - 1); + struct anv_sampler **immutable_samplers = + state->layout->set[set].layout->binding[binding].immutable_samplers; + if (immutable_samplers) { + /* Array of YCbCr samplers are tightly packed in the binding + * tables, compute the offset of an element in the array by + * adding the number of planes of all preceding elements. + */ + unsigned desc_arr_index = 0; + for (int i = 0; i < arr_index; i++) + desc_arr_index += immutable_samplers[i]->n_planes; + *base_index += desc_arr_index; + } else { + *base_index += arr_index; + } } else { /* From VK_KHR_sampler_ycbcr_conversion: * @@ -899,13 +913,100 @@ tex_instr_get_and_remove_plane_src(nir_tex_instr *tex) return plane; } +static nir_ssa_def * +build_def_array_select(nir_builder *b, nir_ssa_def **srcs, nir_ssa_def *idx, + unsigned start, unsigned end) +{ + if (start == end - 1) { + return srcs[start]; + } else { + unsigned mid = start + (end - start) / 2; + return nir_bcsel(b, nir_ilt(b, idx, nir_imm_int(b, mid)), + build_def_array_select(b, srcs, idx, start, mid), + build_def_array_select(b, srcs, idx, mid, end)); + } +} + static void -lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) +lower_gen7_tex_swizzle(nir_tex_instr *tex, unsigned plane, + struct apply_pipeline_layout_state *state) { - state->builder.cursor = nir_before_instr(&tex->instr); + assert(state->pdevice->info.gen == 7 && !state->pdevice->info.is_haswell); + if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF || + nir_tex_instr_is_query(tex) || + tex->op == nir_texop_tg4 || /* We can't swizzle TG4 */ + (tex->is_shadow && tex->is_new_style_shadow)) + return; + + int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); + assert(deref_src_idx >= 0); + + nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src); + nir_variable *var = nir_deref_instr_get_variable(deref); + + unsigned set = var->data.descriptor_set; + unsigned binding = var->data.binding; + const struct anv_descriptor_set_binding_layout *bind_layout = + &state->layout->set[set].layout->binding[binding]; + + if ((bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) == 0) + return; + + nir_builder *b = &state->builder; + b->cursor = nir_before_instr(&tex->instr); + + const unsigned plane_offset = + plane * 
sizeof(struct anv_texture_swizzle_descriptor); + nir_ssa_def *swiz = + build_descriptor_load(deref, plane_offset, 1, 32, state); + + b->cursor = nir_after_instr(&tex->instr); + + assert(tex->dest.ssa.bit_size == 32); + assert(tex->dest.ssa.num_components == 4); + + /* Initializing to undef is ok; nir_opt_undef will clean it up. */ + nir_ssa_def *undef = nir_ssa_undef(b, 1, 32); + nir_ssa_def *comps[8]; + for (unsigned i = 0; i < ARRAY_SIZE(comps); i++) + comps[i] = undef; + + comps[ISL_CHANNEL_SELECT_ZERO] = nir_imm_int(b, 0); + if (nir_alu_type_get_base_type(tex->dest_type) == nir_type_float) + comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_float(b, 1); + else + comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_int(b, 1); + comps[ISL_CHANNEL_SELECT_RED] = nir_channel(b, &tex->dest.ssa, 0); + comps[ISL_CHANNEL_SELECT_GREEN] = nir_channel(b, &tex->dest.ssa, 1); + comps[ISL_CHANNEL_SELECT_BLUE] = nir_channel(b, &tex->dest.ssa, 2); + comps[ISL_CHANNEL_SELECT_ALPHA] = nir_channel(b, &tex->dest.ssa, 3); + + nir_ssa_def *swiz_comps[4]; + for (unsigned i = 0; i < 4; i++) { + nir_ssa_def *comp_swiz = nir_extract_u8(b, swiz, nir_imm_int(b, i)); + swiz_comps[i] = build_def_array_select(b, comps, comp_swiz, 0, 8); + } + nir_ssa_def *swiz_tex_res = nir_vec(b, swiz_comps, 4); + /* Rewrite uses before we insert so we don't rewrite this use */ + nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, + nir_src_for_ssa(swiz_tex_res), + swiz_tex_res->parent_instr); +} + +static void +lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) +{ unsigned plane = tex_instr_get_and_remove_plane_src(tex); + /* On Ivy Bridge and Bay Trail, we have to swizzle in the shader. Do this + * before we lower the derefs away so we can still find the descriptor. + */ + if (state->pdevice->info.gen == 7 && !state->pdevice->info.is_haswell) + lower_gen7_tex_swizzle(tex, plane, state); + + state->builder.cursor = nir_before_instr(&tex->instr); + lower_tex_deref(tex, nir_tex_src_texture_deref, &tex->texture_index, plane, state); diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index b9c9bfd7598..4012a6d5a71 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -400,12 +400,12 @@ populate_wm_prog_key(const struct gen_device_info *devinfo, * harmless to compute it and then let dead-code take care of it. */ if (ms_info->rasterizationSamples > 1) { - key->persample_interp = + key->persample_interp = ms_info->sampleShadingEnable && (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1; key->multisample_fbo = true; } - key->frag_coord_adds_sample_pos = ms_info->sampleShadingEnable; + key->frag_coord_adds_sample_pos = key->persample_interp; } } @@ -825,14 +825,24 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler, continue; const unsigned rt = var->data.location - FRAG_RESULT_DATA0; - /* Unused or out-of-bounds */ - if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid & (1 << rt))) + /* Out-of-bounds */ + if (rt >= MAX_RTS) continue; const unsigned array_len = glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1; assert(rt + array_len <= max_rt); + /* Unused */ + if (!(stage->key.wm.color_outputs_valid & BITFIELD_RANGE(rt, array_len))) { + /* If this is the RT at location 0 and we have alpha to coverage + * enabled we will have to create a null RT for it, so mark it as + * used. 
+ */ + if (rt > 0 || !stage->key.wm.alpha_to_coverage) + continue; + } + for (unsigned i = 0; i < array_len; i++) rt_used[rt + i] = true; } @@ -843,11 +853,22 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler, continue; rt_to_bindings[i] = num_rts; - rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) { - .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, - .binding = 0, - .index = i, - }; + + if (stage->key.wm.color_outputs_valid & (1 << i)) { + rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) { + .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, + .binding = 0, + .index = i, + }; + } else { + /* Setup a null render target */ + rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) { + .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, + .binding = 0, + .index = UINT32_MAX, + }; + } + num_rts++; } @@ -857,9 +878,11 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler, continue; const unsigned rt = var->data.location - FRAG_RESULT_DATA0; - if (rt >= MAX_RTS || - !(stage->key.wm.color_outputs_valid & (1 << rt))) { - /* Unused or out-of-bounds, throw it away */ + + if (rt >= MAX_RTS || !rt_used[rt]) { + /* Unused or out-of-bounds, throw it away, unless it is the first + * RT and we have alpha to coverage enabled. + */ deleted_output = true; var->data.mode = nir_var_function_temp; exec_node_remove(&var->node); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 8727d56d90f..e86561adf97 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -744,7 +744,7 @@ struct anv_state_table { struct anv_free_entry *map; uint32_t size; struct anv_block_state state; - struct u_vector mmap_cleanups; + struct u_vector cleanups; }; struct anv_state_pool { @@ -1548,6 +1548,17 @@ struct anv_sampled_image_descriptor { uint32_t sampler; }; +struct anv_texture_swizzle_descriptor { + /** Texture swizzle + * + * See also nir_intrinsic_channel_select_intel + */ + uint8_t swizzle[4]; + + /** Unused padding to ensure the struct is a multiple of 64 bits */ + uint32_t _pad; +}; + /** Struct representing a storage image descriptor */ struct anv_storage_image_descriptor { /** Bindless image handles @@ -1589,6 +1600,8 @@ enum anv_descriptor_data { ANV_DESCRIPTOR_SAMPLED_IMAGE = (1 << 6), /** Storage image handles */ ANV_DESCRIPTOR_STORAGE_IMAGE = (1 << 7), + /** Storage image handles */ + ANV_DESCRIPTOR_TEXTURE_SWIZZLE = (1 << 8), }; struct anv_descriptor_set_binding_layout { @@ -2157,12 +2170,6 @@ struct anv_xfb_binding { #define ANV_PARAM_DYN_OFFSET_IDX(param) ((param) & 0xffff) struct anv_push_constants { - /* Current allocated size of this push constants data structure. - * Because a decent chunk of it may not be used (images on SKL, for - * instance), we won't actually allocate the entire structure up-front. 
- */ - uint32_t size; - /* Push constant data provided by the client through vkPushConstants */ uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE]; @@ -2345,7 +2352,7 @@ struct anv_cmd_state { bool xfb_enabled; struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS]; VkShaderStageFlags push_constant_stages; - struct anv_push_constants * push_constants[MESA_SHADER_STAGES]; + struct anv_push_constants push_constants[MESA_SHADER_STAGES]; struct anv_state binding_tables[MESA_SHADER_STAGES]; struct anv_state samplers[MESA_SHADER_STAGES]; @@ -2465,14 +2472,6 @@ VkResult anv_cmd_buffer_execbuf(struct anv_device *device, VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer); -VkResult -anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, uint32_t size); -#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \ - anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \ - (offsetof(struct anv_push_constants, field) + \ - sizeof(cmd_buffer->state.push_constants[0]->field))) - struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, const void *data, uint32_t size, uint32_t alignment); struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, @@ -3201,7 +3200,13 @@ anv_can_sample_with_hiz(const struct gen_device_info * const devinfo, if (!(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) return false; - if (devinfo->gen < 8) + /* Allow this feature on BDW even though it is disabled in the BDW devinfo + * struct. There's documentation which suggests that this feature actually + * reduces performance on BDW, but it has only been observed to help so + * far. Sampling fast-cleared blocks on BDW must also be handled with care + * (see depth_stencil_attachment_compute_aux_usage() for more info). 
+ */ + if (devinfo->gen != 8 && !devinfo->has_sample_with_hiz) return false; return image->samples == 1; diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c index dcefed9e4dc..92b7c6f3ff8 100644 --- a/src/intel/vulkan/anv_queue.c +++ b/src/intel/vulkan/anv_queue.c @@ -27,7 +27,6 @@ #include #include -#include #include "anv_private.h" #include "vk_util.h" diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 380283bdd56..115d12b3536 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -286,41 +286,3 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, { /* The NP PMA fix doesn't exist on gen7 */ } - -void genX(CmdSetEvent)( - VkCommandBuffer commandBuffer, - VkEvent event, - VkPipelineStageFlags stageMask) -{ - anv_finishme("Implement events on gen7"); -} - -void genX(CmdResetEvent)( - VkCommandBuffer commandBuffer, - VkEvent event, - VkPipelineStageFlags stageMask) -{ - anv_finishme("Implement events on gen7"); -} - -void genX(CmdWaitEvents)( - VkCommandBuffer commandBuffer, - uint32_t eventCount, - const VkEvent* pEvents, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - uint32_t memoryBarrierCount, - const VkMemoryBarrier* pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier* pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier* pImageMemoryBarriers) -{ - anv_finishme("Implement events on gen7"); - - genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask, - false, /* byRegion */ - memoryBarrierCount, pMemoryBarriers, - bufferMemoryBarrierCount, pBufferMemoryBarriers, - imageMemoryBarrierCount, pImageMemoryBarriers); -} diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 6568d2c7511..762cc373cb6 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -355,6 +355,8 @@ want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer) */ const bool stc_write_en = (ds_iview->image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + (cmd_buffer->state.gfx.dynamic.stencil_write_mask.front || + cmd_buffer->state.gfx.dynamic.stencil_write_mask.back) && pipeline->writes_stencil; /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */ @@ -565,105 +567,3 @@ void genX(CmdBindIndexBuffer)( cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; } - -/* Set of stage bits for which are pipelined, i.e. they get queued by the - * command streamer for later execution. 
- */ -#define ANV_PIPELINE_STAGE_PIPELINED_BITS \ - (VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | \ - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | \ - VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | \ - VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | \ - VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | \ - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | \ - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | \ - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | \ - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | \ - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | \ - VK_PIPELINE_STAGE_TRANSFER_BIT | \ - VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | \ - VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | \ - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT) - -void genX(CmdSetEvent)( - VkCommandBuffer commandBuffer, - VkEvent _event, - VkPipelineStageFlags stageMask) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_event, event, _event); - - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) { - pc.StallAtPixelScoreboard = true; - pc.CommandStreamerStallEnable = true; - } - - pc.DestinationAddressType = DAT_PPGTT, - pc.PostSyncOperation = WriteImmediateData, - pc.Address = (struct anv_address) { - cmd_buffer->device->dynamic_state_pool.block_pool.bo, - event->state.offset - }; - pc.ImmediateData = VK_EVENT_SET; - } -} - -void genX(CmdResetEvent)( - VkCommandBuffer commandBuffer, - VkEvent _event, - VkPipelineStageFlags stageMask) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_event, event, _event); - - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) { - pc.StallAtPixelScoreboard = true; - pc.CommandStreamerStallEnable = true; - } - - pc.DestinationAddressType = DAT_PPGTT; - pc.PostSyncOperation = WriteImmediateData; - pc.Address = (struct anv_address) { - cmd_buffer->device->dynamic_state_pool.block_pool.bo, - event->state.offset - }; - pc.ImmediateData = VK_EVENT_RESET; - } -} - -void genX(CmdWaitEvents)( - VkCommandBuffer commandBuffer, - uint32_t eventCount, - const VkEvent* pEvents, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - uint32_t memoryBarrierCount, - const VkMemoryBarrier* pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier* pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier* pImageMemoryBarriers) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - for (uint32_t i = 0; i < eventCount; i++) { - ANV_FROM_HANDLE(anv_event, event, pEvents[i]); - - anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT), sem) { - sem.WaitMode = PollingMode, - sem.CompareOperation = COMPARE_SAD_EQUAL_SDD, - sem.SemaphoreDataDword = VK_EVENT_SET, - sem.SemaphoreAddress = (struct anv_address) { - cmd_buffer->device->dynamic_state_pool.block_pool.bo, - event->state.offset - }; - } - } - - genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask, - false, /* byRegion */ - memoryBarrierCount, pMemoryBarriers, - bufferMemoryBarrierCount, pBufferMemoryBarriers, - imageMemoryBarrierCount, pImageMemoryBarriers); -} diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 1af36bced24..dc1117f80ad 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -76,6 +76,8 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) sba.GeneralStateMOCS = GENX(MOCS); 
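/* A hedged sketch, not driver code: the gen8 CmdSetEvent/CmdResetEvent/
 * CmdWaitEvents implementation removed above (and re-added for all gens
 * later in this patch) treats a VkEvent as a single dword in the dynamic
 * state pool.  Set/reset is a PIPE_CONTROL post-sync immediate write of
 * VK_EVENT_SET / VK_EVENT_RESET, and waiting is an MI_SEMAPHORE_WAIT in
 * PollingMode with COMPARE_SAD_EQUAL_SDD, i.e. the command streamer spins
 * until the dword equals the "set" value.  In plain C the protocol reduces
 * to roughly the following; the names and constants are illustrative only.
 */
#include <stdatomic.h>
#include <stdint.h>

enum { EVENT_RESET_VALUE = 0, EVENT_SET_VALUE = 1 };

static void event_set(_Atomic uint32_t *slot)
{
   atomic_store(slot, EVENT_SET_VALUE);      /* PIPE_CONTROL immediate write */
}

static void event_reset(_Atomic uint32_t *slot)
{
   atomic_store(slot, EVENT_RESET_VALUE);
}

static void event_wait(_Atomic uint32_t *slot)
{
   /* MI_SEMAPHORE_WAIT, PollingMode, SAD_EQUAL_SDD: poll until equal. */
   while (atomic_load(slot) != EVENT_SET_VALUE)
      ;
}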
sba.GeneralStateBaseAddressModifyEnable = true; + sba.StatelessDataPortAccessMOCS = GENX(MOCS); + sba.SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer); sba.SurfaceStateMOCS = GENX(MOCS); @@ -108,6 +110,23 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) sba.IndirectObjectBufferSizeModifyEnable = true; sba.InstructionBufferSize = 0xfffff; sba.InstructionBuffersizeModifyEnable = true; +# else + /* On gen7, we have upper bounds instead. According to the docs, + * setting an upper bound of zero means that no bounds checking is + * performed so, in theory, we should be able to leave them zero. + * However, border color is broken and the GPU bounds-checks anyway. + * To avoid this and other potential problems, we may as well set it + * for everything. + */ + sba.GeneralStateAccessUpperBound = + (struct anv_address) { .bo = NULL, .offset = 0xfffff000 }; + sba.GeneralStateAccessUpperBoundModifyEnable = true; + sba.DynamicStateAccessUpperBound = + (struct anv_address) { .bo = NULL, .offset = 0xfffff000 }; + sba.DynamicStateAccessUpperBoundModifyEnable = true; + sba.InstructionAccessUpperBound = + (struct anv_address) { .bo = NULL, .offset = 0xfffff000 }; + sba.InstructionAccessUpperBoundModifyEnable = true; # endif # if (GEN_GEN >= 9) if (cmd_buffer->device->instance->physicalDevice.use_softpin) { @@ -762,27 +781,21 @@ init_fast_clear_color(struct anv_cmd_buffer *cmd_buffer, set_image_fast_clear_state(cmd_buffer, image, aspect, ANV_FAST_CLEAR_NONE); - /* The fast clear value dword(s) will be copied into a surface state object. - * Ensure that the restrictions of the fields in the dword(s) are followed. - * - * CCS buffers on SKL+ can have any value set for the clear colors. - */ - if (image->samples == 1 && GEN_GEN >= 9) - return; - - /* Other combinations of auxiliary buffers and platforms require specific - * values in the clear value dword(s). + /* Initialize the struct fields that are accessed for fast-clears so that + * the HW restrictions on the field values are satisfied. */ struct anv_address addr = anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect); if (GEN_GEN >= 9) { - for (unsigned i = 0; i < 4; i++) { + const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev; + const unsigned num_dwords = GEN_GEN >= 10 ? + isl_dev->ss.clear_color_state_size / 4 : + isl_dev->ss.clear_value_size / 4; + for (unsigned i = 0; i < num_dwords; i++) { anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { sdi.Address = addr; sdi.Address.offset += i * 4; - /* MCS buffers on SKL+ can only have 1/0 clear colors. 
*/ - assert(image->samples > 1); sdi.ImmediateData = 0; } } @@ -3535,16 +3548,8 @@ anv_cmd_buffer_push_base_group_id(struct anv_cmd_buffer *cmd_buffer, if (anv_batch_has_error(&cmd_buffer->batch)) return; - VkResult result = - anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, MESA_SHADER_COMPUTE, - base_work_group_id); - if (result != VK_SUCCESS) { - cmd_buffer->batch.status = result; - return; - } - struct anv_push_constants *push = - cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; + &cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; if (push->base_work_group_id[0] != baseGroupX || push->base_work_group_id[1] != baseGroupY || push->base_work_group_id[2] != baseGroupZ) { @@ -3750,6 +3755,25 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer, anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), t); #endif +#if GEN_GEN == 9 + if (pipeline == _3D) { + /* There is a mid-object preemption workaround which requires you to + * re-emit MEDIA_VFE_STATE after switching from GPGPU to 3D. However, + * even without preemption, we have issues with geometry flickering when + * GPGPU and 3D are back-to-back and this seems to fix it. We don't + * really know why. + */ + const uint32_t subslices = + MAX2(cmd_buffer->device->instance->physicalDevice.subslice_total, 1); + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_VFE_STATE), vfe) { + vfe.MaximumNumberofThreads = + devinfo->max_cs_threads * subslices - 1; + vfe.NumberofURBEntries = 2; + vfe.URBEntryAllocationSize = 2; + } + } +#endif + /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] * PIPELINE_SELECT [DevBWR+]": * @@ -4743,3 +4767,110 @@ void genX(CmdEndConditionalRenderingEXT)( cmd_state->conditional_render_enabled = false; } #endif + +/* Set of stage bits for which are pipelined, i.e. they get queued by the + * command streamer for later execution. 
+ */ +#define ANV_PIPELINE_STAGE_PIPELINED_BITS \ + (VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | \ + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | \ + VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | \ + VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | \ + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | \ + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | \ + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | \ + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | \ + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | \ + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | \ + VK_PIPELINE_STAGE_TRANSFER_BIT | \ + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | \ + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | \ + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT) + +void genX(CmdSetEvent)( + VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) { + pc.StallAtPixelScoreboard = true; + pc.CommandStreamerStallEnable = true; + } + + pc.DestinationAddressType = DAT_PPGTT, + pc.PostSyncOperation = WriteImmediateData, + pc.Address = (struct anv_address) { + cmd_buffer->device->dynamic_state_pool.block_pool.bo, + event->state.offset + }; + pc.ImmediateData = VK_EVENT_SET; + } +} + +void genX(CmdResetEvent)( + VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) { + pc.StallAtPixelScoreboard = true; + pc.CommandStreamerStallEnable = true; + } + + pc.DestinationAddressType = DAT_PPGTT; + pc.PostSyncOperation = WriteImmediateData; + pc.Address = (struct anv_address) { + cmd_buffer->device->dynamic_state_pool.block_pool.bo, + event->state.offset + }; + pc.ImmediateData = VK_EVENT_RESET; + } +} + +void genX(CmdWaitEvents)( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) +{ +#if GEN_GEN >= 8 + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + for (uint32_t i = 0; i < eventCount; i++) { + ANV_FROM_HANDLE(anv_event, event, pEvents[i]); + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT), sem) { + sem.WaitMode = PollingMode, + sem.CompareOperation = COMPARE_SAD_EQUAL_SDD, + sem.SemaphoreDataDword = VK_EVENT_SET, + sem.SemaphoreAddress = (struct anv_address) { + cmd_buffer->device->dynamic_state_pool.block_pool.bo, + event->state.offset + }; + } + } +#else + anv_finishme("Implement events on gen7"); +#endif + + genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask, + false, /* byRegion */ + memoryBarrierCount, pMemoryBarriers, + bufferMemoryBarrierCount, pBufferMemoryBarriers, + imageMemoryBarrierCount, pImageMemoryBarriers); +} diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 0b58dce05b0..8db2e1e6aa0 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1075,7 +1075,7 @@ emit_3dstate_clip(struct anv_pipeline *pipeline, 
clip.ClipEnable = true; clip.StatisticsEnable = true; clip.EarlyCullEnable = true; - clip.APIMode = APIMODE_D3D, + clip.APIMode = APIMODE_D3D; clip.ViewportXYClipTestEnable = true; #if GEN_GEN >= 8 diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index 146435c3f8f..aa0cf8b9471 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -346,14 +346,23 @@ emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer, } static void -emit_query_availability(struct anv_cmd_buffer *cmd_buffer, - struct anv_address addr) +emit_query_mi_availability(struct gen_mi_builder *b, + struct anv_address addr, + bool available) +{ + gen_mi_store(b, gen_mi_mem64(addr), gen_mi_imm(available)); +} + +static void +emit_query_pc_availability(struct anv_cmd_buffer *cmd_buffer, + struct anv_address addr, + bool available) { anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { pc.DestinationAddressType = DAT_PPGTT; pc.PostSyncOperation = WriteImmediateData; pc.Address = addr; - pc.ImmediateData = 1; + pc.ImmediateData = available; } } @@ -366,11 +375,39 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer, struct gen_mi_builder *b, struct anv_query_pool *pool, uint32_t first_index, uint32_t num_queries) { - for (uint32_t i = 0; i < num_queries; i++) { - struct anv_address slot_addr = - anv_query_address(pool, first_index + i); - gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8); - emit_query_availability(cmd_buffer, slot_addr); + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + case VK_QUERY_TYPE_TIMESTAMP: + /* These queries are written with a PIPE_CONTROL so clear them using the + * PIPE_CONTROL as well so we don't have to synchronize between 2 types + * of operations. + */ + assert((pool->stride % 8) == 0); + for (uint32_t i = 0; i < num_queries; i++) { + struct anv_address slot_addr = + anv_query_address(pool, first_index + i); + + for (uint32_t qword = 1; qword < (pool->stride / 8); qword++) { + emit_query_pc_availability(cmd_buffer, + anv_address_add(slot_addr, qword * 8), + false); + } + emit_query_pc_availability(cmd_buffer, slot_addr, true); + } + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + for (uint32_t i = 0; i < num_queries; i++) { + struct anv_address slot_addr = + anv_query_address(pool, first_index + i); + gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8); + emit_query_mi_availability(b, slot_addr, true); + } + break; + + default: + unreachable("Unsupported query type"); } } @@ -383,11 +420,28 @@ void genX(CmdResetQueryPool)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - for (uint32_t i = 0; i < queryCount; i++) { - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdm) { - sdm.Address = anv_query_address(pool, firstQuery + i); - sdm.ImmediateData = 0; + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + case VK_QUERY_TYPE_TIMESTAMP: + for (uint32_t i = 0; i < queryCount; i++) { + emit_query_pc_availability(cmd_buffer, + anv_query_address(pool, firstQuery + i), + false); } + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: { + struct gen_mi_builder b; + gen_mi_builder_init(&b, &cmd_buffer->batch); + + for (uint32_t i = 0; i < queryCount; i++) + emit_query_mi_availability(&b, anv_query_address(pool, firstQuery + i), false); + break; + } + + default: + unreachable("Unsupported query type"); } } @@ -525,7 +579,7 @@ 
void genX(CmdEndQueryIndexedEXT)( switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: emit_ps_depth_count(cmd_buffer, anv_address_add(query_addr, 16)); - emit_query_availability(cmd_buffer, query_addr); + emit_query_pc_availability(cmd_buffer, query_addr, true); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: { @@ -543,7 +597,7 @@ void genX(CmdEndQueryIndexedEXT)( offset += 16; } - emit_query_availability(cmd_buffer, query_addr); + emit_query_mi_availability(&b, query_addr, true); break; } @@ -554,7 +608,7 @@ void genX(CmdEndQueryIndexedEXT)( } emit_xfb_query(&b, index, anv_address_add(query_addr, 16)); - emit_query_availability(cmd_buffer, query_addr); + emit_query_mi_availability(&b, query_addr, true); break; default: @@ -613,7 +667,7 @@ void genX(CmdWriteTimestamp)( break; } - emit_query_availability(cmd_buffer, query_addr); + emit_query_pc_availability(cmd_buffer, query_addr, true); /* When multiview is active the spec requires that N consecutive query * indices are used, where N is the number of active views in the subpass. @@ -684,7 +738,20 @@ void genX(CmdCopyQueryPoolResults)( } if ((flags & VK_QUERY_RESULT_WAIT_BIT) || - (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS)) { + (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS) || + /* Occlusion & timestamp queries are written using a PIPE_CONTROL and + * because we're about to copy values from MI commands, we need to + * stall the command streamer to make sure the PIPE_CONTROL values have + * landed, otherwise we could see inconsistent values & availability. + * + * From the vulkan spec: + * + * "vkCmdCopyQueryPoolResults is guaranteed to see the effect of + * previous uses of vkCmdResetQueryPool in the same queue, without + * any additional synchronization." + */ + pool->type == VK_QUERY_TYPE_OCCLUSION || + pool->type == VK_QUERY_TYPE_TIMESTAMP) { cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); } diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index 9276dc9470b..c2266b68207 100644 --- a/src/intel/vulkan/genX_state.c +++ b/src/intel/vulkan/genX_state.c @@ -200,18 +200,6 @@ genX(init_device_state)(struct anv_device *device) lri.DataDWord = half_slice_chicken7; } - /* WA_2204188704: Pixel Shader Panic dispatch must be disabled. 
- */ - uint32_t common_slice_chicken3; - anv_pack_struct(&common_slice_chicken3, GENX(COMMON_SLICE_CHICKEN3), - .PSThreadPanicDispatch = 0x3, - .PSThreadPanicDispatchMask = 0x3); - - anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) { - lri.RegisterOffset = GENX(COMMON_SLICE_CHICKEN3_num); - lri.DataDWord = common_slice_chicken3; - } - /* WaEnableStateCacheRedirectToCS:icl */ uint32_t slice_common_eco_chicken1; anv_pack_struct(&slice_common_eco_chicken1, diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build index c12040c84eb..d0120d8026c 100644 --- a/src/intel/vulkan/meson.build +++ b/src/intel/vulkan/meson.build @@ -98,20 +98,20 @@ foreach g : [['70', ['gen7_cmd_buffer.c']], ['75', ['gen7_cmd_buffer.c']], 'anv_gen@0@'.format(_gen), [anv_gen_files, g[1], anv_entrypoints[0], anv_extensions_h], include_directories : [ - inc_common, inc_compiler, inc_include, inc_intel, inc_vulkan_util, - inc_vulkan_wsi, + inc_common, inc_compiler, inc_include, inc_intel, inc_vulkan_wsi, ], c_args : [ c_vis_args, no_override_init_args, c_sse2_args, '-DGEN_VERSIONx10=@0@'.format(_gen), ], - dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml], + dependencies : [ + dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml, idep_vulkan_util_headers, + ], ) endforeach libanv_files = files( 'anv_allocator.c', - 'anv_android_stubs.c', 'anv_android.h', 'anv_batch_chain.c', 'anv_blorp.c', @@ -144,6 +144,7 @@ anv_deps = [ dep_libdrm, dep_valgrind, idep_nir_headers, + idep_vulkan_util_headers, ] anv_flags = [ c_vis_args, @@ -176,6 +177,14 @@ if with_xlib_lease anv_flags += '-DVK_USE_PLATFORM_XLIB_XRANDR_EXT' endif +if with_platform_android + anv_deps += dep_android + anv_flags += '-DVK_USE_PLATFORM_ANDROID_KHR' + libanv_files += files('anv_android.c') +else + libanv_files += files('anv_android_stubs.c') +endif + libanv_common = static_library( 'anv_common', [ @@ -183,7 +192,7 @@ libanv_common = static_library( gen_xml_pack, ], include_directories : [ - inc_common, inc_intel, inc_compiler, inc_include, inc_vulkan_util, + inc_common, inc_intel, inc_compiler, inc_include, inc_vulkan_wsi, ], c_args : anv_flags, @@ -194,16 +203,15 @@ libvulkan_intel = shared_library( 'vulkan_intel', [files('anv_gem.c'), anv_entrypoints[0], anv_extensions_h], include_directories : [ - inc_common, inc_intel, inc_compiler, inc_include, inc_vulkan_util, - inc_vulkan_wsi, + inc_common, inc_intel, inc_compiler, inc_include, inc_vulkan_wsi, ], link_whole : [libanv_common, libanv_gen_libs], link_with : [ libintel_compiler, libintel_common, libintel_dev, libisl, libblorp, - libvulkan_util, libvulkan_wsi, libmesa_util, + libvulkan_wsi, libmesa_util, ], dependencies : [ - dep_thread, dep_dl, dep_m, anv_deps, idep_nir, idep_genxml, + dep_thread, dep_dl, dep_m, anv_deps, idep_nir, idep_genxml, idep_vulkan_util ], c_args : anv_flags, link_args : ['-Wl,--build-id=sha1', ld_args_bsymbolic, ld_args_gc_sections], @@ -215,23 +223,22 @@ if with_tests 'vulkan_intel_test', [files('anv_gem_stubs.c'), anv_entrypoints[0], anv_extensions_h], include_directories : [ - inc_common, inc_intel, inc_compiler, inc_include, inc_vulkan_util, - inc_vulkan_wsi, + inc_common, inc_intel, inc_compiler, inc_include, inc_vulkan_wsi, ], link_whole : libanv_common, link_with : [ libanv_gen_libs, libintel_compiler, libintel_common, libintel_dev, - libisl, libblorp, libvulkan_util, libvulkan_wsi, libmesa_util, + libisl, libblorp, libvulkan_wsi, libmesa_util, ], dependencies : [ - dep_thread, dep_dl, dep_m, anv_deps, idep_nir, + dep_thread, 
dep_dl, dep_m, anv_deps, idep_nir, idep_vulkan_util ], c_args : anv_flags, ) - foreach t : ['block_pool_no_free', 'state_pool_no_free', - 'state_pool_free_list_only', 'state_pool', - 'state_pool_padding'] + foreach t : ['block_pool_no_free', 'block_pool_grow_first', + 'state_pool_no_free', 'state_pool_free_list_only', + 'state_pool', 'state_pool_padding'] test( 'anv_@0@'.format(t), executable( @@ -239,9 +246,9 @@ if with_tests ['tests/@0@.c'.format(t), anv_entrypoints[0], anv_extensions_h], c_args : [ c_sse2_args ], link_with : libvulkan_intel_test, - dependencies : [dep_libdrm, dep_thread, dep_m, dep_valgrind], + dependencies : [dep_libdrm, dep_thread, dep_m, dep_valgrind, idep_vulkan_util, ], include_directories : [ - inc_common, inc_intel, inc_compiler, inc_vulkan_util, inc_vulkan_wsi, + inc_common, inc_intel, inc_compiler, inc_vulkan_wsi, ], ), suite : ['intel'], diff --git a/src/intel/vulkan/tests/block_pool_grow_first.c b/src/intel/vulkan/tests/block_pool_grow_first.c new file mode 100644 index 00000000000..aea12b29de8 --- /dev/null +++ b/src/intel/vulkan/tests/block_pool_grow_first.c @@ -0,0 +1,63 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#undef NDEBUG + +#include "anv_private.h" + +int main(int argc, char **argv) +{ + struct anv_instance instance; + struct anv_device device = { + .instance = &instance, + }; + struct anv_block_pool pool; + + /* Create a pool with initial size smaller than the block allocated, so + * that it must grow in the first allocation. + */ + const uint32_t block_size = 16 * 1024; + const uint32_t initial_size = block_size / 2; + + anv_block_pool_init(&pool, &device, 4096, initial_size, EXEC_OBJECT_PINNED); + assert(pool.size == initial_size); + + uint32_t padding; + int32_t offset = anv_block_pool_alloc(&pool, block_size, &padding); + + /* Pool will have grown at least space to fit the new allocation. */ + assert(pool.size > initial_size); + assert(pool.size >= initial_size + block_size); + + /* The whole initial size is considered padding and the allocation should be + * right next to it. + */ + assert(padding == initial_size); + assert(offset == initial_size); + + /* Use the memory to ensure it is valid. 
*/ + void *map = anv_block_pool_map(&pool, offset); + memset(map, 22, block_size); + + anv_block_pool_finish(&pool); +} diff --git a/src/intel/vulkan/vk_format_info.h b/src/intel/vulkan/vk_format_info.h index 2e126645763..fe88773cda9 100644 --- a/src/intel/vulkan/vk_format_info.h +++ b/src/intel/vulkan/vk_format_info.h @@ -32,8 +32,13 @@ /* See i915_private_android_types.h in minigbm. */ #define HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL 0x100 +enum { + /* Usage bit equal to GRALLOC_USAGE_HW_CAMERA_MASK */ + AHARDWAREBUFFER_USAGE_CAMERA_MASK = 0x00060000U, +}; + static inline VkFormat -vk_format_from_android(unsigned android_format) +vk_format_from_android(unsigned android_format, unsigned android_usage) { switch (android_format) { case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM: @@ -49,6 +54,11 @@ vk_format_from_android(unsigned android_format) return VK_FORMAT_A2B10G10R10_UNORM_PACK32; case HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL: return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM; + case AHARDWAREBUFFER_FORMAT_IMPLEMENTATION_DEFINED: + if (android_usage & AHARDWAREBUFFER_USAGE_CAMERA_MASK) + return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM; + else + return VK_FORMAT_R8G8B8_UNORM; case AHARDWAREBUFFER_FORMAT_BLOB: default: return VK_FORMAT_UNDEFINED; diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c index 7ef59f0477b..6b611e18ae9 100644 --- a/src/loader/loader_dri3_helper.c +++ b/src/loader/loader_dri3_helper.c @@ -1120,6 +1120,9 @@ dri3_cpp_for_format(uint32_t format) { case __DRI_IMAGE_FORMAT_SARGB8: case __DRI_IMAGE_FORMAT_SABGR8: return 4; + case __DRI_IMAGE_FORMAT_XBGR16161616F: + case __DRI_IMAGE_FORMAT_ABGR16161616F: + return 8; case __DRI_IMAGE_FORMAT_NONE: default: return 0; @@ -1178,6 +1181,8 @@ image_format_to_fourcc(int format) case __DRI_IMAGE_FORMAT_ARGB2101010: return __DRI_IMAGE_FOURCC_ARGB2101010; case __DRI_IMAGE_FORMAT_XBGR2101010: return __DRI_IMAGE_FOURCC_XBGR2101010; case __DRI_IMAGE_FORMAT_ABGR2101010: return __DRI_IMAGE_FOURCC_ABGR2101010; + case __DRI_IMAGE_FORMAT_XBGR16161616F: return __DRI_IMAGE_FOURCC_XBGR16161616F; + case __DRI_IMAGE_FORMAT_ABGR16161616F: return __DRI_IMAGE_FOURCC_ABGR16161616F; } return 0; } diff --git a/src/mapi/glapi/gen/gl_XML.py b/src/mapi/glapi/gen/gl_XML.py index 478f82ca314..2854a9a5688 100644 --- a/src/mapi/glapi/gen/gl_XML.py +++ b/src/mapi/glapi/gen/gl_XML.py @@ -49,7 +49,7 @@ def parse_GL_API( file_name, factory = None ): # that are not part of the ABI. for func in api.functionIterateByCategory(): - if func.assign_offset: + if func.assign_offset and func.offset < 0: func.offset = api.next_offset; api.next_offset += 1 @@ -683,8 +683,12 @@ def process_element(self, element): if name in static_data.offsets and static_data.offsets[name] <= static_data.MAX_OFFSETS: self.offset = static_data.offsets[name] + elif name in static_data.offsets and static_data.offsets[name] > static_data.MAX_OFFSETS: + self.offset = static_data.offsets[name] + self.assign_offset = True else: - self.offset = -1 + if self.exec_flavor != "skip": + raise RuntimeError("Entry-point %s is missing offset in static_data.py. Add one at the bottom of the list." 
% (name)) self.assign_offset = self.exec_flavor != "skip" or name in static_data.unused_functions if not self.name: diff --git a/src/mapi/glapi/gen/gl_gentable.py b/src/mapi/glapi/gen/gl_gentable.py index 9d8923cf8db..92e1a546cff 100644 --- a/src/mapi/glapi/gen/gl_gentable.py +++ b/src/mapi/glapi/gen/gl_gentable.py @@ -45,7 +45,7 @@ #endif #if (defined(GLXEXT) && defined(HAVE_BACKTRACE)) \\ - || (!defined(GLXEXT) && defined(DEBUG) && !defined(__CYGWIN__) && !defined(__MINGW32__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__)) + || (!defined(GLXEXT) && defined(DEBUG) && defined(HAVE_EXECINFO_H)) #define USE_BACKTRACE #endif diff --git a/src/mapi/glapi/gen/static_data.py b/src/mapi/glapi/gen/static_data.py index bc49324348f..5044e0f78cf 100644 --- a/src/mapi/glapi/gen/static_data.py +++ b/src/mapi/glapi/gen/static_data.py @@ -29,7 +29,7 @@ """Table of functions that have ABI-mandated offsets in the dispatch table. The first MAX_OFFSETS entries are required by indirect GLX. The rest are -required to preserve the glapi <> drivers ABI. This is to be addressed shortly. +required to preserve the glapi <> GL/GLES ABI. This is to be addressed shortly. This list will never change.""" offsets = { @@ -1453,6 +1453,7 @@ "TexParameterxv": 1417, "BlendBarrier": 1418, "PrimitiveBoundingBox": 1419, + "MaxShaderCompilerThreadsKHR": 1420, } functions = [ diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 3515e312023..0a8b9bb885d 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -104,6 +104,8 @@ static void cleanup_temp_texture(struct gl_context *ctx, struct temp_texture *tex); static void meta_glsl_clear_cleanup(struct gl_context *ctx, struct clear_state *clear); +static void meta_copypix_cleanup(struct gl_context *ctx, + struct copypix_state *copypix); static void meta_decompress_cleanup(struct gl_context *ctx, struct decompress_state *decompress); static void meta_drawpix_cleanup(struct gl_context *ctx, @@ -422,6 +424,7 @@ _mesa_meta_free(struct gl_context *ctx) _mesa_make_current(ctx, NULL, NULL); _mesa_meta_glsl_blit_cleanup(ctx, &ctx->Meta->Blit); meta_glsl_clear_cleanup(ctx, &ctx->Meta->Clear); + meta_copypix_cleanup(ctx, &ctx->Meta->CopyPix); _mesa_meta_glsl_generate_mipmap_cleanup(ctx, &ctx->Meta->Mipmap); cleanup_temp_texture(ctx, &ctx->Meta->TempTex); meta_decompress_cleanup(ctx, &ctx->Meta->Decompress); @@ -1465,6 +1468,8 @@ _mesa_meta_setup_drawpix_texture(struct gl_context *ctx, /* load image */ _mesa_TexSubImage2D(tex->Target, 0, 0, 0, width, height, format, type, pixels); + + _mesa_reference_buffer_object(ctx, &save_unpack_obj, NULL); } } else { @@ -1595,6 +1600,17 @@ meta_glsl_clear_cleanup(struct gl_context *ctx, struct clear_state *clear) } } +static void +meta_copypix_cleanup(struct gl_context *ctx, struct copypix_state *copypix) +{ + if (copypix->VAO == 0) + return; + _mesa_DeleteVertexArrays(1, ©pix->VAO); + copypix->VAO = 0; + _mesa_reference_buffer_object(ctx, ©pix->buf_obj, NULL); +} + + /** * Given a bitfield of BUFFER_BIT_x draw buffers, call glDrawBuffers to * set GL to only draw to those buffers. 
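/* A hedged sketch of the reference-counting idiom behind the two meta.c
 * fixes above: _mesa_reference_buffer_object(ctx, &ptr, NULL) releases
 * whatever *ptr currently pins and clears the pointer, which is what both
 * _mesa_meta_setup_drawpix_texture() and the new meta_copypix_cleanup()
 * rely on to avoid leaking buffer objects.  The struct and helper below are
 * illustrative stand-ins, not the Mesa API.
 */
#include <stdlib.h>

struct refcounted { unsigned refcount; };

static void
reference_object(struct refcounted **ptr, struct refcounted *obj)
{
   if (*ptr == obj)
      return;
   if (*ptr && --(*ptr)->refcount == 0)
      free(*ptr);               /* dropped the last reference */
   if (obj)
      obj->refcount++;
   *ptr = obj;
}

/* Cleanup paths simply "reference in" NULL:  reference_object(&saved, NULL); */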
diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index ac3a04bceff..a384cadd557 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -886,6 +886,14 @@ static const struct { .image_format = __DRI_IMAGE_FORMAT_XRGB8888, .mesa_format = MESA_FORMAT_B8G8R8X8_UNORM, }, + { + .image_format = __DRI_IMAGE_FORMAT_ABGR16161616F, + .mesa_format = MESA_FORMAT_RGBA_FLOAT16, + }, + { + .image_format = __DRI_IMAGE_FORMAT_XBGR16161616F, + .mesa_format = MESA_FORMAT_RGBX_FLOAT16, + }, { .image_format = __DRI_IMAGE_FORMAT_ARGB2101010, .mesa_format = MESA_FORMAT_B10G10R10A2_UNORM, diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c index 5a66bcf8e05..f9a4acefed1 100644 --- a/src/mesa/drivers/dri/common/utils.c +++ b/src/mesa/drivers/dri/common/utils.c @@ -181,28 +181,47 @@ driCreateConfigs(mesa_format format, GLboolean enable_accum, GLboolean color_depth_match, GLboolean mutable_render_buffer) { - static const uint32_t masks_table[][4] = { + static const struct { + uint32_t masks[4]; + int shifts[4]; + } format_table[] = { /* MESA_FORMAT_B5G6R5_UNORM */ - { 0x0000F800, 0x000007E0, 0x0000001F, 0x00000000 }, + {{ 0x0000F800, 0x000007E0, 0x0000001F, 0x00000000 }, + { 11, 5, 0, -1 }}, /* MESA_FORMAT_B8G8R8X8_UNORM */ - { 0x00FF0000, 0x0000FF00, 0x000000FF, 0x00000000 }, + {{ 0x00FF0000, 0x0000FF00, 0x000000FF, 0x00000000 }, + { 16, 8, 0, -1 }}, /* MESA_FORMAT_B8G8R8A8_UNORM */ - { 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000 }, + {{ 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000 }, + { 16, 8, 0, 24 }}, /* MESA_FORMAT_B10G10R10X2_UNORM */ - { 0x3FF00000, 0x000FFC00, 0x000003FF, 0x00000000 }, + {{ 0x3FF00000, 0x000FFC00, 0x000003FF, 0x00000000 }, + { 20, 10, 0, -1 }}, /* MESA_FORMAT_B10G10R10A2_UNORM */ - { 0x3FF00000, 0x000FFC00, 0x000003FF, 0xC0000000 }, + {{ 0x3FF00000, 0x000FFC00, 0x000003FF, 0xC0000000 }, + { 20, 10, 0, 30 }}, /* MESA_FORMAT_R8G8B8A8_UNORM */ - { 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 }, + {{ 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 }, + { 0, 8, 16, 24 }}, /* MESA_FORMAT_R8G8B8X8_UNORM */ - { 0x000000FF, 0x0000FF00, 0x00FF0000, 0x00000000 }, + {{ 0x000000FF, 0x0000FF00, 0x00FF0000, 0x00000000 }, + { 0, 8, 16, -1 }}, /* MESA_FORMAT_R10G10B10X2_UNORM */ - { 0x000003FF, 0x000FFC00, 0x3FF00000, 0x00000000 }, + {{ 0x000003FF, 0x000FFC00, 0x3FF00000, 0x00000000 }, + { 0, 10, 20, -1 }}, /* MESA_FORMAT_R10G10B10A2_UNORM */ - { 0x000003FF, 0x000FFC00, 0x3FF00000, 0xC0000000 }, + {{ 0x000003FF, 0x000FFC00, 0x3FF00000, 0xC0000000 }, + { 0, 10, 20, 30 }}, + /* MESA_FORMAT_RGBX_FLOAT16 */ + {{ 0, 0, 0, 0}, + { 0, 16, 32, -1 }}, + /* MESA_FORMAT_RGBA_FLOAT16 */ + {{ 0, 0, 0, 0}, + { 0, 16, 32, 48 }}, }; const uint32_t * masks; + const int * shifts; __DRIconfig **configs, **c; struct gl_config *modes; unsigned i, j, k, h; @@ -213,37 +232,55 @@ driCreateConfigs(mesa_format format, int blue_bits; int alpha_bits; bool is_srgb; + bool is_float; switch (format) { case MESA_FORMAT_B5G6R5_UNORM: - masks = masks_table[0]; + masks = format_table[0].masks; + shifts = format_table[0].shifts; break; case MESA_FORMAT_B8G8R8X8_UNORM: case MESA_FORMAT_B8G8R8X8_SRGB: - masks = masks_table[1]; + masks = format_table[1].masks; + shifts = format_table[1].shifts; break; case MESA_FORMAT_B8G8R8A8_UNORM: case MESA_FORMAT_B8G8R8A8_SRGB: - masks = masks_table[2]; + masks = format_table[2].masks; + shifts = format_table[2].shifts; break; case MESA_FORMAT_R8G8B8A8_UNORM: case 
MESA_FORMAT_R8G8B8A8_SRGB: - masks = masks_table[5]; + masks = format_table[5].masks; + shifts = format_table[5].shifts; break; case MESA_FORMAT_R8G8B8X8_UNORM: - masks = masks_table[6]; + masks = format_table[6].masks; + shifts = format_table[6].shifts; break; case MESA_FORMAT_B10G10R10X2_UNORM: - masks = masks_table[3]; + masks = format_table[3].masks; + shifts = format_table[3].shifts; break; case MESA_FORMAT_B10G10R10A2_UNORM: - masks = masks_table[4]; + masks = format_table[4].masks; + shifts = format_table[4].shifts; + break; + case MESA_FORMAT_RGBX_FLOAT16: + masks = format_table[9].masks; + shifts = format_table[9].shifts; + break; + case MESA_FORMAT_RGBA_FLOAT16: + masks = format_table[10].masks; + shifts = format_table[10].shifts; break; case MESA_FORMAT_R10G10B10X2_UNORM: - masks = masks_table[7]; + masks = format_table[7].masks; + shifts = format_table[7].shifts; break; case MESA_FORMAT_R10G10B10A2_UNORM: - masks = masks_table[8]; + masks = format_table[8].masks; + shifts = format_table[8].shifts; break; default: fprintf(stderr, "[%s:%u] Unknown framebuffer type %s (%d).\n", @@ -257,6 +294,7 @@ driCreateConfigs(mesa_format format, blue_bits = _mesa_get_format_bits(format, GL_BLUE_BITS); alpha_bits = _mesa_get_format_bits(format, GL_ALPHA_BITS); is_srgb = _mesa_get_format_color_encoding(format) == GL_SRGB; + is_float = _mesa_get_format_datatype(format) == GL_FLOAT; num_modes = num_depth_stencil_bits * num_db_modes * num_accum_bits * num_msaa_modes; configs = calloc(num_modes + 1, sizeof *configs); @@ -286,6 +324,7 @@ driCreateConfigs(mesa_format format, c++; memset(modes, 0, sizeof *modes); + modes->floatMode = is_float; modes->redBits = red_bits; modes->greenBits = green_bits; modes->blueBits = blue_bits; @@ -294,6 +333,10 @@ driCreateConfigs(mesa_format format, modes->greenMask = masks[1]; modes->blueMask = masks[2]; modes->alphaMask = masks[3]; + modes->redShift = shifts[0]; + modes->greenShift = shifts[1]; + modes->blueShift = shifts[2]; + modes->alphaShift = shifts[3]; modes->rgbBits = modes->redBits + modes->greenBits + modes->blueBits + modes->alphaBits; @@ -414,9 +457,13 @@ static const struct { unsigned int attrib, offset; } attribMap[] = { __ATTRIB(__DRI_ATTRIB_TRANSPARENT_BLUE_VALUE, transparentBlue), __ATTRIB(__DRI_ATTRIB_TRANSPARENT_ALPHA_VALUE, transparentAlpha), __ATTRIB(__DRI_ATTRIB_RED_MASK, redMask), + __ATTRIB(__DRI_ATTRIB_RED_SHIFT, redShift), __ATTRIB(__DRI_ATTRIB_GREEN_MASK, greenMask), + __ATTRIB(__DRI_ATTRIB_GREEN_SHIFT, greenShift), __ATTRIB(__DRI_ATTRIB_BLUE_MASK, blueMask), + __ATTRIB(__DRI_ATTRIB_BLUE_SHIFT, blueShift), __ATTRIB(__DRI_ATTRIB_ALPHA_MASK, alphaMask), + __ATTRIB(__DRI_ATTRIB_ALPHA_SHIFT, alphaShift), __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_WIDTH, maxPbufferWidth), __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_HEIGHT, maxPbufferHeight), __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_PIXELS, maxPbufferPixels), @@ -451,6 +498,8 @@ driGetConfigAttribIndex(const __DRIconfig *config, case __DRI_ATTRIB_RENDER_TYPE: /* no support for color index mode */ *value = __DRI_ATTRIB_RGBA_BIT; + if (config->modes.floatMode) + *value |= __DRI_ATTRIB_FLOAT_BIT; break; case __DRI_ATTRIB_CONFIG_CAVEAT: if (config->modes.visualRating == GLX_NON_CONFORMANT_CONFIG) diff --git a/src/mesa/drivers/dri/i915/intel_regions.c b/src/mesa/drivers/dri/i915/intel_regions.c index fee734801cd..803ea9326e8 100644 --- a/src/mesa/drivers/dri/i915/intel_regions.c +++ b/src/mesa/drivers/dri/i915/intel_regions.c @@ -57,7 +57,7 @@ */ #define DEBUG_BACKTRACE_SIZE 0 -#if DEBUG_BACKTRACE_SIZE == 0 +#if 
DEBUG_BACKTRACE_SIZE == 0 || !defined(HAVE_EXECINFO_H) /* Use the standard debug output */ #define _DBG(...) DBG(__VA_ARGS__) #else diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index 29b46147f39..ab1eb3d99a9 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -274,6 +274,8 @@ LOCAL_LDFLAGS += $(MESA_DRI_LDFLAGS) LOCAL_CFLAGS := \ $(MESA_DRI_CFLAGS) +LOCAL_CFLAGS += -Wno-error + LOCAL_C_INCLUDES := \ $(MESA_DRI_C_INCLUDES) \ $(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_glsl,,) \ diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index 7b0ddfb64dd..46774c69bd2 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -402,6 +402,8 @@ vma_alloc(struct brw_bufmgr *bufmgr, /* Without softpin support, we let the kernel assign addresses. */ assert(brw_using_softpin(bufmgr)); + alignment = ALIGN(alignment, PAGE_SIZE); + struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size); uint64_t addr; @@ -1487,7 +1489,7 @@ brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd) brw_bo_make_external(bo); if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle, - DRM_CLOEXEC, prime_fd) != 0) + DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0) return -errno; bo->reusable = false; @@ -1717,6 +1719,9 @@ brw_bufmgr_init(struct gen_device_info *devinfo, int fd) const uint64_t _4GB = 4ull << 30; + /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */ + const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE; + if (devinfo->gen >= 8 && gtt_size > _4GB) { bufmgr->initial_kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; @@ -1726,9 +1731,13 @@ brw_bufmgr_init(struct gen_device_info *devinfo, int fd) bufmgr->initial_kflags |= EXEC_OBJECT_PINNED; util_vma_heap_init(&bufmgr->vma_allocator[BRW_MEMZONE_LOW_4G], - PAGE_SIZE, _4GB); + PAGE_SIZE, _4GB_minus_1); + + /* Leave the last 4GB out of the high vma range, so that no state + * base address + size can overflow 48 bits. + */ util_vma_heap_init(&bufmgr->vma_allocator[BRW_MEMZONE_OTHER], - 1 * _4GB, gtt_size - 1 * _4GB); + 1 * _4GB, gtt_size - 2 * _4GB); } else if (devinfo->gen >= 10) { /* Softpin landed in 4.5, but GVT used an aliasing PPGTT until * kernel commit 6b3816d69628becb7ff35978aa0751798b4a940a in diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c index 30e09861491..1508171da10 100644 --- a/src/mesa/drivers/dri/i965/brw_clear.c +++ b/src/mesa/drivers/dri/i965/brw_clear.c @@ -167,7 +167,7 @@ brw_fast_clear_depth(struct gl_context *ctx) */ float clear_value = mt->format == MESA_FORMAT_Z_FLOAT32 ? ctx->Depth.Clear : - (unsigned)(ctx->Depth.Clear * fb->_DepthMax) / (float)fb->_DepthMax; + _mesa_lroundeven(ctx->Depth.Clear * fb->_DepthMax) / (float)(fb->_DepthMax); const uint32_t num_layers = depth_att->Layered ? 
depth_irb->layer_count : 1; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 46791c7d2c8..263c17b4d58 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1183,6 +1183,7 @@ struct brw_context int n_active_oa_queries; int n_active_pipeline_stats_queries; + int n_active_null_renderers; /* The number of queries depending on running OA counters which * extends beyond brw_end_perf_query() since we need to wait until diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 17bca1991f1..b42c131d1b9 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1652,11 +1652,17 @@ enum brw_pixel_shader_coverage_mask_mode { #define GEN10_CACHE_MODE_SS 0x0e420 #define GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4) -#define INSTPM 0x20c0 +#define INSTPM 0x20c0 /* Gen6-8 */ # define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6) +# define INSTPM_GLOBAL_DEBUG_ENABLE (1 << 4) +# define INSTPM_MEDIA_INSTRUCTION_DISABLE (1 << 3) +# define INSTPM_3D_RENDERER_INSTRUCTION_DISABLE (1 << 2) +# define INSTPM_3D_STATE_INSTRUCTION_DISABLE (1 << 1) #define CS_DEBUG_MODE2 0x20d8 /* Gen9+ */ # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4) +# define CSDBG2_MEDIA_INSTRUCTION_DISABLE (1 << 1) +# define CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE (1 << 0) #define GEN7_RPSTAT1 0xA01C #define GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT 7 @@ -1676,10 +1682,6 @@ enum brw_pixel_shader_coverage_mask_mode { # define GLK_SCEC_BARRIER_MODE_MASK REG_MASK(1 << 7) # define GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE (1 << 11) -#define COMMON_SLICE_CHICKEN3 0x7304 -# define PS_THREAD_PANIC_DISPATCH (3 << 6) -# define PS_THREAD_PANIC_DISPATCH_MASK REG_MASK(3 << 6) - #define HALF_SLICE_CHICKEN7 0xE194 # define TEXEL_OFFSET_FIX_ENABLE (1 << 1) # define TEXEL_OFFSET_FIX_MASK REG_MASK(1 << 1) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index d07349419cc..a3cfa765c0f 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -875,6 +875,16 @@ brw_finish_drawing(struct gl_context *ctx) brw_bo_unreference(brw->draw.draw_params_count_bo); brw->draw.draw_params_count_bo = NULL; } + + if (brw->draw.draw_params_bo) { + brw_bo_unreference(brw->draw.draw_params_bo); + brw->draw.draw_params_bo = NULL; + } + + if (brw->draw.derived_draw_params_bo) { + brw_bo_unreference(brw->draw.derived_draw_params_bo); + brw->draw.derived_draw_params_bo = NULL; + } } /** diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index b752294250e..e73cadc5d3e 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -493,6 +493,27 @@ brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline) } } + if (devinfo->gen == 9 && pipeline == BRW_RENDER_PIPELINE) { + /* We seem to have issues with geometry flickering when 3D and compute + * are combined in the same batch and this appears to fix it. 
+ */ + const uint32_t subslices = MAX2(brw->screen->subslice_total, 1); + const uint32_t maxNumberofThreads = + devinfo->max_cs_threads * subslices - 1; + + BEGIN_BATCH(9); + OUT_BATCH(MEDIA_VFE_STATE << 16 | (9 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(2 << 8 | maxNumberofThreads << 16); + OUT_BATCH(0); + OUT_BATCH(2 << 16); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + if (devinfo->gen >= 6) { /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] * PIPELINE_SELECT [DevBWR+]": diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index cd7961905bd..255a0746757 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -287,6 +287,12 @@ dump_perf_query_callback(GLuint id, void *query_void, void *brw_void) o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,"), obj->pipeline_stats.bo ? "yes" : "no"); break; + case GEN_PERF_QUERY_TYPE_NULL: + DBG("%4d: %-6s %-8s NULL_RENDERER\n", + id, + o->Used ? "Dirty," : "New,", + o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,")); + break; default: unreachable("Unknown query type"); break; @@ -388,6 +394,10 @@ brw_get_perf_query_info(struct gl_context *ctx, *n_active = brw->perfquery.n_active_pipeline_stats_queries; break; + case GEN_PERF_QUERY_TYPE_NULL: + *n_active = brw->perfquery.n_active_null_renderers; + break; + default: unreachable("Unknown query type"); break; @@ -962,6 +972,7 @@ brw_begin_perf_query(struct gl_context *ctx, struct brw_context *brw = brw_context(ctx); struct brw_perf_query_object *obj = brw_perf_query(o); const struct gen_perf_query_info *query = obj->query; + const struct gen_device_info *devinfo = &brw->screen->devinfo; /* We can assume the frontend hides mistaken attempts to Begin a * query object multiple times before its End. Similarly if an @@ -1046,7 +1057,6 @@ brw_begin_perf_query(struct gl_context *ctx, /* If the OA counters aren't already on, enable them. 
*/ if (brw->perfquery.oa_stream_fd == -1) { __DRIscreen *screen = brw->screen->driScrnPriv; - const struct gen_device_info *devinfo = &brw->screen->devinfo; /* The period_exponent gives a sampling period as follows: * sample_period = timestamp_period * 2^(period_exponent + 1) @@ -1191,6 +1201,21 @@ brw_begin_perf_query(struct gl_context *ctx, ++brw->perfquery.n_active_pipeline_stats_queries; break; + case GEN_PERF_QUERY_TYPE_NULL: + ++brw->perfquery.n_active_null_renderers; + if (devinfo->gen >= 9) { + brw_load_register_imm32(brw, CS_DEBUG_MODE2, + REG_MASK(CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE) | + CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE); + } else { + brw_load_register_imm32(brw, INSTPM, + REG_MASK(INSTPM_3D_RENDERER_INSTRUCTION_DISABLE | + INSTPM_MEDIA_INSTRUCTION_DISABLE) | + INSTPM_3D_RENDERER_INSTRUCTION_DISABLE | + INSTPM_MEDIA_INSTRUCTION_DISABLE); + } + break; + default: unreachable("Unknown query type"); break; @@ -1211,6 +1236,7 @@ brw_end_perf_query(struct gl_context *ctx, { struct brw_context *brw = brw_context(ctx); struct brw_perf_query_object *obj = brw_perf_query(o); + const struct gen_device_info *devinfo = &brw->screen->devinfo; DBG("End(%d)\n", o->Id); @@ -1253,6 +1279,19 @@ brw_end_perf_query(struct gl_context *ctx, --brw->perfquery.n_active_pipeline_stats_queries; break; + case GEN_PERF_QUERY_TYPE_NULL: + if (--brw->perfquery.n_active_null_renderers == 0) { + if (devinfo->gen >= 9) { + brw_load_register_imm32(brw, CS_DEBUG_MODE2, + REG_MASK(CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE)); + } else { + brw_load_register_imm32(brw, INSTPM, + REG_MASK(INSTPM_3D_RENDERER_INSTRUCTION_DISABLE | + INSTPM_MEDIA_INSTRUCTION_DISABLE)); + } + } + break; + default: unreachable("Unknown query type"); break; @@ -1278,6 +1317,9 @@ brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o) bo = obj->pipeline_stats.bo; break; + case GEN_PERF_QUERY_TYPE_NULL: + break; + default: unreachable("Unknown query type"); break; @@ -1328,6 +1370,8 @@ brw_is_perf_query_ready(struct gl_context *ctx, return (obj->pipeline_stats.bo && !brw_batch_references(&brw->batch, obj->pipeline_stats.bo) && !brw_bo_busy(obj->pipeline_stats.bo)); + case GEN_PERF_QUERY_TYPE_NULL: + return true; default: unreachable("Unknown query type"); @@ -1506,6 +1550,9 @@ brw_get_perf_query_data(struct gl_context *ctx, written = get_pipeline_stats_data(brw, obj, data_size, (uint8_t *)data); break; + case GEN_PERF_QUERY_TYPE_NULL: + break; + default: unreachable("Unknown query type"); break; @@ -1576,6 +1623,9 @@ brw_delete_perf_query(struct gl_context *ctx, } break; + case GEN_PERF_QUERY_TYPE_NULL: + break; + default: unreachable("Unknown query type"); break; @@ -1775,6 +1825,7 @@ brw_init_perf_query_info(struct gl_context *ctx) if (gen_perf_load_oa_metrics(brw->perfquery.perf, screen->fd, devinfo)) brw_perf_query_register_mdapi_oa_query(brw); + brw_perf_query_register_gpa_null_query(brw); brw->perfquery.unaccumulated = ralloc_array(brw, struct brw_perf_query_object *, 2); diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.h b/src/mesa/drivers/dri/i965/brw_performance_query.h index b0bf60cc4ff..4592dc3c7ff 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.h +++ b/src/mesa/drivers/dri/i965/brw_performance_query.h @@ -116,5 +116,6 @@ struct brw_perf_query_object void brw_perf_query_register_mdapi_oa_query(struct brw_context *brw); void brw_perf_query_register_mdapi_statistic_query(struct brw_context *brw); +void brw_perf_query_register_gpa_null_query(struct brw_context *brw); #endif /* 
BRW_PERFORMANCE_QUERY_H */ diff --git a/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c b/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c index 379515d328f..21ed33a38e9 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c @@ -258,3 +258,13 @@ brw_perf_query_register_mdapi_statistic_query(struct brw_context *brw) query->data_size = sizeof(uint64_t) * query->n_counters; } + +void +brw_perf_query_register_gpa_null_query(struct brw_context *brw) +{ + struct gen_perf_query_info *query = + gen_perf_query_append_query_info(brw->perfquery.perf, 0); + + query->kind = GEN_PERF_QUERY_TYPE_NULL; + query->name = "Intel_Null_Hardware_Query"; +} diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 938b9defeda..c41d9551a1e 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -109,12 +109,6 @@ brw_upload_initial_gpu_state(struct brw_context *brw) brw_load_register_imm32(brw, GEN8_L3CNTLREG, GEN8_L3CNTLREG_EDBC_NO_HANG); - /* WA_2204188704: Pixel Shader Panic dispatch must be disabled. - */ - brw_load_register_imm32(brw, COMMON_SLICE_CHICKEN3, - PS_THREAD_PANIC_DISPATCH_MASK | - PS_THREAD_PANIC_DISPATCH); - /* WaEnableStateCacheRedirectToCS:icl */ brw_load_register_imm32(brw, SLICE_COMMON_ECO_CHICKEN1, GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE | diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 8175fbf0db4..f1defb3f148 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1685,6 +1685,11 @@ brw_upload_cs_work_groups_surface(struct brw_context *brw) ISL_FORMAT_RAW, 3 * sizeof(GLuint), 1, RELOC_WRITE); + + /* The state buffer now holds a reference to our upload, drop ours. 
*/ + if (bo != brw->compute.num_work_groups_bo) + brw_bo_unreference(bo); + brw->ctx.NewDriverState |= BRW_NEW_SURFACES; } } diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index d84793f71f8..28a54f33e7c 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -99,6 +99,7 @@ DRI_CONF_BEGIN DRI_CONF_GLSL_ZERO_INIT("false") DRI_CONF_ALLOW_RGB10_CONFIGS("false") DRI_CONF_ALLOW_RGB565_CONFIGS("true") + DRI_CONF_ALLOW_FP16_CONFIGS("false") DRI_CONF_SECTION_END DRI_CONF_END }; @@ -188,6 +189,12 @@ static const struct __DRI2flushExtensionRec intelFlushExtension = { }; static const struct intel_image_format intel_image_formats[] = { + { __DRI_IMAGE_FOURCC_ABGR16161616F, __DRI_IMAGE_COMPONENTS_RGBA, 1, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR16161616F, 8 } } }, + + { __DRI_IMAGE_FOURCC_XBGR16161616F, __DRI_IMAGE_COMPONENTS_RGB, 1, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR16161616F, 8 } } }, + { __DRI_IMAGE_FOURCC_ARGB2101010, __DRI_IMAGE_COMPONENTS_RGBA, 1, { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB2101010, 4 } } }, @@ -1383,7 +1390,8 @@ intel_query_dma_buf_modifiers(__DRIscreen *_screen, int fourcc, int max, for (i = 0; i < num_mods && i < max; i++) { if (f->components == __DRI_IMAGE_COMPONENTS_Y_U_V || f->components == __DRI_IMAGE_COMPONENTS_Y_UV || - f->components == __DRI_IMAGE_COMPONENTS_Y_XUXV) { + f->components == __DRI_IMAGE_COMPONENTS_Y_XUXV || + f->components == __DRI_IMAGE_COMPONENTS_Y_UXVX) { external_only[i] = GL_TRUE; } else { @@ -1732,7 +1740,11 @@ intelCreateBuffer(__DRIscreen *dri_screen, fb->Visual.samples = num_samples; } - if (mesaVis->redBits == 10 && mesaVis->alphaBits > 0) { + if (mesaVis->redBits == 16 && mesaVis->alphaBits > 0 && mesaVis->floatMode) { + rgbFormat = MESA_FORMAT_RGBA_FLOAT16; + } else if (mesaVis->redBits == 16 && mesaVis->floatMode) { + rgbFormat = MESA_FORMAT_RGBX_FLOAT16; + } else if (mesaVis->redBits == 10 && mesaVis->alphaBits > 0) { rgbFormat = mesaVis->redMask == 0x3ff00000 ? MESA_FORMAT_B10G10R10A2_UNORM : MESA_FORMAT_R10G10B10A2_UNORM; } else if (mesaVis->redBits == 10) { @@ -2146,6 +2158,45 @@ intel_loader_get_cap(const __DRIscreen *dri_screen, enum dri_loader_cap cap) return 0; } +static bool +intel_allowed_format(__DRIscreen *dri_screen, mesa_format format) +{ + struct intel_screen *screen = dri_screen->driverPrivate; + + /* Expose only BGRA ordering if the loader doesn't support RGBA ordering. */ + bool allow_rgba_ordering = intel_loader_get_cap(dri_screen, DRI_LOADER_CAP_RGBA_ORDERING); + if (!allow_rgba_ordering && + (format == MESA_FORMAT_R8G8B8A8_UNORM || + format == MESA_FORMAT_R8G8B8X8_UNORM || + format == MESA_FORMAT_R8G8B8A8_SRGB)) + return false; + + /* Shall we expose 10 bpc formats? */ + bool allow_rgb10_configs = driQueryOptionb(&screen->optionCache, + "allow_rgb10_configs"); + if (!allow_rgb10_configs && + (format == MESA_FORMAT_B10G10R10A2_UNORM || + format == MESA_FORMAT_B10G10R10X2_UNORM)) + return false; + + /* Shall we expose 565 formats? */ + bool allow_rgb565_configs = driQueryOptionb(&screen->optionCache, + "allow_rgb565_configs"); + if (!allow_rgb565_configs && format == MESA_FORMAT_B5G6R5_UNORM) + return false; + + /* Shall we expose fp16 formats? 
*/ + bool allow_fp16_configs = driQueryOptionb(&screen->optionCache, + "allow_fp16_configs"); + allow_fp16_configs &= intel_loader_get_cap(dri_screen, DRI_LOADER_CAP_FP16); + if (!allow_fp16_configs && + (format == MESA_FORMAT_RGBA_FLOAT16 || + format == MESA_FORMAT_RGBX_FLOAT16)) + return false; + + return true; +} + static __DRIconfig** intel_screen_make_configs(__DRIscreen *dri_screen) { @@ -2160,6 +2211,9 @@ intel_screen_make_configs(__DRIscreen *dri_screen) MESA_FORMAT_B10G10R10A2_UNORM, MESA_FORMAT_B10G10R10X2_UNORM, + MESA_FORMAT_RGBA_FLOAT16, + MESA_FORMAT_RGBX_FLOAT16, + /* The 32-bit RGBA format must not precede the 32-bit BGRA format. * Likewise for RGBX and BGRX. Otherwise, the GLX client and the GLX * server may disagree on which format the GLXFBConfig represents, @@ -2196,19 +2250,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen) uint8_t depth_bits[4], stencil_bits[4]; __DRIconfig **configs = NULL; - /* Expose only BGRA ordering if the loader doesn't support RGBA ordering. */ - unsigned num_formats; - if (intel_loader_get_cap(dri_screen, DRI_LOADER_CAP_RGBA_ORDERING)) - num_formats = ARRAY_SIZE(formats); - else - num_formats = ARRAY_SIZE(formats) - 3; /* all - RGBA_ORDERING formats */ - - /* Shall we expose 10 bpc formats? */ - bool allow_rgb10_configs = driQueryOptionb(&screen->optionCache, - "allow_rgb10_configs"); - /* Shall we expose 565 formats? */ - bool allow_rgb565_configs = driQueryOptionb(&screen->optionCache, - "allow_rgb565_configs"); + unsigned num_formats = ARRAY_SIZE(formats); /* Generate singlesample configs, each without accumulation buffer * and with EGL_MUTABLE_RENDER_BUFFER_BIT_KHR. @@ -2217,12 +2259,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen) __DRIconfig **new_configs; int num_depth_stencil_bits = 2; - if (!allow_rgb10_configs && - (formats[i] == MESA_FORMAT_B10G10R10A2_UNORM || - formats[i] == MESA_FORMAT_B10G10R10X2_UNORM)) - continue; - - if (!allow_rgb565_configs && formats[i] == MESA_FORMAT_B5G6R5_UNORM) + if (!intel_allowed_format(dri_screen, formats[i])) continue; /* Starting with DRI2 protocol version 1.1 we can request a depth/stencil @@ -2262,12 +2299,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen) for (unsigned i = 0; i < num_formats; i++) { __DRIconfig **new_configs; - if (!allow_rgb10_configs && - (formats[i] == MESA_FORMAT_B10G10R10A2_UNORM || - formats[i] == MESA_FORMAT_B10G10R10X2_UNORM)) - continue; - - if (!allow_rgb565_configs && formats[i] == MESA_FORMAT_B5G6R5_UNORM) + if (!intel_allowed_format(dri_screen, formats[i])) continue; if (formats[i] == MESA_FORMAT_B5G6R5_UNORM) { @@ -2303,12 +2335,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen) if (devinfo->gen < 6) break; - if (!allow_rgb10_configs && - (formats[i] == MESA_FORMAT_B10G10R10A2_UNORM || - formats[i] == MESA_FORMAT_B10G10R10X2_UNORM)) - continue; - - if (!allow_rgb565_configs && formats[i] == MESA_FORMAT_B5G6R5_UNORM) + if (!intel_allowed_format(dri_screen, formats[i])) continue; __DRIconfig **new_configs; diff --git a/src/mesa/drivers/osmesa/meson.build b/src/mesa/drivers/osmesa/meson.build index a406bb3c210..c479b740131 100644 --- a/src/mesa/drivers/osmesa/meson.build +++ b/src/mesa/drivers/osmesa/meson.build @@ -33,7 +33,8 @@ libosmesa = shared_library( include_directories : [ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, ], - link_with : [libmesa_classic, libglapi_static, osmesa_link_with], + link_whole : libglapi_static, + link_with : [libmesa_classic, osmesa_link_with], dependencies : [dep_thread, dep_selinux], 
version : '8.0.0', install : true, diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index a46599a2872..4e48b76fcb8 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -84,14 +84,8 @@ supported_buffer_bitmask(const struct gl_context *ctx, return mask; } - -/** - * Helper routine used by glDrawBuffer and glDrawBuffersARB. - * Given a GLenum naming one or more color buffers (such as - * GL_FRONT_AND_BACK), return the corresponding bitmask of BUFFER_BIT_* flags. - */ -static GLbitfield -draw_buffer_enum_to_bitmask(const struct gl_context *ctx, GLenum buffer) +static GLenum +back_to_front_if_single_buffered(const struct gl_context *ctx, GLenum buffer) { /* If the front buffer is the only buffer, GL_BACK and all other flags * that include BACK select the front buffer for drawing. There are @@ -129,6 +123,19 @@ draw_buffer_enum_to_bitmask(const struct gl_context *ctx, GLenum buffer) } } + return buffer; +} + +/** + * Helper routine used by glDrawBuffer and glDrawBuffersARB. + * Given a GLenum naming one or more color buffers (such as + * GL_FRONT_AND_BACK), return the corresponding bitmask of BUFFER_BIT_* flags. + */ +static GLbitfield +draw_buffer_enum_to_bitmask(const struct gl_context *ctx, GLenum buffer) +{ + buffer = back_to_front_if_single_buffered(ctx, buffer); + switch (buffer) { case GL_NONE: return 0; @@ -192,20 +199,12 @@ draw_buffer_enum_to_bitmask(const struct gl_context *ctx, GLenum buffer) static gl_buffer_index read_buffer_enum_to_index(const struct gl_context *ctx, GLenum buffer) { + buffer = back_to_front_if_single_buffered(ctx, buffer); + switch (buffer) { case GL_FRONT: return BUFFER_FRONT_LEFT; case GL_BACK: - if (_mesa_is_gles(ctx)) { - /* In draw_buffer_enum_to_bitmask, when GLES contexts draw to - * GL_BACK with a single-buffered configuration, we actually end - * up drawing to the sole front buffer in our internal - * representation. For consistency, we must read from that - * front left buffer too. - */ - if (!ctx->DrawBuffer->Visual.doubleBufferMode) - return BUFFER_FRONT_LEFT; - } return BUFFER_BACK_LEFT; case GL_RIGHT: return BUFFER_FRONT_RIGHT; diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 2c3d9a11ce3..dddcf3da0c5 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -361,6 +361,7 @@ static void one_time_fini(void) { _mesa_destroy_shader_compiler(); + _mesa_destroy_shader_compiler_types(); _mesa_locale_fini(); } @@ -393,6 +394,8 @@ one_time_init( struct gl_context *ctx ) _mesa_locale_init(); + _mesa_init_shader_compiler_types(); + _mesa_one_time_init_extension_overrides(ctx); _mesa_get_cpu_features(); @@ -616,6 +619,17 @@ _mesa_init_constants(struct gl_constants *consts, gl_api api) consts->MaxProgramMatrices = MAX_PROGRAM_MATRICES; consts->MaxProgramMatrixStackDepth = MAX_PROGRAM_MATRIX_STACK_DEPTH; + /* Set the absolute minimum possible GLSL version. API_OPENGL_CORE can + * mean an OpenGL 3.0 forward-compatible context, so that implies a minimum + * possible version of 1.30. Otherwise, the minimum possible version is 1.20. + * Since Mesa unconditionally advertises GL_ARB_shading_language_100 and + * GL_ARB_shader_objects, every driver has GLSL 1.20... even if they don't + * advertise any extensions to enable any shader stages (e.g., + * GL_ARB_vertex_shader). + */ + consts->GLSLVersion = api == API_OPENGL_CORE ? 
130 : 120; + consts->GLSLVersionCompat = consts->GLSLVersion; + /* Assume that if GLSL 1.30+ (or GLSL ES 3.00+) is supported that * gl_VertexID is implemented using a native hardware register with OpenGL * semantics. @@ -1544,9 +1558,12 @@ check_compatible(const struct gl_context *ctx, ctxvis->foo != bufvis->foo) \ return GL_FALSE - check_component(redMask); - check_component(greenMask); - check_component(blueMask); + check_component(redShift); + check_component(greenShift); + check_component(blueShift); + check_component(redBits); + check_component(greenBits); + check_component(blueBits); check_component(depthBits); check_component(stencilBits); diff --git a/src/mesa/main/format_utils.h b/src/mesa/main/format_utils.h index 78365cab533..3a528ceb3db 100644 --- a/src/mesa/main/format_utils.h +++ b/src/mesa/main/format_utils.h @@ -87,7 +87,7 @@ _mesa_float_to_unorm(float x, unsigned dst_bits) else if (x > 1.0f) return MAX_UINT(dst_bits); else - return _mesa_lroundevenf(x * MAX_UINT(dst_bits)); + return _mesa_i64roundevenf(x * MAX_UINT(dst_bits)); } static inline unsigned diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 0ca87561f05..6ba70d0809c 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -170,6 +170,7 @@ struct gl_config GLint redBits, greenBits, blueBits, alphaBits; /* bits per comp */ GLuint redMask, greenMask, blueMask, alphaMask; + GLint redShift, greenShift, blueShift, alphaShift; GLint rgbBits; /* total bits for rgb */ GLint indexBits; /* total bits for colorindex */ diff --git a/src/mesa/main/program_binary.c b/src/mesa/main/program_binary.c index 7390fef5887..39537cfccce 100644 --- a/src/mesa/main/program_binary.c +++ b/src/mesa/main/program_binary.c @@ -178,6 +178,8 @@ write_program_payload(struct gl_context *ctx, struct blob *blob, shader->Program); } + blob_write_uint32(blob, sh_prog->SeparateShader); + serialize_glsl_program(blob, ctx, sh_prog); for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) { @@ -195,6 +197,8 @@ static bool read_program_payload(struct gl_context *ctx, struct blob_reader *blob, GLenum binary_format, struct gl_shader_program *sh_prog) { + sh_prog->SeparateShader = blob_read_uint32(blob); + if (!deserialize_glsl_program(blob, ctx, sh_prog)) return false; diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 6b73e6c7e7a..9564664daa3 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -162,6 +162,8 @@ _mesa_free_shader_state(struct gl_context *ctx) _mesa_reference_shader_program(ctx, &ctx->Shader.ReferencedPrograms[i], NULL); + free(ctx->SubroutineIndex[i].IndexPtr); + ctx->SubroutineIndex[i].IndexPtr = NULL; } _mesa_reference_shader_program(ctx, &ctx->Shader.ActiveProgram, NULL); diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c index 242a8c29909..219674be086 100644 --- a/src/mesa/main/shaderimage.c +++ b/src/mesa/main/shaderimage.c @@ -588,11 +588,11 @@ set_image_binding(struct gl_image_unit *u, struct gl_texture_object *texObj, if (texObj && _mesa_tex_target_is_layered(texObj->Target)) { u->Layered = layered; u->Layer = layer; - u->_Layer = (u->Layered ? 0 : u->Layer); } else { u->Layered = GL_FALSE; u->Layer = 0; } + u->_Layer = (u->Layered ? 
0 : u->Layer); _mesa_reference_texobj(&u->TexObj, texObj); } diff --git a/src/mesa/program/Android.mk b/src/mesa/program/Android.mk index c6470e6289e..13d0da85882 100644 --- a/src/mesa/program/Android.mk +++ b/src/mesa/program/Android.mk @@ -41,7 +41,7 @@ endef include $(MESA_TOP)/src/mesa/Makefile.sources include $(CLEAR_VARS) - +LOCAL_CFLAGS += -Wno-error LOCAL_MODULE := libmesa_program LOCAL_MODULE_CLASS := STATIC_LIBRARIES LOCAL_STATIC_LIBRARIES := libmesa_nir \ diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index f875c00238f..005b855230b 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2506,8 +2506,7 @@ _mesa_generate_parameters_list_for_uniforms(struct gl_context *ctx, void _mesa_associate_uniform_storage(struct gl_context *ctx, struct gl_shader_program *shader_program, - struct gl_program *prog, - bool propagate_to_storage) + struct gl_program *prog) { struct gl_program_parameter_list *params = prog->Parameters; gl_shader_stage shader_type = prog->info.stage; @@ -2633,26 +2632,24 @@ _mesa_associate_uniform_storage(struct gl_context *ctx, * data from the linker's backing store. This will cause values from * initializers in the source code to be copied over. */ - if (propagate_to_storage) { - unsigned array_elements = MAX2(1, storage->array_elements); - if (ctx->Const.PackedDriverUniformStorage && !prog->is_arb_asm && - (storage->is_bindless || !storage->type->contains_opaque())) { - const int dmul = storage->type->is_64bit() ? 2 : 1; - const unsigned components = - storage->type->vector_elements * - storage->type->matrix_columns; - - for (unsigned s = 0; s < storage->num_driver_storage; s++) { - gl_constant_value *uni_storage = (gl_constant_value *) - storage->driver_storage[s].data; - memcpy(uni_storage, storage->storage, - sizeof(storage->storage[0]) * components * - array_elements * dmul); - } - } else { - _mesa_propagate_uniforms_to_driver_storage(storage, 0, - array_elements); + unsigned array_elements = MAX2(1, storage->array_elements); + if (ctx->Const.PackedDriverUniformStorage && !prog->is_arb_asm && + (storage->is_bindless || !storage->type->contains_opaque())) { + const int dmul = storage->type->is_64bit() ? 2 : 1; + const unsigned components = + storage->type->vector_elements * + storage->type->matrix_columns; + + for (unsigned s = 0; s < storage->num_driver_storage; s++) { + gl_constant_value *uni_storage = (gl_constant_value *) + storage->driver_storage[s].data; + memcpy(uni_storage, storage->storage, + sizeof(storage->storage[0]) * components * + array_elements * dmul); } + } else { + _mesa_propagate_uniforms_to_driver_storage(storage, 0, + array_elements); } last_location = location; @@ -3011,7 +3008,7 @@ get_mesa_program(struct gl_context *ctx, * prog->ParameterValues to get reallocated (e.g., anything that adds a * program constant) has to happen before creating this linkage. 
*/ - _mesa_associate_uniform_storage(ctx, shader_program, prog, true); + _mesa_associate_uniform_storage(ctx, shader_program, prog); if (!shader_program->data->LinkStatus) { goto fail_exit; } diff --git a/src/mesa/program/ir_to_mesa.h b/src/mesa/program/ir_to_mesa.h index f5665e6316e..33eb801bae8 100644 --- a/src/mesa/program/ir_to_mesa.h +++ b/src/mesa/program/ir_to_mesa.h @@ -50,8 +50,7 @@ _mesa_generate_parameters_list_for_uniforms(struct gl_context *ctx, void _mesa_associate_uniform_storage(struct gl_context *ctx, struct gl_shader_program *shader_program, - struct gl_program *prog, - bool propagate_to_storage); + struct gl_program *prog); #ifdef __cplusplus } diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index 3a49bd4d6c1..593d15331fd 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -176,8 +176,10 @@ set_vertex_shader(struct st_context *st) if (use_nir) { st->clear.vs = make_nir_clear_vertex_shader(st, false); } else { - const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC }; + const enum tgsi_semantic semantic_names[] = { + TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC + }; const uint semantic_indexes[] = { 0, 0 }; st->clear.vs = util_make_vertex_passthrough_shader(st->pipe, 2, semantic_names, diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 6f6b42596e6..fe6be3ab048 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -324,10 +324,12 @@ st_make_passthrough_vertex_shader(struct st_context *st) MESA_SHADER_VERTEX, 3, inputs, outputs, NULL, 0); } else { - const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_COLOR, - st->needs_texcoord_semantic ? TGSI_SEMANTIC_TEXCOORD : - TGSI_SEMANTIC_GENERIC }; + const enum tgsi_semantic semantic_names[] = { + TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_COLOR, + st->needs_texcoord_semantic ? 
TGSI_SEMANTIC_TEXCOORD : + TGSI_SEMANTIC_GENERIC + }; const uint semantic_indexes[] = { 0, 0, 0 }; st->passthrough_vs = diff --git a/src/mesa/state_tracker/st_cb_drawtex.c b/src/mesa/state_tracker/st_cb_drawtex.c index f08ffde6b01..bba1ce41376 100644 --- a/src/mesa/state_tracker/st_cb_drawtex.c +++ b/src/mesa/state_tracker/st_cb_drawtex.c @@ -92,7 +92,7 @@ semantic_to_varying_slot(unsigned semantic) static void * lookup_shader(struct st_context *st, uint num_attribs, - const uint *semantic_names, + const enum tgsi_semantic *semantic_names, const uint *semantic_indexes) { struct pipe_context *pipe = st->pipe; @@ -168,7 +168,7 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, struct pipe_resource *vbuffer = NULL; GLuint i, numTexCoords, numAttribs; GLboolean emitColor; - uint semantic_names[2 + MAX_TEXTURE_UNITS]; + enum tgsi_semantic semantic_names[2 + MAX_TEXTURE_UNITS]; uint semantic_indexes[2 + MAX_TEXTURE_UNITS]; struct pipe_vertex_element velements[2 + MAX_TEXTURE_UNITS]; unsigned offset; diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 5fe6c79a93a..5e3425a73a6 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -415,9 +415,15 @@ st_new_renderbuffer_fb(enum pipe_format format, unsigned samples, boolean sw) case PIPE_FORMAT_R32G32B32A32_FLOAT: strb->Base.InternalFormat = GL_RGBA32F; break; + case PIPE_FORMAT_R32G32B32X32_FLOAT: + strb->Base.InternalFormat = GL_RGB32F; + break; case PIPE_FORMAT_R16G16B16A16_FLOAT: strb->Base.InternalFormat = GL_RGBA16F; break; + case PIPE_FORMAT_R16G16B16X16_FLOAT: + strb->Base.InternalFormat = GL_RGB16F; + break; default: _mesa_problem(NULL, "Unexpected format %s in st_new_renderbuffer_fb", diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 8f2acafbca3..3c48d176ca4 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -645,7 +645,7 @@ st_create_context_priv(struct gl_context *ctx, struct pipe_context *pipe, PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET)); /* GL limits and extensions */ - st_init_limits(pipe->screen, &ctx->Const, &ctx->Extensions, ctx->API); + st_init_limits(pipe->screen, &ctx->Const, &ctx->Extensions); st_init_extensions(pipe->screen, &ctx->Const, &ctx->Extensions, &st->options, ctx->API); @@ -913,6 +913,19 @@ destroy_tex_sampler_cb(GLuint id, void *data, void *userData) st_texture_release_context_sampler_view(st, st_texture_object(texObj)); } +static void +destroy_framebuffer_attachment_sampler_cb(GLuint id, void *data, void *userData) +{ + struct gl_framebuffer* glfb = (struct gl_framebuffer*) data; + struct st_context *st = (struct st_context *) userData; + + for (unsigned i = 0; i < BUFFER_COUNT; i++) { + struct gl_renderbuffer_attachment *att = &glfb->Attachment[i]; + if (att->Texture) { + st_texture_release_context_sampler_view(st, st_texture_object(att->Texture)); + } + } +} void st_destroy_context(struct st_context *st) @@ -971,6 +984,8 @@ st_destroy_context(struct st_context *st) st_framebuffer_reference(&stfb, NULL); } + _mesa_HashWalk(ctx->Shared->FrameBuffers, destroy_framebuffer_attachment_sampler_cb, st); + pipe_sampler_view_reference(&st->pixel_xfer.pixelmap_sampler_view, NULL); pipe_resource_reference(&st->pixel_xfer.pixelmap_texture, NULL); diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 560dd7b31a4..371e7d2ec8f 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ 
b/src/mesa/state_tracker/st_extensions.c @@ -76,8 +76,7 @@ static int _clamp(int a, int min, int max) * Note that we have to limit/clamp against Mesa's internal limits too. */ void st_init_limits(struct pipe_screen *screen, - struct gl_constants *c, struct gl_extensions *extensions, - gl_api api) + struct gl_constants *c, struct gl_extensions *extensions) { int supported_irs; unsigned sh; @@ -449,14 +448,8 @@ void st_init_limits(struct pipe_screen *screen, c->GLSLFrontFacingIsSysVal = screen->get_param(screen, PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL); - /* GL_ARB_get_program_binary - * - * The QT framework has a bug in their shader program cache, which is built - * on GL_ARB_get_program_binary. In an effort to allow them to fix the bug - * we don't enable more than 1 binary format for compatibility profiles. - */ - if (api != API_OPENGL_COMPAT && - screen->get_disk_shader_cache && screen->get_disk_shader_cache(screen)) + /* GL_ARB_get_program_binary */ + if (screen->get_disk_shader_cache && screen->get_disk_shader_cache(screen)) c->NumProgramBinaryFormats = 1; c->MaxAtomicBufferBindings = diff --git a/src/mesa/state_tracker/st_extensions.h b/src/mesa/state_tracker/st_extensions.h index fdfac7ece70..7bf1aa8c8cb 100644 --- a/src/mesa/state_tracker/st_extensions.h +++ b/src/mesa/state_tracker/st_extensions.h @@ -35,8 +35,7 @@ struct pipe_screen; extern void st_init_limits(struct pipe_screen *screen, struct gl_constants *c, - struct gl_extensions *extensions, - gl_api api); + struct gl_extensions *extensions); extern void st_init_extensions(struct pipe_screen *screen, struct gl_constants *consts, diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index 97b2831b880..12ea1ce55b8 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -500,7 +500,7 @@ st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog, * prog->ParameterValues to get reallocated (e.g., anything that adds a * program constant) has to happen before creating this linkage. */ - _mesa_associate_uniform_storage(st->ctx, shader_program, prog, true); + _mesa_associate_uniform_storage(st->ctx, shader_program, prog); st_set_prog_affected_state_flags(prog); diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index f2344703d71..18a5571aaa8 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -7247,7 +7247,7 @@ get_mesa_program_tgsi(struct gl_context *ctx, * prog->ParameterValues to get reallocated (e.g., anything that adds a * program constant) has to happen before creating this linkage. */ - _mesa_associate_uniform_storage(ctx, shader_program, prog, true); + _mesa_associate_uniform_storage(ctx, shader_program, prog); if (!shader_program->data->LinkStatus) { free_glsl_to_tgsi_visitor(v); _mesa_reference_program(ctx, &shader->Program, NULL); diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index bee1f6b1366..ff0bec8f569 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -1105,10 +1105,17 @@ st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi, else { GET_CURRENT_CONTEXT(ctx); - ret = _mesa_make_current(NULL, NULL, NULL); - - if (ctx) + if (ctx) { + /* Before releasing the context, release its associated + * winsys buffers first. 
Then purge the context's winsys buffers list + * to free the resources of any winsys buffers that no longer have + * an existing drawable. + */ + ret = _mesa_make_current(ctx, NULL, NULL); st_framebuffers_purge(ctx->st); + } + + ret = _mesa_make_current(NULL, NULL, NULL); } return ret; @@ -1262,7 +1269,7 @@ get_version(struct pipe_screen *screen, _mesa_init_constants(&consts, api); _mesa_init_extensions(&extensions); - st_init_limits(screen, &consts, &extensions, api); + st_init_limits(screen, &consts, &extensions); st_init_extensions(screen, &consts, &extensions, options, api); return _mesa_get_version(&extensions, &consts, api); diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 9f6e492d6fb..769b02b24fc 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -428,7 +428,7 @@ static nir_shader * st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog, gl_shader_stage stage) { - enum pipe_shader_type p_stage = stage; /* valid for VS/FS */ + enum pipe_shader_type p_stage = pipe_shader_type_from_mesa(stage); const bool is_scalar = st->pipe->screen->get_shader_param(st->pipe->screen, p_stage, PIPE_SHADER_CAP_SCALAR_ISA); diff --git a/src/mesa/state_tracker/st_shader_cache.c b/src/mesa/state_tracker/st_shader_cache.c index b18829754cb..ae1602310db 100644 --- a/src/mesa/state_tracker/st_shader_cache.c +++ b/src/mesa/state_tracker/st_shader_cache.c @@ -366,7 +366,7 @@ st_deserialise_ir_program(struct gl_context *ctx, } st_set_prog_affected_state_flags(prog); - _mesa_associate_uniform_storage(ctx, shProg, prog, false); + _mesa_associate_uniform_storage(ctx, shProg, prog); /* Create Gallium shaders now instead of on demand. */ if (ST_DEBUG & DEBUG_PRECOMPILE || diff --git a/src/mesa/state_tracker/st_tgsi_lower_yuv.c b/src/mesa/state_tracker/st_tgsi_lower_yuv.c index 6acd173adc9..73437ddda70 100644 --- a/src/mesa/state_tracker/st_tgsi_lower_yuv.c +++ b/src/mesa/state_tracker/st_tgsi_lower_yuv.c @@ -269,31 +269,39 @@ yuv_to_rgb(struct tgsi_transform_context *tctx, tctx->emit_instruction(tctx, &inst); /* DP3 dst.x, tmpA, imm[0] */ - inst = dp3_instruction(); - reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X); - reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); - reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W)); - tctx->emit_instruction(tctx, &inst); + if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { + inst = dp3_instruction(); + reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X); + reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W)); + tctx->emit_instruction(tctx, &inst); + } /* DP3 dst.y, tmpA, imm[1] */ - inst = dp3_instruction(); - reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Y); - reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); - reg_src(&inst.Src[1], &ctx->imm[1], SWIZ(X, Y, Z, W)); - tctx->emit_instruction(tctx, &inst); + if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { + inst = dp3_instruction(); + reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Y); + reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + reg_src(&inst.Src[1], &ctx->imm[1], SWIZ(X, Y, Z, W)); + tctx->emit_instruction(tctx, &inst); + } /* DP3 dst.z, tmpA, imm[2] */ - inst = dp3_instruction(); - reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Z); - reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); - reg_src(&inst.Src[1], &ctx->imm[2], SWIZ(X, Y, Z, W)); - tctx->emit_instruction(tctx, &inst); + if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { + inst = 
dp3_instruction(); + reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Z); + reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + reg_src(&inst.Src[1], &ctx->imm[2], SWIZ(X, Y, Z, W)); + tctx->emit_instruction(tctx, &inst); + } /* MOV dst.w, imm[0].x */ - inst = mov_instruction(); - reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W); - reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W)); - tctx->emit_instruction(tctx, &inst); + if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { + inst = mov_instruction(); + reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W); + reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W)); + tctx->emit_instruction(tctx, &inst); + } } static void @@ -434,7 +442,7 @@ st_tgsi_lower_yuv(const struct tgsi_token *tokens, unsigned free_slots, /* TODO better job of figuring out how many extra tokens we need.. * this is a pain about tgsi_transform :-/ */ - newlen = tgsi_num_tokens(tokens) + 120; + newlen = tgsi_num_tokens(tokens) + 300; newtoks = tgsi_alloc_tokens(newlen); if (!newtoks) return NULL; diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf index f62315498b2..6389b796d33 100644 --- a/src/util/00-mesa-defaults.conf +++ b/src/util/00-mesa-defaults.conf @@ -111,6 +111,11 @@ TODO: document the other workarounds.