diff --git a/Android.common.mk b/Android.common.mk
index 397dc03dee4..ddf02b04333 100644
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -31,6 +31,7 @@ LOCAL_C_INCLUDES += \
 
 MESA_VERSION := $(shell cat $(MESA_TOP)/VERSION)
 LOCAL_CFLAGS += \
+	-O3 \
 	-Wno-error \
 	-Wno-unused-parameter \
 	-Wno-pointer-arith \
@@ -76,14 +77,23 @@ LOCAL_CFLAGS += \
 	-DMAJOR_IN_SYSMACROS \
 	-DVK_USE_PLATFORM_ANDROID_KHR \
 	-fvisibility=hidden \
-	-Wno-sign-compare
+	-Wno-sign-compare \
+	-Wno-self-assign \
+	-Wno-constant-logical-operand \
+	-Wno-format \
+	-Wno-incompatible-pointer-types \
+	-Wno-enum-conversion
 
 LOCAL_CPPFLAGS += \
 	-D__STDC_CONSTANT_MACROS \
 	-D__STDC_FORMAT_MACROS \
 	-D__STDC_LIMIT_MACROS \
 	-Wno-error=non-virtual-dtor \
-	-Wno-non-virtual-dtor
+	-Wno-non-virtual-dtor	\
+	-Wno-delete-non-virtual-dtor \
+	-Wno-overloaded-virtual \
+	-Wno-missing-braces \
+	-Wno-deprecated-register
 
 # mesa requires at least c99 compiler
 LOCAL_CONLYFLAGS += \
diff --git a/Readme.md b/Readme.md
new file mode 100644
index 00000000000..5df295abc3a
--- /dev/null
+++ b/Readme.md
@@ -0,0 +1,2 @@
+Any security related issues should be reported by following the instructions here:
+https://01.org/security
diff --git a/VERSION b/VERSION
index 9a33c149fca..49fdb126eb8 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-18.2.0-devel
+18.2.5
diff --git a/bin/.cherry-ignore b/bin/.cherry-ignore
new file mode 100644
index 00000000000..93be8a6c4e9
--- /dev/null
+++ b/bin/.cherry-ignore
@@ -0,0 +1,23 @@
+# fixes:  This commit has more than one Fixes tag but the commit it
+#         addresses didn't land in branch.
+6ff1c479968819b93c46d24bd898e89ce14ac401 autotools: don't ship the git_sha1.h generated in git in the tarballs
+# pick:   This commit addresses a regression introduced by previous
+#         commit fa9e6c235da, which didn't make it for 18.2.
+a72dbc461bdb7714656e62cd8f4b00a404c2e6e0 mesa: allow GL_UNSIGNED_BYTE type for SNORM reads
+# fixes:  This commit has more than one Fixes tag but the commit it
+#         addresses didn't land in branch.
+c9f54486959716762e6818dabb0a73a8cd46df67 radeonsi: fix regression in indirect input swizzles.
+# extra:  Just some comments update.
+2ad9917e187c1e9dbb053d3c98aa0e39fa374059 anv/blorp: Fix a comment as per Nanley's review feedback
+# fixes:  This commit was immediately reverted by commit 2dce1175c1c.
+4aec44c0d9c4c0649c362199fac97efe0a3b38a4 i965/tools: 32bit compilation with meson
+# pick:   This commit was reverted by commit 95bb7d82ca8.
+90819abb56f6b1a0cd4946b13b6caf24fb46e500 radv: fix descriptor pool allocation size
+# pick:   There is a specific patch for stable branch for this commit.
+0d495bec25bd7584de4e988c2b4528c1996bc1d0 radeonsi: NaN should pass kill_if
+# pick:   This commit reverts 0fa9e6d7b30 which did not land in branch.
+aa02d7e8781c25ee18b6da97606300808c84973a Revert "anv/skylake: disable ForceThreadDispatchEnable"
+# pick:   Explicit 18.3 only nominations.
+b1b2dd06a7b777e862b525302b15bcaf407d3648 radv: add missing TFB queries support to CmdCopyQueryPoolsResults()
+# fixes:  This commit was reverted by commit 5f312e95f87.
+a9031bf9b55602d93cccef6c926e2179c23205b4 i965/batch: avoid reverting batch buffer if saved state is an empty
diff --git a/bin/install_megadrivers.py b/bin/install_megadrivers.py
index 8d9ed9c6dce..551e385d1a5 100755
--- a/bin/install_megadrivers.py
+++ b/bin/install_megadrivers.py
@@ -43,13 +43,15 @@ def main():
     master = os.path.join(to, os.path.basename(args.megadriver))
 
     if not os.path.exists(to):
+        if os.path.lexists(to):
+            os.unlink(to)
         os.makedirs(to)
     shutil.copy(args.megadriver, master)
 
     for driver in args.drivers:
         abs_driver = os.path.join(to, driver)
 
-        if os.path.exists(abs_driver):
+        if os.path.lexists(abs_driver):
             os.unlink(abs_driver)
         print('installing {} to {}'.format(args.megadriver, abs_driver))
         os.link(master, abs_driver)
@@ -60,7 +62,7 @@ def main():
 
             name, ext = os.path.splitext(driver)
             while ext != '.so':
-                if os.path.exists(name):
+                if os.path.lexists(name):
                     os.unlink(name)
                 os.symlink(driver, name)
                 name, ext = os.path.splitext(name)
diff --git a/common.py b/common.py
index 24a7e8a611d..0d8cb59b436 100644
--- a/common.py
+++ b/common.py
@@ -107,9 +107,6 @@ def AddOptions(opts):
     opts.Add(BoolOption('debug', 'DEPRECATED: debug build', 'yes'))
     opts.Add(BoolOption('profile', 'DEPRECATED: profile build', 'no'))
     opts.Add(BoolOption('quiet', 'DEPRECATED: profile build', 'yes'))
-    opts.Add(BoolOption('texture_float',
-                        'enable floating-point textures and renderbuffers',
-                        'no'))
     opts.Add(BoolOption('swr', 'Build OpenSWR', 'no'))
     if host_platform == 'windows':
         opts.Add('MSVC_VERSION', 'Microsoft Visual C/C++ version')
diff --git a/configure.ac b/configure.ac
index ffb8424a07b..64c03506fb0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -295,6 +295,12 @@ esac
 
 AM_CONDITIONAL(HAVE_ANDROID, test "x$android" = xyes)
 
+# Toggle Werror since at some point clang started treating unknown -W
+# flags as warnings, succeeding with the build, yet issuing an annoying
+# warning.
+save_CFLAGS="$CFLAGS"
+export CFLAGS="$CFLAGS -Werror"
+
 dnl
 dnl Check compiler flags
 dnl
@@ -309,6 +315,11 @@ AX_CHECK_COMPILE_FLAG([-fno-math-errno],                       [CFLAGS="$CFLAGS
 AX_CHECK_COMPILE_FLAG([-fno-trapping-math],                    [CFLAGS="$CFLAGS -fno-trapping-math"])
 AX_CHECK_COMPILE_FLAG([-fvisibility=hidden],                   [VISIBILITY_CFLAGS="-fvisibility=hidden"])
 
+CFLAGS="$save_CFLAGS"
+
+# Toggle Werror since at some point clang started treating unknown -W
+# flags as warnings, succeeding with the build, yet issuing an annoying
+# warning.
 dnl
 dnl Check C++ compiler flags
 dnl
@@ -1415,6 +1426,7 @@ AM_CONDITIONAL(NEED_OPENGL_COMMON, test "x$enable_opengl" = xyes -o \
                                         "x$enable_gles1" = xyes -o \
                                         "x$enable_gles2" = xyes)
 AM_CONDITIONAL(NEED_KHRPLATFORM, test "x$enable_egl" = xyes -o \
+                                      "x$enable_opengl" = xyes -o \
                                       "x$enable_gles1" = xyes -o \
                                       "x$enable_gles2" = xyes)
 
@@ -1503,15 +1515,15 @@ fi
 AC_ARG_WITH([gl-lib-name],
   [AS_HELP_STRING([--with-gl-lib-name@<:@=NAME@:>@],
     [specify GL library name @<:@default=GL@:>@])],
-  [GL_LIB=$withval],
-  [GL_LIB="$DEFAULT_GL_LIB_NAME"])
+  [AC_MSG_ERROR([--with-gl-lib-name is no longer supported. Rename the library manually if needed.])],
+  [])
 AC_ARG_WITH([osmesa-lib-name],
   [AS_HELP_STRING([--with-osmesa-lib-name@<:@=NAME@:>@],
     [specify OSMesa library name @<:@default=OSMesa@:>@])],
-  [OSMESA_LIB=$withval],
-  [OSMESA_LIB=OSMesa])
-AS_IF([test "x$GL_LIB" = xyes], [GL_LIB="$DEFAULT_GL_LIB_NAME"])
-AS_IF([test "x$OSMESA_LIB" = xyes], [OSMESA_LIB=OSMesa])
+  [AC_MSG_ERROR([--with-osmesa-lib-name is no longer supported. Rename the library manually if needed.])],
+  [])
+GL_LIB="$DEFAULT_GL_LIB_NAME"
+OSMESA_LIB=OSMesa
 
 dnl
 dnl Mangled Mesa support
@@ -1523,6 +1535,9 @@ AC_ARG_ENABLE([mangling],
   [enable_mangling=no]
 )
 if test "x${enable_mangling}" = "xyes" ; then
+  if test "x$enable_libglvnd" = xyes; then
+    AC_MSG_ERROR([Conflicting options --enable-mangling and --enable-libglvnd.])
+  fi
   DEFINES="${DEFINES} -DUSE_MGL_NAMESPACE"
   GL_LIB="Mangled${GL_LIB}"
   OSMESA_LIB="Mangled${OSMESA_LIB}"
@@ -1530,6 +1545,15 @@ fi
 AC_SUBST([GL_LIB])
 AC_SUBST([OSMESA_LIB])
 
+dnl HACK when building glx + glvnd we ship gl.pc, despite that glvnd should do it
+dnl Thus we need to use GL as a DSO name.
+if test "x$enable_libglvnd" = xyes -a "x$enable_glx" != xno; then
+  GL_PKGCONF_LIB="GL"
+else
+  GL_PKGCONF_LIB="$GL_LIB"
+fi
+AC_SUBST([GL_PKGCONF_LIB])
+
 # Check for libdrm
 PKG_CHECK_MODULES([LIBDRM], [libdrm >= $LIBDRM_REQUIRED],
                   [have_libdrm=yes], [have_libdrm=no])
@@ -1658,6 +1682,8 @@ xxlib | xgallium-xlib)
 xdri)
     # DRI-based GLX
 
+    require_dri_shared_libs_and_glapi "GLX"
+
     # find the DRI deps for libGL
     dri_modules="x11 xext xdamage >= $XDAMAGE_REQUIRED xfixes x11-xcb xcb xcb-glx >= $XCBGLX_REQUIRED"
 
diff --git a/docs/faq.html b/docs/faq.html
index 1f2fd66034c..6270a071dac 100644
--- a/docs/faq.html
+++ b/docs/faq.html
@@ -16,7 +16,7 @@ <h1>The Mesa 3D Graphics Library</h1>
 
 <center>
 <h1>Mesa Frequently Asked Questions</h1>
-Last updated: 9 October 2012
+Last updated: 19 September 2018
 </center>
 
 <br>
@@ -373,18 +373,16 @@ <h2>4.2 How do I write a new device driver?</h2>
 
 <h2>4.3 Why isn't GL_EXT_texture_compression_s3tc implemented in Mesa?</h2>
 <p>
-The <a href="http://oss.sgi.com/projects/ogl-sample/registry/EXT/texture_compression_s3tc.txt">specification for the extension</a>
-indicates that there are intellectual property (IP) and/or patent issues
-to be dealt with.
+Oh but it is! Prior to 2nd October 2017, the Mesa project did not include s3tc
+support due to intellectual property (IP) and/or patent issues around the s3tc
+algorithm.
 </p>
-<p>We've been unsuccessful in getting a response from S3 (or whoever owns
-the IP nowadays) to indicate whether or not an open source project can
-implement the extension (specifically the compression/decompression
-algorithms).
+<p>
+As of Mesa 17.3.0, Mesa now officially supports s3tc, as the patent has expired.
 </p>
 <p>
-In the mean time, a 3rd party <a href="https://dri.freedesktop.org/wiki/S3TC">
-plug-in library</a> is available.
+In versions prior to this, a 3rd party <a href="https://dri.freedesktop.org/wiki/S3TC">
+plug-in library</a> was required.
 </p>
 
 </div>
diff --git a/docs/install.html b/docs/install.html
index 08081944cfc..5493da054c5 100644
--- a/docs/install.html
+++ b/docs/install.html
@@ -75,7 +75,7 @@ <h2>1.1 General</h2>
 Version 2.6.4 or later should work.
 </li>
 <li><a href="http://www.makotemplates.org/">Python Mako module</a> -
-Python Mako module is required. Version 0.3.4 or later should work.
+Python Mako module is required. Version 0.8.0 or later should work.
 </li>
 <li>lex / yacc - for building the Mesa IR and GLSL compiler.
 <div>
diff --git a/docs/relnotes/18.2.0.html b/docs/relnotes/18.2.0.html
index fb7a12f2859..968312ca901 100644
--- a/docs/relnotes/18.2.0.html
+++ b/docs/relnotes/18.2.0.html
@@ -14,7 +14,7 @@ <h1>The Mesa 3D Graphics Library</h1>
 <iframe src="../contents.html"></iframe>
 <div class="content">
 
-<h1>Mesa 18.2.0 Release Notes / TBD</h1>
+<h1>Mesa 18.2.0 Release Notes / September 7, 2018</h1>
 
 <p>
 Mesa 18.2.0 is a new development release. People who are concerned
@@ -40,7 +40,8 @@ <h1>Mesa 18.2.0 Release Notes / TBD</h1>
 
 <h2>SHA256 checksums</h2>
 <pre>
-TBD.
+b9e6bb3eb7660b0726ba28405ffa0cb77de619e925b910b72f4d7a85c0098596  mesa-18.2.0.tar.gz
+22452bdffff8e11bf4284278155a9f77cb28d6d73a12c507f1490732d0d9ddce  mesa-18.2.0.tar.xz
 </pre>
 
 
@@ -59,9 +60,217 @@ <h2>New features</h2>
 <li>GL_ARB_sample_locations and GL_NV_sample_locations on nvc0 (GM200+)</li>
 <li>GL_ANDROID_extension_pack_es31a on radeonsi.</li>
 <li>GL_KHR_texture_compression_astc_ldr on radeonsi</li>
+<li>GL_NV_conservative_raster and GL_NV_conservative_raster_dilate on nvc0 (GM200+)</li>
+<li>GL_NV_conservative_raster_pre_snap_triangles on nvc0 (GP102+)</li>
+<li>multisampled images on nvc0 (GM107+) (now supported on GF100+)</li>
 </ul>
 
 <h2>Bug fixes</h2>
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=13728">Bug 13728</a> - [G965] Some objects in Neverwinter Nights Linux version not displayed correctly</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=61761">Bug 61761</a> - glPolygonOffsetEXT, OFFSET_BIAS incorrectly set to a huge number</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=65422">Bug 65422</a> - Rename api_validate.[ch] to draw_validate.[ch]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78097">Bug 78097</a> - glUniform1ui and friends not supported by display lists</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91808">Bug 91808</a> - trine1 misrender r600g</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93355">Bug 93355</a> - [BXT,SKLGT4e] intermittent ext_framebuffer_multisample.accuracy fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95009">Bug 95009</a> - [SNB] amd_shader_trinary_minmax.execution.built-in-functions.gs-mid3-ivec2-ivec2-ivec2 intermittent</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95012">Bug 95012</a> - [SNB] glsl-1_50.execution.built-in-functions.gs-op tests intermittent</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98699">Bug 98699</a> - &quot;float[a+++4 ? 1:1] f;&quot; crashes glsl_compiler</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99116">Bug 99116</a> - Wine DirectDraw programs showing only a blackscreen when using Mesa Gallium drivers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99730">Bug 99730</a> - Metro Redux game(s) needs override for midshader extension declaration</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100177">Bug 100177</a> - [GM206] Misrendering in XCOM Ennemy Within</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100430">Bug 100430</a> - [radv] graphical glitches on dolphin emulator</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101247">Bug 101247</a> - Mesa fails to link GLSL programs with unused output blocks</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102390">Bug 102390</a> - centroid interpolation causes broken attribute values</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102678">Bug 102678</a> - gl_BaseVertex should always be zero when the draw command has no &lt;basevertex&gt; parameter</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103274">Bug 103274</a> - BRW allocates too much heap memory</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104388">Bug 104388</a> - [snb] GPU HANG: ecode 6:0:0x85fffff8 in fgfs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104626">Bug 104626</a> - broadcom/vc5: double compare</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104809">Bug 104809</a> - anv: DOOM 2016 and Wolfenstein II:The New Colossus crash due to not having depthBoundsTest</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105351">Bug 105351</a> - [Gen6+] piglit's arb_shader_image_load_store-host-mem-barrier fails with a glGetTexSubImage fallback path</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105374">Bug 105374</a> - texture3d, a SaschaWillems demo, assert fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105396">Bug 105396</a> - tc compatible htile sets depth of htiles of discarded fragments to 1.0</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105399">Bug 105399</a> - [snb] GPU hang: after geometry shader emits no geometry, the program hangs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105497">Bug 105497</a> - shader-db crashes on 72 core system after ast_type_qualifier bitset change</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105613">Bug 105613</a> - Compute shader locks up within nested &quot;for&quot; loop</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105731">Bug 105731</a> - linker error &quot;fragment shader input ... has no matching output in the previous stage&quot; when previous stage's output declaration in a separate shader object</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105904">Bug 105904</a> - Needed to delete mesa shader cache after driver upgrade for 32 bit wine vulkan programs to work.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105975">Bug 105975</a> - i965 always reports 0 viewport subpixel bits</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106090">Bug 106090</a> - Compiling compute shader crashes RADV</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106133">Bug 106133</a> - make check &quot;OSError: [Errno 24] Too many open files&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106163">Bug 106163</a> - r600/sb: optimizer tries to schedule access to different array elements in one instruction group</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106174">Bug 106174</a> - vulkan dota2 broken (segfaulting), found bug commit</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106180">Bug 106180</a> - [bisected] radv vulkan smoke test black screen (Add support for DRI3 v1.2)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106232">Bug 106232</a> - LLVM unit tests have error in random number handling</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106243">Bug 106243</a> - [kbl] GPU HANG: 9:0:0x85dffffb, in Cinnamon</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106315">Bug 106315</a> - The witness + dxvk suffers flickering garbage</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106331">Bug 106331</a> - radv doesnt support VK_FORMAT_R32G32B32_SFLOAT</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106382">Bug 106382</a> - Shader cache breaks INTEL_DEBUG=shader_time</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106393">Bug 106393</a> - glsl-fs-shader-stencil-export hangs forever</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106450">Bug 106450</a> - glGetIntegerv return wrong value in some cases</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106462">Bug 106462</a> - piglit.spec.arb_vertex_array_bgra.get regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106479">Bug 106479</a> - NDEBUG not defined for libamdgpu_addrlib</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106480">Bug 106480</a> - A2B10G10R10_SNORM vertex attribute doesn't work.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106499">Bug 106499</a> - [regression, bisected] Several games crash on start</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106504">Bug 106504</a> - vulkan SPIR-V parsing failed at ../src/compiler/spirv/vtn_cfg.c:381</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106511">Bug 106511</a> - radv: MSAA broken on SI (assertion failure in vkCreateImage)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106587">Bug 106587</a> - Dota2 is very dark when using vulkan render on a Intel &lt;&lt; AMD prime setup</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106594">Bug 106594</a> - [regression,apitrace,bisected] Prison Architect rendered unplayable by multicoloured flickering triangles and overlayed triangles when performing certain actions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106619">Bug 106619</a> - [OpenCL][llvm-svn]build failure  addPassesToEmitFile candidate expects 6 arguments, 3 provided</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106629">Bug 106629</a> - [SNB,IVB,HSW,BDW] dEQP-EGL.functional.image.create.gles2_cubemap_negative_z_rgb_read_pixels</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106642">Bug 106642</a> - X server crashes in i965 on desktop startup when DRI3 v1.2 / modifier support is enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106643">Bug 106643</a> - double free when exporting a temporarily imported semaphore</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106673">Bug 106673</a> - [bisected] Steam is unusable since commit 5c33e8c7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106687">Bug 106687</a> - radv: Fast color clears use incorrect format</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106708">Bug 106708</a> - [SKL/KBL/GLK] 2-3% performance drop in SynMark DrvState and 5-9% drop on SynMark Multithread</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106748">Bug 106748</a> - st/mesa: use PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY broke qemu -display sdl,gl=on</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106756">Bug 106756</a> - Wine 3.9 crashes with DXVK on Just Cause 3 and Quantum Break on VEGA but works ON POLARIS</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106774">Bug 106774</a> - GLSL IR copy propagates loads of SSBOs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106776">Bug 106776</a> - vma_random unrecognized command line option &quot;-std=c++11&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106778">Bug 106778</a> - Files missing from tarball - intel_sanitize_gpu.*</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106779">Bug 106779</a> - Files missing from tarball - u_debug_stack_android.cpp</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106784">Bug 106784</a> - 18.1.1 autotools build fail without mako</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106801">Bug 106801</a> - vma_random_test.cpp:239:18: error: non-constant-expression cannot be narrowed from type 'unsigned long' to 'uint_fast32_t' (aka 'unsigned int') in initializer list [-Wc++11-narrowing]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106810">Bug 106810</a> - ProgramBinary does not switch program correctly when using transform feedback</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106823">Bug 106823</a> - Failed to recongnize keyword of shader code</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106830">Bug 106830</a> - [bisected] 32 bit tests (deqp, piglit, glcts, vulkancts) crashing on all platforms</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106861">Bug 106861</a> - fatal error: wayland-egl-backend.h: No such file or directory compilation terminated.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106865">Bug 106865</a> - [GLK] piglit.spec.ext_framebuffer_multisample.accuracy stencil tests fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106903">Bug 106903</a> - radv: Fragment shader output goes to wrong attachments when render targets are sparse</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106906">Bug 106906</a> - Failed to recongnize keyword “sampler2DRect” and &quot;sampler2DRectShadow&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106907">Bug 106907</a> - Correct Transform Feedback Varyings information is expected after using ProgramBinary</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106912">Bug 106912</a> - radv: 16-bit depth buffer causes artifacts in Shadow Warrior 2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106928">Bug 106928</a> - When starting a match Rocket League crashes on &quot;Go&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106941">Bug 106941</a> - Intel ANV vulkan driver exposing version 1.1.0 which is incorrect</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106986">Bug 106986</a> - glGetQueryiv error when querying number of result bits for GL_ANY_SAMPLES_PASSED_CONSERVATIVE</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106997">Bug 106997</a> - [Regression]. Dying light game is crashing on latest mesa</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107098">Bug 107098</a> - Segfault after munmap(kms_sw_dt-&gt;ro_mapped)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107117">Bug 107117</a> - mesa-18.1: regression with TFP on intel with modesettings and glamor acceleration</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107190">Bug 107190</a> - Got seg fault on snb when use INTEL_DEBUG=bat</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107193">Bug 107193</a> - piglit.spec.arb_compute_shader.linker.bug-93840 fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107212">Bug 107212</a> - Dual-Core CPU E5500 / G45: RetroArch with reicast core results in corrupted graphics</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107223">Bug 107223</a> - [GEN9+] 50% perf drop in SynMark Fill* tests (E2E RBC gets disabled?)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107248">Bug 107248</a> - [G45 ILK G965] Texture handling broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107275">Bug 107275</a> - NIR segfaults after spirv-opt</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107276">Bug 107276</a> - radv: OpBitfieldUExtract returns incorrect result when count is zero</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107295">Bug 107295</a> - Access violation on glDrawArrays with count &gt;= 2048</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107305">Bug 107305</a> - glsl/opt_copy_propagation_elements.cpp:72:9: error: delegating constructors are permitted only in C++11</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107312">Bug 107312</a> - Mesa-git RPM build fails after commit 8cacf38f527d42e41441ef8c25d95d4b2f4e8602</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107359">Bug 107359</a> - [Regression] [bisected] [OpenGL CTS] [SKL,BDW] KHR-GL46.texture_barrier*-texels, GTF-GL46.gtf21.GL2FixedTests.buffer_corners.buffer_corners, and GTF-GL46.gtf21.GL2FixedTests.stencil_plane_corners.stencil_plane_corners fail with some configuration</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107366">Bug 107366</a> - NIR verification crashes on piglit tests</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107423">Bug 107423</a> - vc4 build failure: &quot;v3d_decoder.c:893: undefined reference to `clif_lookup_bo'&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107443">Bug 107443</a> - Build error on arm64: v3d_decoder.c:837:17: error: format not a string literal and no format arguments [-Werror=format-security]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107460">Bug 107460</a> - radv: OpControlBarrier does not always work correctly (bisected)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107477">Bug 107477</a> - [DXVK] Setting high shader quality in GTA V results in LLVM error</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107510">Bug 107510</a> - [GEN8+] up to 10% perf drop on several 3D benchmarks</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107544">Bug 107544</a> - intel/decoder: out of bounds group_iter</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107550">Bug 107550</a> - &quot;0[2]&quot; as function parameter hits assert</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107579">Bug 107579</a> - [SNB] The graphic corruption when we reuse the GS compiled and used for TFB when statebuffer contain magic trash in the unused space</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107601">Bug 107601</a> - Rise of the Tomb Raider Segmentation Fault when the game starts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107610">Bug 107610</a> - Dolphin emulator mis-renders shadow overlay in Super Mario Sunshine</li>
+
+</ul>
 
 <h2>Changes</h2>
 
diff --git a/docs/relnotes/18.2.1.html b/docs/relnotes/18.2.1.html
new file mode 100644
index 00000000000..23fb8f46b5a
--- /dev/null
+++ b/docs/relnotes/18.2.1.html
@@ -0,0 +1,227 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.2.1 Release Notes / September 21, 2018</h1>
+
+<p>
+Mesa 18.2.1 is a bug fix release which fixes bugs found since the 18.2.0 release.
+</p>
+<p>
+Mesa 18.2.0 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+SHA256: 45419ccbe1bf9a2e15ffe71ced34615002e1b42c24b917fbe2b2f58ab1970562  mesa-18.2.1.tar.gz
+SHA256: 9636dc6f3d188abdcca02da97cedd73640d9035224efd5db724187d062c81056  mesa-18.2.1.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103241">Bug 103241</a> - Anv crashes when using 64-bit vertex inputs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107280">Bug 107280</a> - [DXVK] Batman: Arkham City with tessellation enabled hangs on SKL GT4</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107772">Bug 107772</a> - Mesa preprocessor matches if(def)s &amp; endifs incorrectly</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107779">Bug 107779</a> - Access violation with some games</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107810">Bug 107810</a> - The 'va_end' call is missed after 'va_copy' in 'util_vsnprintf' function under windows</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107832">Bug 107832</a> - Gallium picking A16L16 formats when emulating INTENSITY16 conflicts with mesa</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107843">Bug 107843</a> - 32bit Mesa build failes with meson.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107879">Bug 107879</a> - crash happens when link program</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107891">Bug 107891</a> - [wine, regression, bisected] RAGE, Wolfenstein The New Order hangs in menu</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andres Gomez (3):</p>
+<ul>
+  <li>docs: add sha256 checksums for 18.2.0</li>
+  <li>Revert "Revert "glsl: skip stringification in preprocessor if in unreachable branch""</li>
+  <li>cherry-ignore: i965/tools: 32bit compilation with meson</li>
+</ul>
+
+<p>Andrii Simiklit (4):</p>
+<ul>
+  <li>apple/glx/log: added missing va_end() after va_copy()</li>
+  <li>mesa/util: don't use the same 'va_list' instance twice</li>
+  <li>mesa/util: don't ignore NULL returned from 'malloc'</li>
+  <li>mesa/util: add missing va_end() after va_copy()</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (5):</p>
+<ul>
+  <li>radv: Support v3 of VK_EXT_vertex_attribute_divisor.</li>
+  <li>radv: Set the user SGPR MSB for Vega.</li>
+  <li>radv: Only allow 16 user SGPRs for compute on GFX9+.</li>
+  <li>radv: Use build ID if available for cache UUID.</li>
+  <li>radv: Fix driver UUID SHA1 init.</li>
+</ul>
+
+<p>Christopher Egert (1):</p>
+<ul>
+  <li>radeon: fix ColorMask</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>virgl: don't send a shader create with no data. (v2)</li>
+</ul>
+
+<p>Dylan Baker (1):</p>
+<ul>
+  <li>meson: Print a message about why a libdrm version was selected</li>
+</ul>
+
+<p>Eric Anholt (2):</p>
+<ul>
+  <li>v3d: Fix SRC_ALPHA_SATURATE blending for RTs without alpha.</li>
+  <li>v3d: Fix setup of the VCM cache size.</li>
+</ul>
+
+<p>Erik Faye-Lund (2):</p>
+<ul>
+  <li>winsys/virgl: avoid unintended behavior</li>
+  <li>virgl: adjust strides when mapping temp-resources</li>
+</ul>
+
+<p>Fritz Koenig (2):</p>
+<ul>
+  <li>mesa: Additional FlipY applications</li>
+  <li>mesa: FramebufferParameteri parameter checking</li>
+</ul>
+
+<p>Gert Wollny (2):</p>
+<ul>
+  <li>winsys/virgl: correct resource and handle allocation (v2)</li>
+  <li>mesa/texture: Also check for LA texture when querying intensity component size</li>
+</ul>
+
+<p>Ian Romanick (1):</p>
+<ul>
+  <li>i965/fs: Don't propagate conditional modifiers from integer compares to adds</li>
+</ul>
+
+<p>Jason Ekstrand (11):</p>
+<ul>
+  <li>anv/pipeline: Only consider double elements which actually exist</li>
+  <li>i965: Workaround the gen9 hw astc5x5 sampler bug</li>
+  <li>anv: Re-emit vertex buffers when the pipeline changes</li>
+  <li>anv: Disable the vertex cache when tessellating on SKL GT4</li>
+  <li>anv: Clamp scissors to the framebuffer boundary</li>
+  <li>vulkan: Update the XML and headers to 1.1.84</li>
+  <li>anv: Support v3 of VK_EXT_vertex_attribute_divisor</li>
+  <li>anv/query: Write both dwords in emit_zero_queries</li>
+  <li>nir: Add a small pass to rematerialize derefs per-block</li>
+  <li>nir/loop_unroll: Re-materialize derefs in use blocks before unrolling</li>
+  <li>nir/opt_if: Re-materialize derefs in use blocks before peeling loops</li>
+</ul>
+
+<p>Josh Pieper (1):</p>
+<ul>
+  <li>st/mesa: Validate the result of pipe_transfer_map in make_texture (v2)</li>
+</ul>
+
+<p>Juan A. Suarez Romero (2):</p>
+<ul>
+  <li>cherry-ignore: radv: fix descriptor pool allocation size</li>
+  <li>Update version to 18.2.1</li>
+</ul>
+
+<p>Kenneth Feng (1):</p>
+<ul>
+  <li>amd: Add Picasso device id</li>
+</ul>
+
+<p>Marek Olšák (5):</p>
+<ul>
+  <li>radeonsi: fix HTILE for NPOT textures with mipmapping on SI/CI</li>
+  <li>winsys/radeon: fix CMASK fast clear for NPOT textures with mipmapping on SI/CI</li>
+  <li>r600: fix HTILE for NPOT textures with mipmapping</li>
+  <li>radeonsi: fix printing a BO list into ddebug reports</li>
+  <li>ac: revert new LLVM 7.0 behavior for fdiv</li>
+</ul>
+
+<p>Mathias Fröhlich (1):</p>
+<ul>
+  <li>tnl: Fix green gun regression in xonotic.</li>
+</ul>
+
+<p>Mauro Rossi (3):</p>
+<ul>
+  <li>android: broadcom/genxml: fix collision with intel/genxml header-gen macro</li>
+  <li>android: broadcom/cle: add gallium include path</li>
+  <li>android: broadcom/cle: export the broadcom top level path headers</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>loader/dri3: Only wait for back buffer fences in dri3_get_buffer</li>
+</ul>
+
+<p>Pierre Moreau (1):</p>
+<ul>
+  <li>nvir: Always split 64-bit IMAD/IMUL operations</li>
+</ul>
+
+<p>Samuel Pitoiset (7):</p>
+<ul>
+  <li>radv: fix function names for VK_EXT_conditional_rendering</li>
+  <li>radv: fix VK_EXT_conditional_rendering visibility</li>
+  <li>radv: bump the maximum number of arguments to 64</li>
+  <li>radv: handle loc-&gt;indirect correctly for the first descriptor</li>
+  <li>radv: fix GPU hangs with 32-bit indirect descriptors</li>
+  <li>radv: fix flushing indirect descriptors</li>
+  <li>radv: fix setting global locations for indirect descriptors</li>
+</ul>
+
+<p>Sergii Romantsov (3):</p>
+<ul>
+  <li>intel: compiler option msse2 and mstackrealign</li>
+  <li>i965/tools: 32bit compilation with meson</li>
+  <li>mesa/meson: 32bit xmlconfig linkage</li>
+</ul>
+
+<p>Timothy Arceri (2):</p>
+<ul>
+  <li>glsl: fixer lexer for unreachable defines</li>
+  <li>Revert "radeonsi: avoid syncing the driver thread in si_fence_finish"</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff --git a/docs/relnotes/18.2.2.html b/docs/relnotes/18.2.2.html
new file mode 100644
index 00000000000..9793c03a840
--- /dev/null
+++ b/docs/relnotes/18.2.2.html
@@ -0,0 +1,155 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.2.2 Release Notes / October 5, 2018</h1>
+
+<p>
+Mesa 18.2.2 is a bug fix release which fixes bugs found since the 18.2.1 release.
+</p>
+<p>
+Mesa 18.2.2 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+SHA256: c51711168971957037cc7e3e19e8abe1ec6eeab9cf236d419a1e7728a41cac8a  mesa-18.2.2.tar.gz
+SHA256: c3ba82b12a89d3d9fed2bdd96b4702dbb7ab675034650a8b1b718320daf073c4  mesa-18.2.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104602">Bug 104602</a> - [apitrace] Graphical artifacts in Civilization VI on RX Vega</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104926">Bug 104926</a> - swrast: Mesa 17.3.3 produces:  HW cursor for format 875713089 not supported</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107276">Bug 107276</a> - radv: OpBitfieldUExtract returns incorrect result when count is zero</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107786">Bug 107786</a> - [DXVK] MSAA reflections are broken in GTA V</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108024">Bug 108024</a> - [Debian Stretch]Fail to build because &quot;xcb_randr_lease_t&quot;</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Deucher (1):</p>
+<ul>
+  <li>pci_ids: add new polaris pci id</li>
+</ul>
+
+<p>Andres Rodriguez (1):</p>
+<ul>
+  <li>radv: only emit ZPASS_DONE for timestamp queries on gfx queues</li>
+</ul>
+
+<p>Axel Davy (3):</p>
+<ul>
+  <li>st/nine: Clamp RCP when 0*inf!=0</li>
+  <li>st/nine: Avoid redundant SetCursorPos calls</li>
+  <li>st/nine: Increase maximum number of temp registers</li>
+</ul>
+
+<p>Dylan Baker (1):</p>
+<ul>
+  <li>meson: Don't compile pipe loader with dri support when not using dri</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>vc4: Fix sin(0.0) and cos(0.0) accuracy to fix SDL rendering rotation.</li>
+</ul>
+
+<p>Eric Engestrom (1):</p>
+<ul>
+  <li>vulkan/wsi/display: check if wsi_swapchain_init() succeeded</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>anv,radv: Implement vkAcquireNextImage2</li>
+</ul>
+
+<p>Juan A. Suarez Romero (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 18.2.1</li>
+  <li>Update version to 18.2.2</li>
+</ul>
+
+<p>Leo Liu (1):</p>
+<ul>
+  <li>radeon/uvd: use bitstream coded number for symbols of Huffman tables</li>
+</ul>
+
+<p>Marek Olšák (2):</p>
+<ul>
+  <li>glsl_to_tgsi: invert gl_SamplePosition.y for the default framebuffer</li>
+  <li>radeonsi: NaN should pass kill_if</li>
+</ul>
+
+<p>Maxime (1):</p>
+<ul>
+  <li>vulkan: Disable randr lease for libxcb &lt; 1.13</li>
+</ul>
+
+<p>Michal Srb (1):</p>
+<ul>
+  <li>st/dri: don't set queryDmaBufFormats/queryDmaBufModifiers if the driver does not implement it</li>
+</ul>
+
+<p>Rhys Perry (2):</p>
+<ul>
+  <li>nvc0: Update counter reading shaders to new NVC0_CB_AUX_MP_INFO</li>
+  <li>nvc0: fix bindless multisampled images on Maxwell+</li>
+</ul>
+
+<p>Samuel Iglesias Gonsálvez (1):</p>
+<ul>
+  <li>anv: Add support for protected memory properties on anv_GetPhysicalDeviceProperties2()</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>radv: use the resolve compute path if dest uses multiple layers</li>
+</ul>
+
+<p>Stuart Young (1):</p>
+<ul>
+  <li>docs: Update FAQ with respect to s3tc support</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>radeonsi: add a workaround for bitfield_extract when count is 0</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff --git a/docs/relnotes/18.2.3.html b/docs/relnotes/18.2.3.html
new file mode 100644
index 00000000000..596a0a12072
--- /dev/null
+++ b/docs/relnotes/18.2.3.html
@@ -0,0 +1,167 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.2.3 Release Notes / October 19, 2018</h1>
+
+<p>
+Mesa 18.2.3 is a bug fix release which fixes bugs found since the 18.2.2 release.
+</p>
+<p>
+Mesa 18.2.3 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+0e13e2342eae74d8848df23595c4bb4b2f8874c9e1213b8466b1fbfa7ef99375  mesa-18.2.3.tar.gz
+e2bf83c17e1abdecb1ee81af22652e27e9aa38f963e95e60f34275cc0376304f  mesa-18.2.3.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99507">Bug 99507</a> - Corrupted frame contents with Vulkan version of DOTA2, Talos Principle and Sascha Willems' demos when they're run Vsynched in fullscreen</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107857">Bug 107857</a> - GPU hang - GS_EMIT without shader outputs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107926">Bug 107926</a> - [anv] Rise of the Tomb Raider always misrendering, segfault and gpu hang.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108012">Bug 108012</a> - Compiler crashes on access of non-existent member incremental operations</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Boyuan Zhang (1):</p>
+<ul>
+  <li>st/va: use provided sizes and coords for vlVaGetImage</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>anv: add missing unlock in error path.</li>
+</ul>
+
+<p>Dylan Baker (1):</p>
+<ul>
+  <li>meson: Don't allow building EGL on Windows or MacOS</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>st/nine: do not double-close the fd on teardown</li>
+  <li>egl: make eglSwapInterval a no-op for !window surfaces</li>
+  <li>egl: make eglSwapBuffers* a no-op for !window surfaces</li>
+  <li>vl/dri3: do full teardown on screen_destroy</li>
+  <li>Revert "mesa: remove unnecessary 'sort by year' for the GL extensions"</li>
+</ul>
+
+<p>Eric Engestrom (1):</p>
+<ul>
+  <li>radv: add missing meson c++ visibility arguments</li>
+</ul>
+
+<p>Fritz Koenig (1):</p>
+<ul>
+  <li>i965: Replace checks for rb-&gt;Name with FlipY (v2)</li>
+</ul>
+
+<p>Gert Wollny (1):</p>
+<ul>
+  <li>virgl, vtest: Correct the transfer size calculation</li>
+</ul>
+
+<p>Ilia Mirkin (4):</p>
+<ul>
+  <li>glsl: fix array assignments of a swizzled vector</li>
+  <li>nv50,nvc0: mark RGBX_UINT formats as renderable</li>
+  <li>nv50,nvc0: guard against zero-size blits</li>
+  <li>nvc0: fix blitting red to srgb8_alpha</li>
+</ul>
+
+<p>Jason Ekstrand (7):</p>
+<ul>
+  <li>nir/cf: Remove phi sources if needed in nir_handle_add_jump</li>
+  <li>anv: Use separate MOCS settings for external BOs</li>
+  <li>intel/fs: Fix a typo in need_matching_subreg_offset</li>
+  <li>nir/from_ssa: Don't rewrite derefs destinations to registers</li>
+  <li>anv/batch_chain: Don't start a new BO just for BATCH_BUFFER_START</li>
+  <li>nir/alu_to_scalar: Use ssa_for_alu_src in hand-rolled expansions</li>
+  <li>intel: Don't propagate conditional modifiers if a UD source is negated</li>
+</ul>
+
+<p>Juan A. Suarez Romero (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 18.2.2</li>
+  <li>Update version to 18.2.3</li>
+</ul>
+
+<p>Józef Kucia (1):</p>
+<ul>
+  <li>radeonsi: avoid sending GS_EMIT in shaders without outputs</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>drirc: add a workaround for ARMA 3</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>radv: add a workaround for a VGT hang with prim restart and strips</li>
+</ul>
+
+<p>Tapani Pälli (1):</p>
+<ul>
+  <li>glsl: do not attempt assignment if operand type not parsed correctly</li>
+</ul>
+
+<p>Timothy Arceri (11):</p>
+<ul>
+  <li>glsl: ignore trailing whitespace when define redefined</li>
+  <li>util: disable cache if we have no build-id and timestamp is zero</li>
+  <li>util: rename timestamp param in disk_cache_create()</li>
+  <li>util: add disk_cache_get_function_identifier()</li>
+  <li>radeonsi: use build-id when available for disk cache</li>
+  <li>nouveau: use build-id when available for disk cache</li>
+  <li>r600: use build-id when available for disk cache</li>
+  <li>mesa/st: add force_compat_profile option to driconfig</li>
+  <li>util: use force_compat_profile for Wolfenstein The Old Blood</li>
+  <li>util: better handle program names from wine</li>
+  <li>util: add drirc workarounds for RAGE</li>
+</ul>
+
+<p>Vinson Lee (1):</p>
+<ul>
+  <li>r600/sb: Fix constant-logical-operand warning.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff --git a/docs/relnotes/18.2.4.html b/docs/relnotes/18.2.4.html
new file mode 100644
index 00000000000..5da4362d09a
--- /dev/null
+++ b/docs/relnotes/18.2.4.html
@@ -0,0 +1,154 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.2.4 Release Notes / October 31, 2018</h1>
+
+<p>
+Mesa 18.2.4 is a bug fix release which fixes bugs found since the 18.2.4 release.
+</p>
+<p>
+Mesa 18.2.4 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+968bfe78605e9397ddf244933b1fa62edb8429fc55aaec2ae7e20bb1c82abdea  mesa-18.2.4.tar.gz
+621d1aebb57876d5b6a5d2dcf4eb7e0620e650c6fe5cf3655c65e243adc9cb4e  mesa-18.2.4.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107865">Bug 107865</a> - swr fail to build with llvm-libs 6.0.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108272">Bug 108272</a> - [polaris10] opencl-mesa: Anything using OpenCL segfaults, XFX Radeon RX 580</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108524">Bug 108524</a> - [RADV]  GPU lockup on event synchronization</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Smith (2):</p>
+<ul>
+  <li>ac/nir: Use context-specific LLVM types</li>
+  <li>anv: Fix sanitization of stencil state when the depth test is disabled</li>
+</ul>
+
+<p>Alok Hota (2):</p>
+<ul>
+  <li>swr/rast: ignore CreateElementUnorderedAtomicMemCpy</li>
+  <li>swr/rast: fix intrinsic/function for LLVM 7 compatibility</li>
+</ul>
+
+<p>Andres Rodriguez (1):</p>
+<ul>
+  <li>radv: fix check for perftest options size</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (1):</p>
+<ul>
+  <li>radv: Emit enqueued pipeline barriers on event write.</li>
+</ul>
+
+<p>Connor Abbott (2):</p>
+<ul>
+  <li>ac: Introduce ac_build_expand()</li>
+  <li>ac: Fix loading a dvec3 from an SSBO</li>
+</ul>
+
+<p>David McFarland (1):</p>
+<ul>
+  <li>util: Change remaining uint32 cache ids to sha1</li>
+</ul>
+
+<p>Dylan Baker (1):</p>
+<ul>
+  <li>meson: don't require libelf for r600 without LLVM</li>
+</ul>
+
+<p>Elie Tournier (1):</p>
+<ul>
+  <li>gallium: Correctly handle no config context creation</li>
+</ul>
+
+<p>Eric Engestrom (1):</p>
+<ul>
+  <li>radv: s/abs/fabsf/ for floats</li>
+</ul>
+
+<p>Jan Vesely (1):</p>
+<ul>
+  <li>radeonsi: Bump number of allowed global buffers to 32</li>
+</ul>
+
+<p>Jason Ekstrand (3):</p>
+<ul>
+  <li>spirv: Use the right bit-size for spec constant ops</li>
+  <li>blorp: Emit a dummy 3DSTATE_WM prior to 3DSTATE_WM_HZ_OP</li>
+  <li>anv: Flag semaphore BOs as external</li>
+</ul>
+
+<p>Juan A. Suarez Romero (3):</p>
+<ul>
+  <li>docs: add sha256 checksums for 18.2.3</li>
+  <li>cherry-ignore: Revert "anv/skylake: disable ForceThreadDispatchEnable"</li>
+  <li>Update version to 18.2.4</li>
+</ul>
+
+<p>Liviu Prodea (1):</p>
+<ul>
+  <li>scons: Put to rest zombie texture_float build option.</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>radeonsi: fix a VGT hang with primitive restart on Polaris10 and later</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>loader/dri3: Also wait for front buffer fence if we triggered it</li>
+</ul>
+
+<p>Nanley Chery (1):</p>
+<ul>
+  <li>intel/blorp: Define the clear value bounds for HiZ clears</li>
+</ul>
+
+<p>Rob Clark (2):</p>
+<ul>
+  <li>freedreno: fix inorder rendering case</li>
+  <li>freedreno: don't flush when new and old pfb is identical</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff --git a/docs/relnotes/18.2.5.html b/docs/relnotes/18.2.5.html
new file mode 100644
index 00000000000..d1e7887a3a9
--- /dev/null
+++ b/docs/relnotes/18.2.5.html
@@ -0,0 +1,171 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.2.5 Release Notes / November 15, 2018</h1>
+
+<p>
+Mesa 18.2.5 is a bug fix release which fixes bugs found since the 18.2.4 release.
+</p>
+<p>
+Mesa 18.2.5 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105731">Bug 105731</a> - linker error &quot;fragment shader input ... has no matching output in the previous stage&quot; when previous stage's output declaration in a separate shader object</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107511">Bug 107511</a> - KHR/khrplatform.h not always installed when needed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107626">Bug 107626</a> - [SNB] The graphical corruption and GPU hang occur sometimes on the piglit test &quot;arb_texture_multisample-large-float-texture&quot; with parameter --fp16</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108082">Bug 108082</a> - warning: unknown warning option '-Wno-format-truncation' [-Wunknown-warning-option]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108560">Bug 108560</a> - Mesa 32 is built without sse</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andre Heider (1):</p>
+<ul>
+  <li>st/nine: fix stack corruption due to ABI mismatch</li>
+</ul>
+
+<p>Andrii Simiklit (1):</p>
+<ul>
+  <li>i965/batch: don't ignore the 'brw_new_batch' call for a 'new batch'</li>
+</ul>
+
+<p>Dylan Baker (2):</p>
+<ul>
+  <li>meson: link gallium nine with pthreads</li>
+  <li>meson: fix libatomic tests</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>egl/glvnd: correctly report errors when vendor cannot be found</li>
+  <li>m4: add Werror when checking for compiler flags</li>
+</ul>
+
+<p>Eric Engestrom (6):</p>
+<ul>
+  <li>svga: add missing meson build dependency</li>
+  <li>clover: add missing meson build dependency</li>
+  <li>wsi/wayland: use proper VkResult type</li>
+  <li>wsi/wayland: only finish() a successfully init()ed display</li>
+  <li>configure: install KHR/khrplatform.h when needed</li>
+  <li>meson: install KHR/khrplatform.h when needed</li>
+</ul>
+
+<p>Gert Wollny (1):</p>
+<ul>
+  <li>virgl/vtest-winsys: Use virgl version of bind flags</li>
+</ul>
+
+<p>Jonathan Gray (1):</p>
+<ul>
+  <li>intel/tools: include stdarg.h in error2aub</li>
+</ul>
+
+<p>Juan A. Suarez Romero (4):</p>
+<ul>
+  <li>docs: add sha256 checksums for 18.2.4</li>
+  <li>cherry-ignore: add explicit 18.3 only nominations</li>
+  <li>cherry-ignore: i965/batch: avoid reverting batch buffer if saved state is an empty</li>
+  <li>Update version to 18.2.5</li>
+</ul>
+
+<p>Lionel Landwerlin (1):</p>
+<ul>
+  <li>anv/android: mark gralloc allocated BOs as external</li>
+</ul>
+
+<p>Marek Olšák (3):</p>
+<ul>
+  <li>ac: fix ac_build_fdiv for f64</li>
+  <li>st/va: fix incorrect use of resource_destroy</li>
+  <li>include: update GL &amp; GLES headers (v2)</li>
+</ul>
+
+<p>Matt Turner (2):</p>
+<ul>
+  <li>util/ralloc: Switch from DEBUG to NDEBUG</li>
+  <li>util/ralloc: Make sizeof(linear_header) a multiple of 8</li>
+</ul>
+
+<p>Olivier Fourdan (1):</p>
+<ul>
+  <li>wayland/egl: Resize EGL surface on update buffer for swrast</li>
+</ul>
+
+<p>Rhys Perry (1):</p>
+<ul>
+  <li>glsl_to_tgsi: don't create 64-bit integer MAD/FMA</li>
+</ul>
+
+<p>Samuel Pitoiset (2):</p>
+<ul>
+  <li>radv: disable conditional rendering for vkCmdCopyQueryPoolResults()</li>
+  <li>radv: only expose VK_SUBGROUP_FEATURE_ARITHMETIC_BIT for VI+</li>
+</ul>
+
+<p>Sergii Romantsov (1):</p>
+<ul>
+  <li>autotools: library-dependency when no sse and 32-bit</li>
+</ul>
+
+<p>Timothy Arceri (4):</p>
+<ul>
+  <li>st/mesa: calculate buffer size correctly for packed uniforms</li>
+  <li>st/glsl_to_nir: fix next_stage gathering</li>
+  <li>nir: add glsl_type_is_integer() helper</li>
+  <li>nir: don't pack varyings ints with floats unless flat</li>
+</ul>
+
+<p>Vadym Shovkoplias (1):</p>
+<ul>
+  <li>glsl/linker: Fix out variables linking during single stage</li>
+</ul>
+
+<p>Vinson Lee (1):</p>
+<ul>
+  <li>r600/sb: Fix constant logical operand in assert.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff --git a/include/GL/glcorearb.h b/include/GL/glcorearb.h
index a78bbb6e182..3cf945c8b20 100644
--- a/include/GL/glcorearb.h
+++ b/include/GL/glcorearb.h
@@ -1,12 +1,12 @@
-#ifndef __glcorearb_h_
-#define __glcorearb_h_ 1
+#ifndef __gl_glcorearb_h_
+#define __gl_glcorearb_h_ 1
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 /*
-** Copyright (c) 2013-2017 The Khronos Group Inc.
+** Copyright (c) 2013-2018 The Khronos Group Inc.
 **
 ** Permission is hereby granted, free of charge, to any person obtaining a
 ** copy of this software and/or associated documentation files (the
@@ -306,7 +306,7 @@ typedef void (APIENTRYP PFNGLGETTEXPARAMETERIVPROC) (GLenum target, GLenum pname
 typedef void (APIENTRYP PFNGLGETTEXLEVELPARAMETERFVPROC) (GLenum target, GLint level, GLenum pname, GLfloat *params);
 typedef void (APIENTRYP PFNGLGETTEXLEVELPARAMETERIVPROC) (GLenum target, GLint level, GLenum pname, GLint *params);
 typedef GLboolean (APIENTRYP PFNGLISENABLEDPROC) (GLenum cap);
-typedef void (APIENTRYP PFNGLDEPTHRANGEPROC) (GLdouble near, GLdouble far);
+typedef void (APIENTRYP PFNGLDEPTHRANGEPROC) (GLdouble n, GLdouble f);
 typedef void (APIENTRYP PFNGLVIEWPORTPROC) (GLint x, GLint y, GLsizei width, GLsizei height);
 #ifdef GL_GLEXT_PROTOTYPES
 GLAPI void APIENTRY glCullFace (GLenum mode);
@@ -355,7 +355,7 @@ GLAPI void APIENTRY glGetTexParameteriv (GLenum target, GLenum pname, GLint *par
 GLAPI void APIENTRY glGetTexLevelParameterfv (GLenum target, GLint level, GLenum pname, GLfloat *params);
 GLAPI void APIENTRY glGetTexLevelParameteriv (GLenum target, GLint level, GLenum pname, GLint *params);
 GLAPI GLboolean APIENTRY glIsEnabled (GLenum cap);
-GLAPI void APIENTRY glDepthRange (GLdouble near, GLdouble far);
+GLAPI void APIENTRY glDepthRange (GLdouble n, GLdouble f);
 GLAPI void APIENTRY glViewport (GLint x, GLint y, GLsizei width, GLsizei height);
 #endif
 #endif /* GL_VERSION_1_0 */
@@ -613,9 +613,9 @@ GLAPI void APIENTRY glBlendEquation (GLenum mode);
 
 #ifndef GL_VERSION_1_5
 #define GL_VERSION_1_5 1
-#include <stddef.h>
-typedef ptrdiff_t GLsizeiptr;
-typedef ptrdiff_t GLintptr;
+#include <KHR/khrplatform.h>
+typedef khronos_ssize_t GLsizeiptr;
+typedef khronos_intptr_t GLintptr;
 #define GL_BUFFER_SIZE                    0x8764
 #define GL_BUFFER_USAGE                   0x8765
 #define GL_QUERY_COUNTER_BITS             0x8864
@@ -3958,6 +3958,22 @@ GLAPI void APIENTRY glMaxShaderCompilerThreadsKHR (GLuint count);
 #define GL_KHR_texture_compression_astc_sliced_3d 1
 #endif /* GL_KHR_texture_compression_astc_sliced_3d */
 
+#ifndef GL_AMD_framebuffer_multisample_advanced
+#define GL_AMD_framebuffer_multisample_advanced 1
+#define GL_RENDERBUFFER_STORAGE_SAMPLES_AMD 0x91B2
+#define GL_MAX_COLOR_FRAMEBUFFER_SAMPLES_AMD 0x91B3
+#define GL_MAX_COLOR_FRAMEBUFFER_STORAGE_SAMPLES_AMD 0x91B4
+#define GL_MAX_DEPTH_STENCIL_FRAMEBUFFER_SAMPLES_AMD 0x91B5
+#define GL_NUM_SUPPORTED_MULTISAMPLE_MODES_AMD 0x91B6
+#define GL_SUPPORTED_MULTISAMPLE_MODES_AMD 0x91B7
+typedef void (APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC) (GLenum target, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC) (GLuint renderbuffer, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glRenderbufferStorageMultisampleAdvancedAMD (GLenum target, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height);
+GLAPI void APIENTRY glNamedRenderbufferStorageMultisampleAdvancedAMD (GLuint renderbuffer, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height);
+#endif
+#endif /* GL_AMD_framebuffer_multisample_advanced */
+
 #ifndef GL_AMD_performance_monitor
 #define GL_AMD_performance_monitor 1
 #define GL_COUNTER_TYPE_AMD               0x8BC0
@@ -4001,6 +4017,17 @@ GLAPI void APIENTRY glGetPerfMonitorCounterDataAMD (GLuint monitor, GLenum pname
 #define GL_RGB_RAW_422_APPLE              0x8A51
 #endif /* GL_APPLE_rgb_422 */
 
+#ifndef GL_EXT_EGL_image_storage
+#define GL_EXT_EGL_image_storage 1
+typedef void *GLeglImageOES;
+typedef void (APIENTRYP PFNGLEGLIMAGETARGETTEXSTORAGEEXTPROC) (GLenum target, GLeglImageOES image, const GLint* attrib_list);
+typedef void (APIENTRYP PFNGLEGLIMAGETARGETTEXTURESTORAGEEXTPROC) (GLuint texture, GLeglImageOES image, const GLint* attrib_list);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glEGLImageTargetTexStorageEXT (GLenum target, GLeglImageOES image, const GLint* attrib_list);
+GLAPI void APIENTRY glEGLImageTargetTextureStorageEXT (GLuint texture, GLeglImageOES image, const GLint* attrib_list);
+#endif
+#endif /* GL_EXT_EGL_image_storage */
+
 #ifndef GL_EXT_debug_label
 #define GL_EXT_debug_label 1
 #define GL_PROGRAM_PIPELINE_OBJECT_EXT    0x8A4F
@@ -4598,6 +4625,19 @@ GLAPI GLuint APIENTRY glCreateShaderProgramEXT (GLenum type, const GLchar *strin
 #endif
 #endif /* GL_EXT_separate_shader_objects */
 
+#ifndef GL_EXT_shader_framebuffer_fetch
+#define GL_EXT_shader_framebuffer_fetch 1
+#define GL_FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT 0x8A52
+#endif /* GL_EXT_shader_framebuffer_fetch */
+
+#ifndef GL_EXT_shader_framebuffer_fetch_non_coherent
+#define GL_EXT_shader_framebuffer_fetch_non_coherent 1
+typedef void (APIENTRYP PFNGLFRAMEBUFFERFETCHBARRIEREXTPROC) (void);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glFramebufferFetchBarrierEXT (void);
+#endif
+#endif /* GL_EXT_shader_framebuffer_fetch_non_coherent */
+
 #ifndef GL_EXT_shader_integer_mix
 #define GL_EXT_shader_integer_mix 1
 #endif /* GL_EXT_shader_integer_mix */
@@ -4612,6 +4652,8 @@ GLAPI GLuint APIENTRY glCreateShaderProgramEXT (GLenum type, const GLchar *strin
 
 #ifndef GL_EXT_texture_filter_minmax
 #define GL_EXT_texture_filter_minmax 1
+#define GL_TEXTURE_REDUCTION_MODE_EXT     0x9366
+#define GL_WEIGHTED_AVERAGE_EXT           0x9367
 #endif /* GL_EXT_texture_filter_minmax */
 
 #ifndef GL_EXT_texture_sRGB_decode
@@ -4635,6 +4677,11 @@ GLAPI void APIENTRY glWindowRectanglesEXT (GLenum mode, GLsizei count, const GLi
 #endif
 #endif /* GL_EXT_window_rectangles */
 
+#ifndef GL_INTEL_blackhole_render
+#define GL_INTEL_blackhole_render 1
+#define GL_BLACKHOLE_RENDER_INTEL         0x83FC
+#endif /* GL_INTEL_blackhole_render */
+
 #ifndef GL_INTEL_conservative_rasterization
 #define GL_INTEL_conservative_rasterization 1
 #define GL_CONSERVATIVE_RASTERIZATION_INTEL 0x83FE
@@ -4677,7 +4724,7 @@ typedef void (APIENTRYP PFNGLENDPERFQUERYINTELPROC) (GLuint queryHandle);
 typedef void (APIENTRYP PFNGLGETFIRSTPERFQUERYIDINTELPROC) (GLuint *queryId);
 typedef void (APIENTRYP PFNGLGETNEXTPERFQUERYIDINTELPROC) (GLuint queryId, GLuint *nextQueryId);
 typedef void (APIENTRYP PFNGLGETPERFCOUNTERINFOINTELPROC) (GLuint queryId, GLuint counterId, GLuint counterNameLength, GLchar *counterName, GLuint counterDescLength, GLchar *counterDesc, GLuint *counterOffset, GLuint *counterDataSize, GLuint *counterTypeEnum, GLuint *counterDataTypeEnum, GLuint64 *rawCounterMaxValue);
-typedef void (APIENTRYP PFNGLGETPERFQUERYDATAINTELPROC) (GLuint queryHandle, GLuint flags, GLsizei dataSize, GLvoid *data, GLuint *bytesWritten);
+typedef void (APIENTRYP PFNGLGETPERFQUERYDATAINTELPROC) (GLuint queryHandle, GLuint flags, GLsizei dataSize, void *data, GLuint *bytesWritten);
 typedef void (APIENTRYP PFNGLGETPERFQUERYIDBYNAMEINTELPROC) (GLchar *queryName, GLuint *queryId);
 typedef void (APIENTRYP PFNGLGETPERFQUERYINFOINTELPROC) (GLuint queryId, GLuint queryNameLength, GLchar *queryName, GLuint *dataSize, GLuint *noCounters, GLuint *noInstances, GLuint *capsMask);
 #ifdef GL_GLEXT_PROTOTYPES
@@ -4688,7 +4735,7 @@ GLAPI void APIENTRY glEndPerfQueryINTEL (GLuint queryHandle);
 GLAPI void APIENTRY glGetFirstPerfQueryIdINTEL (GLuint *queryId);
 GLAPI void APIENTRY glGetNextPerfQueryIdINTEL (GLuint queryId, GLuint *nextQueryId);
 GLAPI void APIENTRY glGetPerfCounterInfoINTEL (GLuint queryId, GLuint counterId, GLuint counterNameLength, GLchar *counterName, GLuint counterDescLength, GLchar *counterDesc, GLuint *counterOffset, GLuint *counterDataSize, GLuint *counterTypeEnum, GLuint *counterDataTypeEnum, GLuint64 *rawCounterMaxValue);
-GLAPI void APIENTRY glGetPerfQueryDataINTEL (GLuint queryHandle, GLuint flags, GLsizei dataSize, GLvoid *data, GLuint *bytesWritten);
+GLAPI void APIENTRY glGetPerfQueryDataINTEL (GLuint queryHandle, GLuint flags, GLsizei dataSize, void *data, GLuint *bytesWritten);
 GLAPI void APIENTRY glGetPerfQueryIdByNameINTEL (GLchar *queryName, GLuint *queryId);
 GLAPI void APIENTRY glGetPerfQueryInfoINTEL (GLuint queryId, GLuint queryNameLength, GLchar *queryName, GLuint *dataSize, GLuint *noCounters, GLuint *noInstances, GLuint *capsMask);
 #endif
@@ -4923,6 +4970,11 @@ GLAPI void APIENTRY glConservativeRasterParameterfNV (GLenum pname, GLfloat valu
 #endif
 #endif /* GL_NV_conservative_raster_dilate */
 
+#ifndef GL_NV_conservative_raster_pre_snap
+#define GL_NV_conservative_raster_pre_snap 1
+#define GL_CONSERVATIVE_RASTER_MODE_PRE_SNAP_NV 0x9550
+#endif /* GL_NV_conservative_raster_pre_snap */
+
 #ifndef GL_NV_conservative_raster_pre_snap_triangles
 #define GL_NV_conservative_raster_pre_snap_triangles 1
 #define GL_CONSERVATIVE_RASTER_MODE_NV    0x954D
@@ -4934,6 +4986,10 @@ GLAPI void APIENTRY glConservativeRasterParameteriNV (GLenum pname, GLint param)
 #endif
 #endif /* GL_NV_conservative_raster_pre_snap_triangles */
 
+#ifndef GL_NV_conservative_raster_underestimation
+#define GL_NV_conservative_raster_underestimation 1
+#endif /* GL_NV_conservative_raster_underestimation */
+
 #ifndef GL_NV_draw_vulkan_image
 #define GL_NV_draw_vulkan_image 1
 typedef void (APIENTRY  *GLVULKANPROCNV)(void);
diff --git a/include/GL/glext.h b/include/GL/glext.h
index 75fd1f61185..181df28d3bb 100644
--- a/include/GL/glext.h
+++ b/include/GL/glext.h
@@ -1,12 +1,12 @@
-#ifndef __glext_h_
-#define __glext_h_ 1
+#ifndef __gl_glext_h_
+#define __gl_glext_h_ 1
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 /*
-** Copyright (c) 2013-2017 The Khronos Group Inc.
+** Copyright (c) 2013-2018 The Khronos Group Inc.
 **
 ** Permission is hereby granted, free of charge, to any person obtaining a
 ** copy of this software and/or associated documentation files (the
@@ -51,7 +51,7 @@ extern "C" {
 #define GLAPI extern
 #endif
 
-#define GL_GLEXT_VERSION 20171010
+#define GL_GLEXT_VERSION 20180725
 
 /* Generated C header for:
  * API: gl
@@ -464,9 +464,9 @@ GLAPI void APIENTRY glBlendEquation (GLenum mode);
 
 #ifndef GL_VERSION_1_5
 #define GL_VERSION_1_5 1
-#include <stddef.h>
-typedef ptrdiff_t GLsizeiptr;
-typedef ptrdiff_t GLintptr;
+#include <KHR/khrplatform.h>
+typedef khronos_ssize_t GLsizeiptr;
+typedef khronos_intptr_t GLintptr;
 #define GL_BUFFER_SIZE                    0x8764
 #define GL_BUFFER_USAGE                   0x8765
 #define GL_QUERY_COUNTER_BITS             0x8864
@@ -4718,6 +4718,7 @@ GLAPI void APIENTRY glVertexBlendARB (GLint count);
 
 #ifndef GL_ARB_vertex_buffer_object
 #define GL_ARB_vertex_buffer_object 1
+#include <stddef.h>
 typedef ptrdiff_t GLsizeiptrARB;
 typedef ptrdiff_t GLintptrARB;
 #define GL_BUFFER_SIZE_ARB                0x8764
@@ -5445,6 +5446,22 @@ GLAPI void APIENTRY glBlendEquationSeparateIndexedAMD (GLuint buf, GLenum modeRG
 #endif
 #endif /* GL_AMD_draw_buffers_blend */
 
+#ifndef GL_AMD_framebuffer_multisample_advanced
+#define GL_AMD_framebuffer_multisample_advanced 1
+#define GL_RENDERBUFFER_STORAGE_SAMPLES_AMD 0x91B2
+#define GL_MAX_COLOR_FRAMEBUFFER_SAMPLES_AMD 0x91B3
+#define GL_MAX_COLOR_FRAMEBUFFER_STORAGE_SAMPLES_AMD 0x91B4
+#define GL_MAX_DEPTH_STENCIL_FRAMEBUFFER_SAMPLES_AMD 0x91B5
+#define GL_NUM_SUPPORTED_MULTISAMPLE_MODES_AMD 0x91B6
+#define GL_SUPPORTED_MULTISAMPLE_MODES_AMD 0x91B7
+typedef void (APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC) (GLenum target, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height);
+typedef void (APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC) (GLuint renderbuffer, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glRenderbufferStorageMultisampleAdvancedAMD (GLenum target, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height);
+GLAPI void APIENTRY glNamedRenderbufferStorageMultisampleAdvancedAMD (GLuint renderbuffer, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height);
+#endif
+#endif /* GL_AMD_framebuffer_multisample_advanced */
+
 #ifndef GL_AMD_framebuffer_sample_positions
 #define GL_AMD_framebuffer_sample_positions 1
 #define GL_SUBSAMPLE_DISTANCE_AMD         0x883F
@@ -5709,6 +5726,10 @@ GLAPI void APIENTRY glSetMultisamplefvAMD (GLenum pname, GLuint index, const GLf
 #define GL_AMD_shader_explicit_vertex_parameter 1
 #endif /* GL_AMD_shader_explicit_vertex_parameter */
 
+#ifndef GL_AMD_shader_gpu_shader_half_float_fetch
+#define GL_AMD_shader_gpu_shader_half_float_fetch 1
+#endif /* GL_AMD_shader_gpu_shader_half_float_fetch */
+
 #ifndef GL_AMD_shader_image_load_store_lod
 #define GL_AMD_shader_image_load_store_lod 1
 #endif /* GL_AMD_shader_image_load_store_lod */
@@ -6456,6 +6477,17 @@ GLAPI void APIENTRY glVertexBlendEnvfATI (GLenum pname, GLfloat param);
 #define GL_422_REV_AVERAGE_EXT            0x80CF
 #endif /* GL_EXT_422_pixels */
 
+#ifndef GL_EXT_EGL_image_storage
+#define GL_EXT_EGL_image_storage 1
+typedef void *GLeglImageOES;
+typedef void (APIENTRYP PFNGLEGLIMAGETARGETTEXSTORAGEEXTPROC) (GLenum target, GLeglImageOES image, const GLint* attrib_list);
+typedef void (APIENTRYP PFNGLEGLIMAGETARGETTEXTURESTORAGEEXTPROC) (GLuint texture, GLeglImageOES image, const GLint* attrib_list);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glEGLImageTargetTexStorageEXT (GLenum target, GLeglImageOES image, const GLint* attrib_list);
+GLAPI void APIENTRY glEGLImageTargetTextureStorageEXT (GLuint texture, GLeglImageOES image, const GLint* attrib_list);
+#endif
+#endif /* GL_EXT_EGL_image_storage */
+
 #ifndef GL_EXT_abgr
 #define GL_EXT_abgr 1
 #define GL_ABGR_EXT                       0x8000
@@ -7994,6 +8026,8 @@ GLAPI void APIENTRY glSecondaryColorPointerEXT (GLint size, GLenum type, GLsizei
 #define GL_LAYOUT_SHADER_READ_ONLY_EXT    0x9591
 #define GL_LAYOUT_TRANSFER_SRC_EXT        0x9592
 #define GL_LAYOUT_TRANSFER_DST_EXT        0x9593
+#define GL_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_EXT 0x9530
+#define GL_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_EXT 0x9531
 typedef void (APIENTRYP PFNGLGENSEMAPHORESEXTPROC) (GLsizei n, GLuint *semaphores);
 typedef void (APIENTRYP PFNGLDELETESEMAPHORESEXTPROC) (GLsizei n, const GLuint *semaphores);
 typedef GLboolean (APIENTRYP PFNGLISSEMAPHOREEXTPROC) (GLuint semaphore);
@@ -8052,6 +8086,19 @@ GLAPI GLuint APIENTRY glCreateShaderProgramEXT (GLenum type, const GLchar *strin
 #define GL_SEPARATE_SPECULAR_COLOR_EXT    0x81FA
 #endif /* GL_EXT_separate_specular_color */
 
+#ifndef GL_EXT_shader_framebuffer_fetch
+#define GL_EXT_shader_framebuffer_fetch 1
+#define GL_FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT 0x8A52
+#endif /* GL_EXT_shader_framebuffer_fetch */
+
+#ifndef GL_EXT_shader_framebuffer_fetch_non_coherent
+#define GL_EXT_shader_framebuffer_fetch_non_coherent 1
+typedef void (APIENTRYP PFNGLFRAMEBUFFERFETCHBARRIEREXTPROC) (void);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glFramebufferFetchBarrierEXT (void);
+#endif
+#endif /* GL_EXT_shader_framebuffer_fetch_non_coherent */
+
 #ifndef GL_EXT_shader_image_load_formatted
 #define GL_EXT_shader_image_load_formatted 1
 #endif /* GL_EXT_shader_image_load_formatted */
@@ -8352,6 +8399,8 @@ GLAPI void APIENTRY glTexBufferEXT (GLenum target, GLenum internalformat, GLuint
 
 #ifndef GL_EXT_texture_filter_minmax
 #define GL_EXT_texture_filter_minmax 1
+#define GL_TEXTURE_REDUCTION_MODE_EXT     0x9366
+#define GL_WEIGHTED_AVERAGE_EXT           0x9367
 #endif /* GL_EXT_texture_filter_minmax */
 
 #ifndef GL_EXT_texture_integer
@@ -9099,6 +9148,11 @@ GLAPI void APIENTRY glBlendFuncSeparateINGR (GLenum sfactorRGB, GLenum dfactorRG
 #define GL_INTERLACE_READ_INGR            0x8568
 #endif /* GL_INGR_interlace_read */
 
+#ifndef GL_INTEL_blackhole_render
+#define GL_INTEL_blackhole_render 1
+#define GL_BLACKHOLE_RENDER_INTEL         0x83FC
+#endif /* GL_INTEL_blackhole_render */
+
 #ifndef GL_INTEL_conservative_rasterization
 #define GL_INTEL_conservative_rasterization 1
 #define GL_CONSERVATIVE_RASTERIZATION_INTEL 0x83FE
@@ -9180,7 +9234,7 @@ typedef void (APIENTRYP PFNGLENDPERFQUERYINTELPROC) (GLuint queryHandle);
 typedef void (APIENTRYP PFNGLGETFIRSTPERFQUERYIDINTELPROC) (GLuint *queryId);
 typedef void (APIENTRYP PFNGLGETNEXTPERFQUERYIDINTELPROC) (GLuint queryId, GLuint *nextQueryId);
 typedef void (APIENTRYP PFNGLGETPERFCOUNTERINFOINTELPROC) (GLuint queryId, GLuint counterId, GLuint counterNameLength, GLchar *counterName, GLuint counterDescLength, GLchar *counterDesc, GLuint *counterOffset, GLuint *counterDataSize, GLuint *counterTypeEnum, GLuint *counterDataTypeEnum, GLuint64 *rawCounterMaxValue);
-typedef void (APIENTRYP PFNGLGETPERFQUERYDATAINTELPROC) (GLuint queryHandle, GLuint flags, GLsizei dataSize, GLvoid *data, GLuint *bytesWritten);
+typedef void (APIENTRYP PFNGLGETPERFQUERYDATAINTELPROC) (GLuint queryHandle, GLuint flags, GLsizei dataSize, void *data, GLuint *bytesWritten);
 typedef void (APIENTRYP PFNGLGETPERFQUERYIDBYNAMEINTELPROC) (GLchar *queryName, GLuint *queryId);
 typedef void (APIENTRYP PFNGLGETPERFQUERYINFOINTELPROC) (GLuint queryId, GLuint queryNameLength, GLchar *queryName, GLuint *dataSize, GLuint *noCounters, GLuint *noInstances, GLuint *capsMask);
 #ifdef GL_GLEXT_PROTOTYPES
@@ -9191,7 +9245,7 @@ GLAPI void APIENTRY glEndPerfQueryINTEL (GLuint queryHandle);
 GLAPI void APIENTRY glGetFirstPerfQueryIdINTEL (GLuint *queryId);
 GLAPI void APIENTRY glGetNextPerfQueryIdINTEL (GLuint queryId, GLuint *nextQueryId);
 GLAPI void APIENTRY glGetPerfCounterInfoINTEL (GLuint queryId, GLuint counterId, GLuint counterNameLength, GLchar *counterName, GLuint counterDescLength, GLchar *counterDesc, GLuint *counterOffset, GLuint *counterDataSize, GLuint *counterTypeEnum, GLuint *counterDataTypeEnum, GLuint64 *rawCounterMaxValue);
-GLAPI void APIENTRY glGetPerfQueryDataINTEL (GLuint queryHandle, GLuint flags, GLsizei dataSize, GLvoid *data, GLuint *bytesWritten);
+GLAPI void APIENTRY glGetPerfQueryDataINTEL (GLuint queryHandle, GLuint flags, GLsizei dataSize, void *data, GLuint *bytesWritten);
 GLAPI void APIENTRY glGetPerfQueryIdByNameINTEL (GLchar *queryName, GLuint *queryId);
 GLAPI void APIENTRY glGetPerfQueryInfoINTEL (GLuint queryId, GLuint queryNameLength, GLchar *queryName, GLuint *dataSize, GLuint *noCounters, GLuint *noInstances, GLuint *capsMask);
 #endif
@@ -9583,6 +9637,11 @@ GLAPI void APIENTRY glConservativeRasterParameterfNV (GLenum pname, GLfloat valu
 #endif
 #endif /* GL_NV_conservative_raster_dilate */
 
+#ifndef GL_NV_conservative_raster_pre_snap
+#define GL_NV_conservative_raster_pre_snap 1
+#define GL_CONSERVATIVE_RASTER_MODE_PRE_SNAP_NV 0x9550
+#endif /* GL_NV_conservative_raster_pre_snap */
+
 #ifndef GL_NV_conservative_raster_pre_snap_triangles
 #define GL_NV_conservative_raster_pre_snap_triangles 1
 #define GL_CONSERVATIVE_RASTER_MODE_NV    0x954D
@@ -9594,6 +9653,10 @@ GLAPI void APIENTRY glConservativeRasterParameteriNV (GLenum pname, GLint param)
 #endif
 #endif /* GL_NV_conservative_raster_pre_snap_triangles */
 
+#ifndef GL_NV_conservative_raster_underestimation
+#define GL_NV_conservative_raster_underestimation 1
+#endif /* GL_NV_conservative_raster_underestimation */
+
 #ifndef GL_NV_copy_depth_to_color
 #define GL_NV_copy_depth_to_color 1
 #define GL_DEPTH_STENCIL_TO_RGBA_NV       0x886E
@@ -9902,7 +9965,7 @@ GLAPI void APIENTRY glFramebufferTextureFaceEXT (GLenum target, GLenum attachmen
 #define GL_PER_GPU_STORAGE_NV             0x9548
 #define GL_MULTICAST_PROGRAMMABLE_SAMPLE_LOCATION_NV 0x9549
 typedef void (APIENTRYP PFNGLRENDERGPUMASKNVPROC) (GLbitfield mask);
-typedef void (APIENTRYP PFNGLMULTICASTBUFFERSUBDATANVPROC) (GLbitfield gpuMask, GLuint buffer, GLintptr offset, GLsizeiptr size, const GLvoid *data);
+typedef void (APIENTRYP PFNGLMULTICASTBUFFERSUBDATANVPROC) (GLbitfield gpuMask, GLuint buffer, GLintptr offset, GLsizeiptr size, const void *data);
 typedef void (APIENTRYP PFNGLMULTICASTCOPYBUFFERSUBDATANVPROC) (GLuint readGpu, GLbitfield writeGpuMask, GLuint readBuffer, GLuint writeBuffer, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size);
 typedef void (APIENTRYP PFNGLMULTICASTCOPYIMAGESUBDATANVPROC) (GLuint srcGpu, GLbitfield dstGpuMask, GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, GLuint dstName, GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei srcWidth, GLsizei srcHeight, GLsizei srcDepth);
 typedef void (APIENTRYP PFNGLMULTICASTBLITFRAMEBUFFERNVPROC) (GLuint srcGpu, GLuint dstGpu, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter);
@@ -9915,7 +9978,7 @@ typedef void (APIENTRYP PFNGLMULTICASTGETQUERYOBJECTI64VNVPROC) (GLuint gpu, GLu
 typedef void (APIENTRYP PFNGLMULTICASTGETQUERYOBJECTUI64VNVPROC) (GLuint gpu, GLuint id, GLenum pname, GLuint64 *params);
 #ifdef GL_GLEXT_PROTOTYPES
 GLAPI void APIENTRY glRenderGpuMaskNV (GLbitfield mask);
-GLAPI void APIENTRY glMulticastBufferSubDataNV (GLbitfield gpuMask, GLuint buffer, GLintptr offset, GLsizeiptr size, const GLvoid *data);
+GLAPI void APIENTRY glMulticastBufferSubDataNV (GLbitfield gpuMask, GLuint buffer, GLintptr offset, GLsizeiptr size, const void *data);
 GLAPI void APIENTRY glMulticastCopyBufferSubDataNV (GLuint readGpu, GLbitfield writeGpuMask, GLuint readBuffer, GLuint writeBuffer, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size);
 GLAPI void APIENTRY glMulticastCopyImageSubDataNV (GLuint srcGpu, GLbitfield dstGpuMask, GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, GLuint dstName, GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei srcWidth, GLsizei srcHeight, GLsizei srcDepth);
 GLAPI void APIENTRY glMulticastBlitFramebufferNV (GLuint srcGpu, GLuint dstGpu, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter);
diff --git a/include/GL/glxext.h b/include/GL/glxext.h
index 0f60a380c21..4c984ef4b89 100644
--- a/include/GL/glxext.h
+++ b/include/GL/glxext.h
@@ -1,12 +1,12 @@
-#ifndef __glxext_h_
-#define __glxext_h_ 1
+#ifndef __glx_glxext_h_
+#define __glx_glxext_h_ 1
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 /*
-** Copyright (c) 2013-2017 The Khronos Group Inc.
+** Copyright (c) 2013-2018 The Khronos Group Inc.
 **
 ** Permission is hereby granted, free of charge, to any person obtaining a
 ** copy of this software and/or associated documentation files (the
@@ -34,7 +34,7 @@ extern "C" {
 **   https://github.com/KhronosGroup/OpenGL-Registry
 */
 
-#define GLX_GLXEXT_VERSION 20170728
+#define GLX_GLXEXT_VERSION 20180525
 
 /* Generated C header for:
  * API: glx
@@ -325,6 +325,10 @@ void glXFreeContextEXT (Display *dpy, GLXContext context);
 #define GLX_VENDOR_NAMES_EXT              0x20F6
 #endif /* GLX_EXT_libglvnd */
 
+#ifndef GLX_EXT_no_config_context
+#define GLX_EXT_no_config_context 1
+#endif /* GLX_EXT_no_config_context */
+
 #ifndef GLX_EXT_stereo_tree
 #define GLX_EXT_stereo_tree 1
 typedef struct {
@@ -503,6 +507,16 @@ Bool glXSet3DfxModeMESA (int mode);
 #endif
 #endif /* GLX_MESA_set_3dfx_mode */
 
+#ifndef GLX_MESA_swap_control
+#define GLX_MESA_swap_control 1
+typedef int ( *PFNGLXGETSWAPINTERVALMESAPROC) (void);
+typedef int ( *PFNGLXSWAPINTERVALMESAPROC) (unsigned int interval);
+#ifdef GLX_GLXEXT_PROTOTYPES
+int glXGetSwapIntervalMESA (void);
+int glXSwapIntervalMESA (unsigned int interval);
+#endif
+#endif /* GLX_MESA_swap_control */
+
 #ifndef GLX_NV_copy_buffer
 #define GLX_NV_copy_buffer 1
 typedef void ( *PFNGLXCOPYBUFFERSUBDATANVPROC) (Display *dpy, GLXContext readCtx, GLXContext writeCtx, GLenum readTarget, GLenum writeTarget, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size);
diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h
index c32cdd3767a..08d63184d1d 100644
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -1333,6 +1333,10 @@ struct __DRIdri2ExtensionRec {
 #define __DRI_IMAGE_FOURCC_YVU422	0x36315659
 #define __DRI_IMAGE_FOURCC_YVU444	0x34325659
 
+#define __DRI_IMAGE_FOURCC_P010     0x30313050
+#define __DRI_IMAGE_FOURCC_P012     0x32313050
+#define __DRI_IMAGE_FOURCC_P016     0x36313050
+
 /**
  * Queryable on images created by createImageFromNames.
  *
diff --git a/include/GLES2/gl2.h b/include/GLES2/gl2.h
index 8ba907c892c..b4051e5a7c5 100644
--- a/include/GLES2/gl2.h
+++ b/include/GLES2/gl2.h
@@ -1,12 +1,12 @@
-#ifndef __gl2_h_
-#define __gl2_h_ 1
+#ifndef __gles2_gl2_h_
+#define __gles2_gl2_h_ 1
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 /*
-** Copyright (c) 2013-2017 The Khronos Group Inc.
+** Copyright (c) 2013-2018 The Khronos Group Inc.
 **
 ** Permission is hereby granted, free of charge, to any person obtaining a
 ** copy of this software and/or associated documentation files (the
@@ -44,7 +44,7 @@ extern "C" {
 #define GL_GLES_PROTOTYPES 1
 #endif
 
-/* Generated on date 20170606 */
+/* Generated on date 20180725 */
 
 /* Generated C header for:
  * API: gles2
diff --git a/include/GLES2/gl2ext.h b/include/GLES2/gl2ext.h
index 0a93bfb8652..559173dee45 100644
--- a/include/GLES2/gl2ext.h
+++ b/include/GLES2/gl2ext.h
@@ -1,12 +1,12 @@
-#ifndef __gl2ext_h_
-#define __gl2ext_h_ 1
+#ifndef __gles2_gl2ext_h_
+#define __gles2_gl2ext_h_ 1
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 /*
-** Copyright (c) 2013-2017 The Khronos Group Inc.
+** Copyright (c) 2013-2018 The Khronos Group Inc.
 **
 ** Permission is hereby granted, free of charge, to any person obtaining a
 ** copy of this software and/or associated documentation files (the
@@ -38,7 +38,7 @@ extern "C" {
 #define GL_APIENTRYP GL_APIENTRY*
 #endif
 
-/* Generated on date 20170804 */
+/* Generated on date 20180725 */
 
 /* Generated C header for:
  * API: gles2
@@ -159,6 +159,16 @@ GL_APICALL void GL_APIENTRY glGetPointervKHR (GLenum pname, void **params);
 #define GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR  0x00000008
 #endif /* GL_KHR_no_error */
 
+#ifndef GL_KHR_parallel_shader_compile
+#define GL_KHR_parallel_shader_compile 1
+#define GL_MAX_SHADER_COMPILER_THREADS_KHR 0x91B0
+#define GL_COMPLETION_STATUS_KHR          0x91B1
+typedef void (GL_APIENTRYP PFNGLMAXSHADERCOMPILERTHREADSKHRPROC) (GLuint count);
+#ifdef GL_GLEXT_PROTOTYPES
+GL_APICALL void GL_APIENTRY glMaxShaderCompilerThreadsKHR (GLuint count);
+#endif
+#endif /* GL_KHR_parallel_shader_compile */
+
 #ifndef GL_KHR_robust_buffer_access_behavior
 #define GL_KHR_robust_buffer_access_behavior 1
 #endif /* GL_KHR_robust_buffer_access_behavior */
@@ -791,6 +801,22 @@ GL_APICALL void GL_APIENTRY glGetFloati_vOES (GLenum target, GLuint index, GLflo
 #define GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD 0x87EE
 #endif /* GL_AMD_compressed_ATC_texture */
 
+#ifndef GL_AMD_framebuffer_multisample_advanced
+#define GL_AMD_framebuffer_multisample_advanced 1
+#define GL_RENDERBUFFER_STORAGE_SAMPLES_AMD 0x91B2
+#define GL_MAX_COLOR_FRAMEBUFFER_SAMPLES_AMD 0x91B3
+#define GL_MAX_COLOR_FRAMEBUFFER_STORAGE_SAMPLES_AMD 0x91B4
+#define GL_MAX_DEPTH_STENCIL_FRAMEBUFFER_SAMPLES_AMD 0x91B5
+#define GL_NUM_SUPPORTED_MULTISAMPLE_MODES_AMD 0x91B6
+#define GL_SUPPORTED_MULTISAMPLE_MODES_AMD 0x91B7
+typedef void (GL_APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC) (GLenum target, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height);
+typedef void (GL_APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC) (GLuint renderbuffer, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height);
+#ifdef GL_GLEXT_PROTOTYPES
+GL_APICALL void GL_APIENTRY glRenderbufferStorageMultisampleAdvancedAMD (GLenum target, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height);
+GL_APICALL void GL_APIENTRY glNamedRenderbufferStorageMultisampleAdvancedAMD (GLuint renderbuffer, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height);
+#endif
+#endif /* GL_AMD_framebuffer_multisample_advanced */
+
 #ifndef GL_AMD_performance_monitor
 #define GL_AMD_performance_monitor 1
 #define GL_COUNTER_TYPE_AMD               0x8BC0
@@ -1055,6 +1081,16 @@ GL_APICALL void GL_APIENTRY glGetSyncivAPPLE (GLsync sync, GLenum pname, GLsizei
 #define GL_EXT_EGL_image_array 1
 #endif /* GL_EXT_EGL_image_array */
 
+#ifndef GL_EXT_EGL_image_storage
+#define GL_EXT_EGL_image_storage 1
+typedef void (GL_APIENTRYP PFNGLEGLIMAGETARGETTEXSTORAGEEXTPROC) (GLenum target, GLeglImageOES image, const GLint* attrib_list);
+typedef void (GL_APIENTRYP PFNGLEGLIMAGETARGETTEXTURESTORAGEEXTPROC) (GLuint texture, GLeglImageOES image, const GLint* attrib_list);
+#ifdef GL_GLEXT_PROTOTYPES
+GL_APICALL void GL_APIENTRY glEGLImageTargetTexStorageEXT (GLenum target, GLeglImageOES image, const GLint* attrib_list);
+GL_APICALL void GL_APIENTRY glEGLImageTargetTextureStorageEXT (GLuint texture, GLeglImageOES image, const GLint* attrib_list);
+#endif
+#endif /* GL_EXT_EGL_image_storage */
+
 #ifndef GL_EXT_YUV_target
 #define GL_EXT_YUV_target 1
 #define GL_SAMPLER_EXTERNAL_2D_Y2Y_EXT    0x8BE7
@@ -1126,6 +1162,20 @@ GL_APICALL void GL_APIENTRY glClearTexSubImageEXT (GLuint texture, GLint level,
 #endif
 #endif /* GL_EXT_clear_texture */
 
+#ifndef GL_EXT_clip_control
+#define GL_EXT_clip_control 1
+#define GL_LOWER_LEFT_EXT                 0x8CA1
+#define GL_UPPER_LEFT_EXT                 0x8CA2
+#define GL_NEGATIVE_ONE_TO_ONE_EXT        0x935E
+#define GL_ZERO_TO_ONE_EXT                0x935F
+#define GL_CLIP_ORIGIN_EXT                0x935C
+#define GL_CLIP_DEPTH_MODE_EXT            0x935D
+typedef void (GL_APIENTRYP PFNGLCLIPCONTROLEXTPROC) (GLenum origin, GLenum depth);
+#ifdef GL_GLEXT_PROTOTYPES
+GL_APICALL void GL_APIENTRY glClipControlEXT (GLenum origin, GLenum depth);
+#endif
+#endif /* GL_EXT_clip_control */
+
 #ifndef GL_EXT_clip_cull_distance
 #define GL_EXT_clip_cull_distance 1
 #define GL_MAX_CLIP_DISTANCES_EXT         0x0D32
@@ -1680,6 +1730,8 @@ GL_APICALL void GL_APIENTRY glGetnUniformivEXT (GLuint program, GLint location,
 #define GL_LAYOUT_SHADER_READ_ONLY_EXT    0x9591
 #define GL_LAYOUT_TRANSFER_SRC_EXT        0x9592
 #define GL_LAYOUT_TRANSFER_DST_EXT        0x9593
+#define GL_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_EXT 0x9530
+#define GL_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_EXT 0x9531
 typedef void (GL_APIENTRYP PFNGLGENSEMAPHORESEXTPROC) (GLsizei n, GLuint *semaphores);
 typedef void (GL_APIENTRYP PFNGLDELETESEMAPHORESEXTPROC) (GLsizei n, const GLuint *semaphores);
 typedef GLboolean (GL_APIENTRYP PFNGLISSEMAPHOREEXTPROC) (GLuint semaphore);
@@ -1823,6 +1875,14 @@ GL_APICALL void GL_APIENTRY glProgramUniformMatrix4x3fvEXT (GLuint program, GLin
 #define GL_FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT 0x8A52
 #endif /* GL_EXT_shader_framebuffer_fetch */
 
+#ifndef GL_EXT_shader_framebuffer_fetch_non_coherent
+#define GL_EXT_shader_framebuffer_fetch_non_coherent 1
+typedef void (GL_APIENTRYP PFNGLFRAMEBUFFERFETCHBARRIEREXTPROC) (void);
+#ifdef GL_GLEXT_PROTOTYPES
+GL_APICALL void GL_APIENTRY glFramebufferFetchBarrierEXT (void);
+#endif
+#endif /* GL_EXT_shader_framebuffer_fetch_non_coherent */
+
 #ifndef GL_EXT_shader_group_vote
 #define GL_EXT_shader_group_vote 1
 #endif /* GL_EXT_shader_group_vote */
@@ -2067,12 +2127,24 @@ GL_APICALL void GL_APIENTRY glTexBufferRangeEXT (GLenum target, GLenum internalf
 
 #ifndef GL_EXT_texture_filter_minmax
 #define GL_EXT_texture_filter_minmax 1
+#define GL_TEXTURE_REDUCTION_MODE_EXT     0x9366
+#define GL_WEIGHTED_AVERAGE_EXT           0x9367
 #endif /* GL_EXT_texture_filter_minmax */
 
 #ifndef GL_EXT_texture_format_BGRA8888
 #define GL_EXT_texture_format_BGRA8888 1
 #endif /* GL_EXT_texture_format_BGRA8888 */
 
+#ifndef GL_EXT_texture_format_sRGB_override
+#define GL_EXT_texture_format_sRGB_override 1
+#define GL_TEXTURE_FORMAT_SRGB_OVERRIDE_EXT 0x8FBF
+#endif /* GL_EXT_texture_format_sRGB_override */
+
+#ifndef GL_EXT_texture_mirror_clamp_to_edge
+#define GL_EXT_texture_mirror_clamp_to_edge 1
+#define GL_MIRROR_CLAMP_TO_EDGE_EXT       0x8743
+#endif /* GL_EXT_texture_mirror_clamp_to_edge */
+
 #ifndef GL_EXT_texture_norm16
 #define GL_EXT_texture_norm16 1
 #define GL_R16_EXT                        0x822A
@@ -2275,6 +2347,11 @@ GL_APICALL void GL_APIENTRY glFramebufferTexture2DMultisampleIMG (GLenum target,
 #define GL_CUBIC_MIPMAP_LINEAR_IMG        0x913B
 #endif /* GL_IMG_texture_filter_cubic */
 
+#ifndef GL_INTEL_blackhole_render
+#define GL_INTEL_blackhole_render 1
+#define GL_BLACKHOLE_RENDER_INTEL         0x83FC
+#endif /* GL_INTEL_blackhole_render */
+
 #ifndef GL_INTEL_conservative_rasterization
 #define GL_INTEL_conservative_rasterization 1
 #define GL_CONSERVATIVE_RASTERIZATION_INTEL 0x83FE
@@ -2317,7 +2394,7 @@ typedef void (GL_APIENTRYP PFNGLENDPERFQUERYINTELPROC) (GLuint queryHandle);
 typedef void (GL_APIENTRYP PFNGLGETFIRSTPERFQUERYIDINTELPROC) (GLuint *queryId);
 typedef void (GL_APIENTRYP PFNGLGETNEXTPERFQUERYIDINTELPROC) (GLuint queryId, GLuint *nextQueryId);
 typedef void (GL_APIENTRYP PFNGLGETPERFCOUNTERINFOINTELPROC) (GLuint queryId, GLuint counterId, GLuint counterNameLength, GLchar *counterName, GLuint counterDescLength, GLchar *counterDesc, GLuint *counterOffset, GLuint *counterDataSize, GLuint *counterTypeEnum, GLuint *counterDataTypeEnum, GLuint64 *rawCounterMaxValue);
-typedef void (GL_APIENTRYP PFNGLGETPERFQUERYDATAINTELPROC) (GLuint queryHandle, GLuint flags, GLsizei dataSize, GLvoid *data, GLuint *bytesWritten);
+typedef void (GL_APIENTRYP PFNGLGETPERFQUERYDATAINTELPROC) (GLuint queryHandle, GLuint flags, GLsizei dataSize, void *data, GLuint *bytesWritten);
 typedef void (GL_APIENTRYP PFNGLGETPERFQUERYIDBYNAMEINTELPROC) (GLchar *queryName, GLuint *queryId);
 typedef void (GL_APIENTRYP PFNGLGETPERFQUERYINFOINTELPROC) (GLuint queryId, GLuint queryNameLength, GLchar *queryName, GLuint *dataSize, GLuint *noCounters, GLuint *noInstances, GLuint *capsMask);
 #ifdef GL_GLEXT_PROTOTYPES
@@ -2328,7 +2405,7 @@ GL_APICALL void GL_APIENTRY glEndPerfQueryINTEL (GLuint queryHandle);
 GL_APICALL void GL_APIENTRY glGetFirstPerfQueryIdINTEL (GLuint *queryId);
 GL_APICALL void GL_APIENTRY glGetNextPerfQueryIdINTEL (GLuint queryId, GLuint *nextQueryId);
 GL_APICALL void GL_APIENTRY glGetPerfCounterInfoINTEL (GLuint queryId, GLuint counterId, GLuint counterNameLength, GLchar *counterName, GLuint counterDescLength, GLchar *counterDesc, GLuint *counterOffset, GLuint *counterDataSize, GLuint *counterTypeEnum, GLuint *counterDataTypeEnum, GLuint64 *rawCounterMaxValue);
-GL_APICALL void GL_APIENTRY glGetPerfQueryDataINTEL (GLuint queryHandle, GLuint flags, GLsizei dataSize, GLvoid *data, GLuint *bytesWritten);
+GL_APICALL void GL_APIENTRY glGetPerfQueryDataINTEL (GLuint queryHandle, GLuint flags, GLsizei dataSize, void *data, GLuint *bytesWritten);
 GL_APICALL void GL_APIENTRY glGetPerfQueryIdByNameINTEL (GLchar *queryName, GLuint *queryId);
 GL_APICALL void GL_APIENTRY glGetPerfQueryInfoINTEL (GLuint queryId, GLuint queryNameLength, GLchar *queryName, GLuint *dataSize, GLuint *noCounters, GLuint *noInstances, GLuint *capsMask);
 #endif
@@ -2454,6 +2531,17 @@ GL_APICALL void GL_APIENTRY glBlendBarrierNV (void);
 #define GL_FACTOR_MAX_AMD                 0x901D
 #endif /* GL_NV_blend_minmax_factor */
 
+#ifndef GL_NV_clip_space_w_scaling
+#define GL_NV_clip_space_w_scaling 1
+#define GL_VIEWPORT_POSITION_W_SCALE_NV   0x937C
+#define GL_VIEWPORT_POSITION_W_SCALE_X_COEFF_NV 0x937D
+#define GL_VIEWPORT_POSITION_W_SCALE_Y_COEFF_NV 0x937E
+typedef void (GL_APIENTRYP PFNGLVIEWPORTPOSITIONWSCALENVPROC) (GLuint index, GLfloat xcoeff, GLfloat ycoeff);
+#ifdef GL_GLEXT_PROTOTYPES
+GL_APICALL void GL_APIENTRY glViewportPositionWScaleNV (GLuint index, GLfloat xcoeff, GLfloat ycoeff);
+#endif
+#endif /* GL_NV_clip_space_w_scaling */
+
 #ifndef GL_NV_conditional_render
 #define GL_NV_conditional_render 1
 #define GL_QUERY_WAIT_NV                  0x8E13
@@ -2480,6 +2568,11 @@ GL_APICALL void GL_APIENTRY glSubpixelPrecisionBiasNV (GLuint xbits, GLuint ybit
 #endif
 #endif /* GL_NV_conservative_raster */
 
+#ifndef GL_NV_conservative_raster_pre_snap
+#define GL_NV_conservative_raster_pre_snap 1
+#define GL_CONSERVATIVE_RASTER_MODE_PRE_SNAP_NV 0x9550
+#endif /* GL_NV_conservative_raster_pre_snap */
+
 #ifndef GL_NV_conservative_raster_pre_snap_triangles
 #define GL_NV_conservative_raster_pre_snap_triangles 1
 #define GL_CONSERVATIVE_RASTER_MODE_NV    0x954D
@@ -2851,6 +2944,7 @@ GL_APICALL void GL_APIENTRY glUniformMatrix4x3fvNV (GLint location, GLsizei coun
 
 #ifndef GL_NV_path_rendering
 #define GL_NV_path_rendering 1
+typedef double GLdouble;
 #define GL_PATH_FORMAT_SVG_NV             0x9070
 #define GL_PATH_FORMAT_PS_NV              0x9071
 #define GL_STANDARD_FONT_NAME_NV          0x9072
@@ -3061,6 +3155,25 @@ typedef GLenum (GL_APIENTRYP PFNGLPATHGLYPHINDEXARRAYNVPROC) (GLuint firstPathNa
 typedef GLenum (GL_APIENTRYP PFNGLPATHMEMORYGLYPHINDEXARRAYNVPROC) (GLuint firstPathName, GLenum fontTarget, GLsizeiptr fontSize, const void *fontData, GLsizei faceIndex, GLuint firstGlyphIndex, GLsizei numGlyphs, GLuint pathParameterTemplate, GLfloat emScale);
 typedef void (GL_APIENTRYP PFNGLPROGRAMPATHFRAGMENTINPUTGENNVPROC) (GLuint program, GLint location, GLenum genMode, GLint components, const GLfloat *coeffs);
 typedef void (GL_APIENTRYP PFNGLGETPROGRAMRESOURCEFVNVPROC) (GLuint program, GLenum programInterface, GLuint index, GLsizei propCount, const GLenum *props, GLsizei bufSize, GLsizei *length, GLfloat *params);
+typedef void (GL_APIENTRYP PFNGLMATRIXFRUSTUMEXTPROC) (GLenum mode, GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar);
+typedef void (GL_APIENTRYP PFNGLMATRIXLOADIDENTITYEXTPROC) (GLenum mode);
+typedef void (GL_APIENTRYP PFNGLMATRIXLOADTRANSPOSEFEXTPROC) (GLenum mode, const GLfloat *m);
+typedef void (GL_APIENTRYP PFNGLMATRIXLOADTRANSPOSEDEXTPROC) (GLenum mode, const GLdouble *m);
+typedef void (GL_APIENTRYP PFNGLMATRIXLOADFEXTPROC) (GLenum mode, const GLfloat *m);
+typedef void (GL_APIENTRYP PFNGLMATRIXLOADDEXTPROC) (GLenum mode, const GLdouble *m);
+typedef void (GL_APIENTRYP PFNGLMATRIXMULTTRANSPOSEFEXTPROC) (GLenum mode, const GLfloat *m);
+typedef void (GL_APIENTRYP PFNGLMATRIXMULTTRANSPOSEDEXTPROC) (GLenum mode, const GLdouble *m);
+typedef void (GL_APIENTRYP PFNGLMATRIXMULTFEXTPROC) (GLenum mode, const GLfloat *m);
+typedef void (GL_APIENTRYP PFNGLMATRIXMULTDEXTPROC) (GLenum mode, const GLdouble *m);
+typedef void (GL_APIENTRYP PFNGLMATRIXORTHOEXTPROC) (GLenum mode, GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar);
+typedef void (GL_APIENTRYP PFNGLMATRIXPOPEXTPROC) (GLenum mode);
+typedef void (GL_APIENTRYP PFNGLMATRIXPUSHEXTPROC) (GLenum mode);
+typedef void (GL_APIENTRYP PFNGLMATRIXROTATEFEXTPROC) (GLenum mode, GLfloat angle, GLfloat x, GLfloat y, GLfloat z);
+typedef void (GL_APIENTRYP PFNGLMATRIXROTATEDEXTPROC) (GLenum mode, GLdouble angle, GLdouble x, GLdouble y, GLdouble z);
+typedef void (GL_APIENTRYP PFNGLMATRIXSCALEFEXTPROC) (GLenum mode, GLfloat x, GLfloat y, GLfloat z);
+typedef void (GL_APIENTRYP PFNGLMATRIXSCALEDEXTPROC) (GLenum mode, GLdouble x, GLdouble y, GLdouble z);
+typedef void (GL_APIENTRYP PFNGLMATRIXTRANSLATEFEXTPROC) (GLenum mode, GLfloat x, GLfloat y, GLfloat z);
+typedef void (GL_APIENTRYP PFNGLMATRIXTRANSLATEDEXTPROC) (GLenum mode, GLdouble x, GLdouble y, GLdouble z);
 #ifdef GL_GLEXT_PROTOTYPES
 GL_APICALL GLuint GL_APIENTRY glGenPathsNV (GLsizei range);
 GL_APICALL void GL_APIENTRY glDeletePathsNV (GLuint path, GLsizei range);
@@ -3119,6 +3232,25 @@ GL_APICALL GLenum GL_APIENTRY glPathGlyphIndexArrayNV (GLuint firstPathName, GLe
 GL_APICALL GLenum GL_APIENTRY glPathMemoryGlyphIndexArrayNV (GLuint firstPathName, GLenum fontTarget, GLsizeiptr fontSize, const void *fontData, GLsizei faceIndex, GLuint firstGlyphIndex, GLsizei numGlyphs, GLuint pathParameterTemplate, GLfloat emScale);
 GL_APICALL void GL_APIENTRY glProgramPathFragmentInputGenNV (GLuint program, GLint location, GLenum genMode, GLint components, const GLfloat *coeffs);
 GL_APICALL void GL_APIENTRY glGetProgramResourcefvNV (GLuint program, GLenum programInterface, GLuint index, GLsizei propCount, const GLenum *props, GLsizei bufSize, GLsizei *length, GLfloat *params);
+GL_APICALL void GL_APIENTRY glMatrixFrustumEXT (GLenum mode, GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar);
+GL_APICALL void GL_APIENTRY glMatrixLoadIdentityEXT (GLenum mode);
+GL_APICALL void GL_APIENTRY glMatrixLoadTransposefEXT (GLenum mode, const GLfloat *m);
+GL_APICALL void GL_APIENTRY glMatrixLoadTransposedEXT (GLenum mode, const GLdouble *m);
+GL_APICALL void GL_APIENTRY glMatrixLoadfEXT (GLenum mode, const GLfloat *m);
+GL_APICALL void GL_APIENTRY glMatrixLoaddEXT (GLenum mode, const GLdouble *m);
+GL_APICALL void GL_APIENTRY glMatrixMultTransposefEXT (GLenum mode, const GLfloat *m);
+GL_APICALL void GL_APIENTRY glMatrixMultTransposedEXT (GLenum mode, const GLdouble *m);
+GL_APICALL void GL_APIENTRY glMatrixMultfEXT (GLenum mode, const GLfloat *m);
+GL_APICALL void GL_APIENTRY glMatrixMultdEXT (GLenum mode, const GLdouble *m);
+GL_APICALL void GL_APIENTRY glMatrixOrthoEXT (GLenum mode, GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar);
+GL_APICALL void GL_APIENTRY glMatrixPopEXT (GLenum mode);
+GL_APICALL void GL_APIENTRY glMatrixPushEXT (GLenum mode);
+GL_APICALL void GL_APIENTRY glMatrixRotatefEXT (GLenum mode, GLfloat angle, GLfloat x, GLfloat y, GLfloat z);
+GL_APICALL void GL_APIENTRY glMatrixRotatedEXT (GLenum mode, GLdouble angle, GLdouble x, GLdouble y, GLdouble z);
+GL_APICALL void GL_APIENTRY glMatrixScalefEXT (GLenum mode, GLfloat x, GLfloat y, GLfloat z);
+GL_APICALL void GL_APIENTRY glMatrixScaledEXT (GLenum mode, GLdouble x, GLdouble y, GLdouble z);
+GL_APICALL void GL_APIENTRY glMatrixTranslatefEXT (GLenum mode, GLfloat x, GLfloat y, GLfloat z);
+GL_APICALL void GL_APIENTRY glMatrixTranslatedEXT (GLenum mode, GLdouble x, GLdouble y, GLdouble z);
 #endif
 #endif /* GL_NV_path_rendering */
 
@@ -3230,6 +3362,10 @@ GL_APICALL void GL_APIENTRY glResolveDepthValuesNV (void);
 #define GL_SAMPLER_CUBE_SHADOW_NV         0x8DC5
 #endif /* GL_NV_shadow_samplers_cube */
 
+#ifndef GL_NV_stereo_view_rendering
+#define GL_NV_stereo_view_rendering 1
+#endif /* GL_NV_stereo_view_rendering */
+
 #ifndef GL_NV_texture_border_clamp
 #define GL_NV_texture_border_clamp 1
 #define GL_TEXTURE_BORDER_COLOR_NV        0x1004
@@ -3432,6 +3568,19 @@ GL_APICALL void GL_APIENTRY glFramebufferFetchBarrierQCOM (void);
 #endif
 #endif /* GL_QCOM_shader_framebuffer_fetch_noncoherent */
 
+#ifndef GL_QCOM_texture_foveated
+#define GL_QCOM_texture_foveated 1
+#define GL_TEXTURE_FOVEATED_FEATURE_BITS_QCOM 0x8BFB
+#define GL_TEXTURE_FOVEATED_MIN_PIXEL_DENSITY_QCOM 0x8BFC
+#define GL_TEXTURE_FOVEATED_FEATURE_QUERY_QCOM 0x8BFD
+#define GL_TEXTURE_FOVEATED_NUM_FOCAL_POINTS_QUERY_QCOM 0x8BFE
+#define GL_FRAMEBUFFER_INCOMPLETE_FOVEATION_QCOM 0x8BFF
+typedef void (GL_APIENTRYP PFNGLTEXTUREFOVEATIONPARAMETERSQCOMPROC) (GLuint texture, GLuint layer, GLuint focalPoint, GLfloat focalX, GLfloat focalY, GLfloat gainX, GLfloat gainY, GLfloat foveaArea);
+#ifdef GL_GLEXT_PROTOTYPES
+GL_APICALL void GL_APIENTRY glTextureFoveationParametersQCOM (GLuint texture, GLuint layer, GLuint focalPoint, GLfloat focalX, GLfloat focalY, GLfloat gainX, GLfloat gainY, GLfloat foveaArea);
+#endif
+#endif /* GL_QCOM_texture_foveated */
+
 #ifndef GL_QCOM_tiled_rendering
 #define GL_QCOM_tiled_rendering 1
 #define GL_COLOR_BUFFER_BIT0_QCOM         0x00000001
diff --git a/include/GLES3/gl3.h b/include/GLES3/gl3.h
index 71e72b403ee..532bbbd3e2e 100644
--- a/include/GLES3/gl3.h
+++ b/include/GLES3/gl3.h
@@ -1,12 +1,12 @@
-#ifndef __gl3_h_
-#define __gl3_h_ 1
+#ifndef __gles2_gl3_h_
+#define __gles2_gl3_h_ 1
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 /*
-** Copyright (c) 2013-2017 The Khronos Group Inc.
+** Copyright (c) 2013-2018 The Khronos Group Inc.
 **
 ** Permission is hereby granted, free of charge, to any person obtaining a
 ** copy of this software and/or associated documentation files (the
@@ -44,7 +44,7 @@ extern "C" {
 #define GL_GLES_PROTOTYPES 1
 #endif
 
-/* Generated on date 20170606 */
+/* Generated on date 20180725 */
 
 /* Generated C header for:
  * API: gles2
diff --git a/include/meson.build b/include/meson.build
index b4555eabbfc..081c1bc0008 100644
--- a/include/meson.build
+++ b/include/meson.build
@@ -43,7 +43,7 @@ if with_gles2
   )
 endif
 
-if with_gles1 or with_gles2 or with_egl
+if with_gles1 or with_gles2 or with_opengl or with_egl
   install_headers('KHR/khrplatform.h', subdir : 'KHR')
 endif
 
diff --git a/include/pci_ids/radeonsi_pci_ids.h b/include/pci_ids/radeonsi_pci_ids.h
index c8d30597230..35ea3559b02 100644
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -204,6 +204,7 @@ CHIPSET(0x67CC, POLARIS10)
 CHIPSET(0x67CF, POLARIS10)
 CHIPSET(0x67D0, POLARIS10)
 CHIPSET(0x67DF, POLARIS10)
+CHIPSET(0x6FDF, POLARIS10)
 
 CHIPSET(0x98E4, STONEY)
 
@@ -243,3 +244,4 @@ CHIPSET(0x66A7, VEGA20)
 CHIPSET(0x66AF, VEGA20)
 
 CHIPSET(0x15DD, RAVEN)
+CHIPSET(0x15D8, RAVEN)
diff --git a/include/vulkan/vulkan_core.h b/include/vulkan/vulkan_core.h
index 06c860707b8..fe450142503 100644
--- a/include/vulkan/vulkan_core.h
+++ b/include/vulkan/vulkan_core.h
@@ -43,7 +43,7 @@ extern "C" {
 #define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff)
 #define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff)
 // Version of this file
-#define VK_HEADER_VERSION 80
+#define VK_HEADER_VERSION 84
 
 
 #define VK_NULL_HANDLE 0
@@ -305,6 +305,8 @@ typedef enum VkStructureType {
     VK_STRUCTURE_TYPE_WIN32_KEYED_MUTEX_ACQUIRE_RELEASE_INFO_NV = 1000058000,
     VK_STRUCTURE_TYPE_VALIDATION_FLAGS_EXT = 1000061000,
     VK_STRUCTURE_TYPE_VI_SURFACE_CREATE_INFO_NN = 1000062000,
+    VK_STRUCTURE_TYPE_IMAGE_VIEW_ASTC_DECODE_MODE_EXT = 1000067000,
+    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ASTC_DECODE_FEATURES_EXT = 1000067001,
     VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR = 1000073000,
     VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR = 1000073001,
     VK_STRUCTURE_TYPE_MEMORY_WIN32_HANDLE_PROPERTIES_KHR = 1000073002,
@@ -380,6 +382,10 @@ typedef enum VkStructureType {
     VK_STRUCTURE_TYPE_EXTERNAL_FORMAT_ANDROID = 1000129005,
     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT = 1000130000,
     VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT = 1000130001,
+    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT = 1000138000,
+    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT = 1000138001,
+    VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT = 1000138002,
+    VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT = 1000138003,
     VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT = 1000143000,
     VK_STRUCTURE_TYPE_RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT = 1000143001,
     VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT = 1000143002,
@@ -406,6 +412,11 @@ typedef enum VkStructureType {
     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD = 1000185000,
     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT = 1000190000,
     VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT = 1000190001,
+    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT = 1000190002,
+    VK_STRUCTURE_TYPE_CHECKPOINT_DATA_NV = 1000206000,
+    VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_NV = 1000206001,
+    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR = 1000211000,
+    VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT,
     VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO,
     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES,
     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES,
@@ -440,6 +451,7 @@ typedef enum VkStructureType {
     VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES,
     VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO,
+    VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES2_EXT = VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_EXT,
     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_FENCE_INFO_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_FENCE_INFO,
     VK_STRUCTURE_TYPE_EXTERNAL_FENCE_PROPERTIES_KHR = VK_STRUCTURE_TYPE_EXTERNAL_FENCE_PROPERTIES,
     VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO,
@@ -1118,6 +1130,7 @@ typedef enum VkDescriptorType {
     VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC = 8,
     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC = 9,
     VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT = 10,
+    VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT = 1000138000,
     VK_DESCRIPTOR_TYPE_BEGIN_RANGE = VK_DESCRIPTOR_TYPE_SAMPLER,
     VK_DESCRIPTOR_TYPE_END_RANGE = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT,
     VK_DESCRIPTOR_TYPE_RANGE_SIZE = (VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT - VK_DESCRIPTOR_TYPE_SAMPLER + 1),
@@ -4573,7 +4586,6 @@ VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSurfaceKHR)
 
 #define VK_KHR_SURFACE_SPEC_VERSION       25
 #define VK_KHR_SURFACE_EXTENSION_NAME     "VK_KHR_surface"
-#define VK_COLORSPACE_SRGB_NONLINEAR_KHR  VK_COLOR_SPACE_SRGB_NONLINEAR_KHR
 
 
 typedef enum VkColorSpaceKHR {
@@ -4592,6 +4604,7 @@ typedef enum VkColorSpaceKHR {
     VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT = 1000104012,
     VK_COLOR_SPACE_PASS_THROUGH_EXT = 1000104013,
     VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT = 1000104014,
+    VK_COLORSPACE_SRGB_NONLINEAR_KHR = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
     VK_COLOR_SPACE_BEGIN_RANGE_KHR = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
     VK_COLOR_SPACE_END_RANGE_KHR = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
     VK_COLOR_SPACE_RANGE_SIZE_KHR = (VK_COLOR_SPACE_SRGB_NONLINEAR_KHR - VK_COLOR_SPACE_SRGB_NONLINEAR_KHR + 1),
@@ -5979,13 +5992,24 @@ typedef struct VkPhysicalDevice8BitStorageFeaturesKHR {
 
 
 
+#define VK_KHR_vulkan_memory_model 1
+#define VK_KHR_VULKAN_MEMORY_MODEL_SPEC_VERSION 2
+#define VK_KHR_VULKAN_MEMORY_MODEL_EXTENSION_NAME "VK_KHR_vulkan_memory_model"
+
+typedef struct VkPhysicalDeviceVulkanMemoryModelFeaturesKHR {
+    VkStructureType    sType;
+    void*              pNext;
+    VkBool32           vulkanMemoryModel;
+    VkBool32           vulkanMemoryModelDeviceScope;
+} VkPhysicalDeviceVulkanMemoryModelFeaturesKHR;
+
+
+
 #define VK_EXT_debug_report 1
 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDebugReportCallbackEXT)
 
 #define VK_EXT_DEBUG_REPORT_SPEC_VERSION  9
 #define VK_EXT_DEBUG_REPORT_EXTENSION_NAME "VK_EXT_debug_report"
-#define VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT
-#define VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT
 
 
 typedef enum VkDebugReportObjectTypeEXT {
@@ -6025,6 +6049,8 @@ typedef enum VkDebugReportObjectTypeEXT {
     VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT = 33,
     VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_EXT = 1000156000,
     VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_EXT = 1000085000,
+    VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT,
+    VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT,
     VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_KHR_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_EXT,
     VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_KHR_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_EXT,
     VK_DEBUG_REPORT_OBJECT_TYPE_BEGIN_RANGE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT,
@@ -6411,10 +6437,10 @@ typedef enum VkValidationCheckEXT {
 } VkValidationCheckEXT;
 
 typedef struct VkValidationFlagsEXT {
-    VkStructureType          sType;
-    const void*              pNext;
-    uint32_t                 disabledValidationCheckCount;
-    VkValidationCheckEXT*    pDisabledValidationChecks;
+    VkStructureType                sType;
+    const void*                    pNext;
+    uint32_t                       disabledValidationCheckCount;
+    const VkValidationCheckEXT*    pDisabledValidationChecks;
 } VkValidationFlagsEXT;
 
 
@@ -6429,6 +6455,24 @@ typedef struct VkValidationFlagsEXT {
 #define VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME "VK_EXT_shader_subgroup_vote"
 
 
+#define VK_EXT_astc_decode_mode 1
+#define VK_EXT_ASTC_DECODE_MODE_SPEC_VERSION 1
+#define VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME "VK_EXT_astc_decode_mode"
+
+typedef struct VkImageViewASTCDecodeModeEXT {
+    VkStructureType    sType;
+    const void*        pNext;
+    VkFormat           decodeMode;
+} VkImageViewASTCDecodeModeEXT;
+
+typedef struct VkPhysicalDeviceASTCDecodeFeaturesEXT {
+    VkStructureType    sType;
+    void*              pNext;
+    VkBool32           decodeModeSharedExponent;
+} VkPhysicalDeviceASTCDecodeFeaturesEXT;
+
+
+
 #define VK_EXT_conditional_rendering 1
 #define VK_EXT_CONDITIONAL_RENDERING_SPEC_VERSION 1
 #define VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME "VK_EXT_conditional_rendering"
@@ -6744,7 +6788,6 @@ VKAPI_ATTR VkResult VKAPI_CALL vkReleaseDisplayEXT(
 #define VK_EXT_display_surface_counter 1
 #define VK_EXT_DISPLAY_SURFACE_COUNTER_SPEC_VERSION 1
 #define VK_EXT_DISPLAY_SURFACE_COUNTER_EXTENSION_NAME "VK_EXT_display_surface_counter"
-#define VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES2_EXT VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_EXT
 
 
 typedef enum VkSurfaceCounterFlagBitsEXT {
@@ -7298,6 +7341,42 @@ typedef struct VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT {
 #define VK_AMD_SHADER_FRAGMENT_MASK_EXTENSION_NAME "VK_AMD_shader_fragment_mask"
 
 
+#define VK_EXT_inline_uniform_block 1
+#define VK_EXT_INLINE_UNIFORM_BLOCK_SPEC_VERSION 1
+#define VK_EXT_INLINE_UNIFORM_BLOCK_EXTENSION_NAME "VK_EXT_inline_uniform_block"
+
+typedef struct VkPhysicalDeviceInlineUniformBlockFeaturesEXT {
+    VkStructureType    sType;
+    void*              pNext;
+    VkBool32           inlineUniformBlock;
+    VkBool32           descriptorBindingInlineUniformBlockUpdateAfterBind;
+} VkPhysicalDeviceInlineUniformBlockFeaturesEXT;
+
+typedef struct VkPhysicalDeviceInlineUniformBlockPropertiesEXT {
+    VkStructureType    sType;
+    void*              pNext;
+    uint32_t           maxInlineUniformBlockSize;
+    uint32_t           maxPerStageDescriptorInlineUniformBlocks;
+    uint32_t           maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks;
+    uint32_t           maxDescriptorSetInlineUniformBlocks;
+    uint32_t           maxDescriptorSetUpdateAfterBindInlineUniformBlocks;
+} VkPhysicalDeviceInlineUniformBlockPropertiesEXT;
+
+typedef struct VkWriteDescriptorSetInlineUniformBlockEXT {
+    VkStructureType    sType;
+    const void*        pNext;
+    uint32_t           dataSize;
+    const void*        pData;
+} VkWriteDescriptorSetInlineUniformBlockEXT;
+
+typedef struct VkDescriptorPoolInlineUniformBlockCreateInfoEXT {
+    VkStructureType    sType;
+    const void*        pNext;
+    uint32_t           maxInlineUniformBlockBindings;
+} VkDescriptorPoolInlineUniformBlockCreateInfoEXT;
+
+
+
 #define VK_EXT_shader_stencil_export 1
 #define VK_EXT_SHADER_STENCIL_EXPORT_SPEC_VERSION 1
 #define VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME "VK_EXT_shader_stencil_export"
@@ -7481,7 +7560,6 @@ VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkValidationCacheEXT)
 
 #define VK_EXT_VALIDATION_CACHE_SPEC_VERSION 1
 #define VK_EXT_VALIDATION_CACHE_EXTENSION_NAME "VK_EXT_validation_cache"
-#define VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT
 
 
 typedef enum VkValidationCacheHeaderVersionEXT {
@@ -7732,7 +7810,7 @@ typedef struct VkPhysicalDeviceShaderCorePropertiesAMD {
 
 
 #define VK_EXT_vertex_attribute_divisor 1
-#define VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_SPEC_VERSION 1
+#define VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_SPEC_VERSION 3
 #define VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME "VK_EXT_vertex_attribute_divisor"
 
 typedef struct VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT {
@@ -7753,6 +7831,13 @@ typedef struct VkPipelineVertexInputDivisorStateCreateInfoEXT {
     const VkVertexInputBindingDivisorDescriptionEXT*    pVertexBindingDivisors;
 } VkPipelineVertexInputDivisorStateCreateInfoEXT;
 
+typedef struct VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT {
+    VkStructureType    sType;
+    void*              pNext;
+    VkBool32           vertexAttributeInstanceRateDivisor;
+    VkBool32           vertexAttributeInstanceRateZeroDivisor;
+} VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT;
+
 
 
 #define VK_NV_shader_subgroup_partitioned 1
@@ -7760,6 +7845,38 @@ typedef struct VkPipelineVertexInputDivisorStateCreateInfoEXT {
 #define VK_NV_SHADER_SUBGROUP_PARTITIONED_EXTENSION_NAME "VK_NV_shader_subgroup_partitioned"
 
 
+#define VK_NV_device_diagnostic_checkpoints 1
+#define VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_SPEC_VERSION 2
+#define VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME "VK_NV_device_diagnostic_checkpoints"
+
+typedef struct VkQueueFamilyCheckpointPropertiesNV {
+    VkStructureType         sType;
+    void*                   pNext;
+    VkPipelineStageFlags    checkpointExecutionStageMask;
+} VkQueueFamilyCheckpointPropertiesNV;
+
+typedef struct VkCheckpointDataNV {
+    VkStructureType            sType;
+    void*                      pNext;
+    VkPipelineStageFlagBits    stage;
+    void*                      pCheckpointMarker;
+} VkCheckpointDataNV;
+
+
+typedef void (VKAPI_PTR *PFN_vkCmdSetCheckpointNV)(VkCommandBuffer commandBuffer, const void* pCheckpointMarker);
+typedef void (VKAPI_PTR *PFN_vkGetQueueCheckpointDataNV)(VkQueue queue, uint32_t* pCheckpointDataCount, VkCheckpointDataNV* pCheckpointData);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR void VKAPI_CALL vkCmdSetCheckpointNV(
+    VkCommandBuffer                             commandBuffer,
+    const void*                                 pCheckpointMarker);
+
+VKAPI_ATTR void VKAPI_CALL vkGetQueueCheckpointDataNV(
+    VkQueue                                     queue,
+    uint32_t*                                   pCheckpointDataCount,
+    VkCheckpointDataNV*                         pCheckpointData);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/m4/ax_check_compile_flag.m4 b/m4/ax_check_compile_flag.m4
index 51df0c09a7a..0fdca907e8b 100644
--- a/m4/ax_check_compile_flag.m4
+++ b/m4/ax_check_compile_flag.m4
@@ -55,6 +55,11 @@
 #   modified version of the Autoconf Macro, you may extend this special
 #   exception to the GPL to apply to your modified version as well.
 
+# Emil:
+# Toggle Werror since at some point clang started treating unknown -W
+# flags as warnings, succeeding with the build, yet issuing an annoying
+# warning.
+
 #serial 3
 
 AC_DEFUN([AX_CHECK_COMPILE_FLAG],
@@ -62,7 +67,7 @@ AC_DEFUN([AX_CHECK_COMPILE_FLAG],
 AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
 AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
   ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
-  _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
+  _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1 -Werror"
   AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])],
     [AS_VAR_SET(CACHEVAR,[yes])],
     [AS_VAR_SET(CACHEVAR,[no])])
diff --git a/meson.build b/meson.build
index 86a4a4ce6da..ca5538c80e7 100644
--- a/meson.build
+++ b/meson.build
@@ -297,7 +297,10 @@ endif
 
 _egl = get_option('egl')
 if _egl == 'auto'
-  with_egl = with_dri and with_shared_glapi and with_platforms
+  with_egl = (
+    not ['darwin', 'windows'].contains(host_machine.system()) and
+    with_dri and with_shared_glapi and with_platforms
+  )
 elif _egl == 'true'
   if not with_dri
     error('EGL requires dri')
@@ -307,6 +310,8 @@ elif _egl == 'true'
     error('No platforms specified, consider -Dplatforms=drm,x11 at least')
   elif not ['disabled', 'dri'].contains(with_glx)
     error('EGL requires dri, but a GLX is being built without dri')
+  elif ['darwin', 'windows'].contains(host_machine.system())
+    error('EGL is not available on Windows or MacOS')
   endif
   with_egl = true
 else
@@ -882,8 +887,9 @@ if not cc.links('''#include <stdint.h>
                    int main() {
                      return __sync_add_and_fetch(&v, (uint64_t)1);
                    }''',
+                dependencies : dep_atomic,
                 name : 'GCC 64bit atomics')
-  pre_args += '-DMISSING_64_BIT_ATOMICS'
+  pre_args += '-DMISSING_64BIT_ATOMICS'
 endif
 
 # TODO: shared/static? Is this even worth doing?
@@ -989,7 +995,7 @@ if cc.links('''
       freelocale(loc);
       return 0;
     }''',
-    extra_args : pre_args,
+    args : pre_args,
     name : 'strtod has locale support')
   pre_args += '-DHAVE_STRTOD_L'
 endif
@@ -1056,14 +1062,6 @@ dep_thread = dependency('threads')
 if dep_thread.found() and host_machine.system() != 'windows'
   pre_args += '-DHAVE_PTHREAD'
 endif
-if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 or with_gallium_opencl
-  dep_elf = dependency('libelf', required : false)
-  if not dep_elf.found()
-    dep_elf = cc.find_library('elf')
-  endif
-else
-  dep_elf = null_dep
-endif
 dep_expat = dependency('expat')
 # this only exists on linux so either this is linux and it will be found, or
 # its not linux and and wont
@@ -1106,12 +1104,17 @@ endif
 
 # Loop over the enables versions and get the highest libdrm requirement for all
 # active drivers.
+_drm_blame = ''
 foreach d : _libdrm_checks
   ver = get_variable('_drm_@0@_ver'.format(d[0]))
   if d[1] and ver.version_compare('>' + _drm_ver)
     _drm_ver = ver
+    _drm_blame = d[0]
   endif
 endforeach
+if _drm_blame != ''
+  message('libdrm @0@ needed because @1@ has the highest requirement'.format(_drm_ver, _drm_blame))
+endif
 
 # Then get each libdrm module
 foreach d : _libdrm_checks
@@ -1215,6 +1218,16 @@ elif with_amd_vk or with_gallium_radeonsi or with_gallium_swr
   error('The following drivers require LLVM: Radv, RadeonSI, SWR. One of these is enabled, but LLVM is disabled.')
 endif
 
+if (with_amd_vk or with_gallium_radeonsi or with_gallium_opencl or
+    (with_gallium_r600 and with_llvm))
+  dep_elf = dependency('libelf', required : false)
+  if not dep_elf.found()
+    dep_elf = cc.find_library('elf')
+  endif
+else
+  dep_elf = null_dep
+endif
+
 dep_glvnd = null_dep
 if with_glvnd
   dep_glvnd = dependency('libglvnd', version : '>= 0.2.0')
diff --git a/src/amd/Android.mk b/src/amd/Android.mk
index 6129e360cbf..e40e7da01bd 100644
--- a/src/amd/Android.mk
+++ b/src/amd/Android.mk
@@ -27,4 +27,6 @@ include $(LOCAL_PATH)/Makefile.sources
 
 include $(LOCAL_PATH)/Android.addrlib.mk
 include $(LOCAL_PATH)/Android.common.mk
+ifneq ($(filter radeonsi,$(BOARD_GPU_DRIVERS)),)
 include $(LOCAL_PATH)/vulkan/Android.mk
+endif
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 54b7e987015..c85d2816ba9 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -515,39 +515,51 @@ ac_build_gather_values(struct ac_llvm_context *ctx,
 	return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false);
 }
 
-/* Expand a scalar or vector to <4 x type> by filling the remaining channels
- * with undef. Extract at most num_channels components from the input.
+/* Expand a scalar or vector to <dst_channels x type> by filling the remaining
+ * channels with undef. Extract at most src_channels components from the input.
  */
-LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
-				     LLVMValueRef value,
-				     unsigned num_channels)
+LLVMValueRef ac_build_expand(struct ac_llvm_context *ctx,
+			     LLVMValueRef value,
+			     unsigned src_channels,
+			     unsigned dst_channels)
 {
 	LLVMTypeRef elemtype;
-	LLVMValueRef chan[4];
+	LLVMValueRef chan[dst_channels];
 
 	if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
 		unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
-		num_channels = MIN2(num_channels, vec_size);
 
-		if (num_channels >= 4)
+		if (src_channels == dst_channels && vec_size == dst_channels)
 			return value;
 
-		for (unsigned i = 0; i < num_channels; i++)
+		src_channels = MIN2(src_channels, vec_size);
+
+		for (unsigned i = 0; i < src_channels; i++)
 			chan[i] = ac_llvm_extract_elem(ctx, value, i);
 
 		elemtype = LLVMGetElementType(LLVMTypeOf(value));
 	} else {
-		if (num_channels) {
-			assert(num_channels == 1);
+		if (src_channels) {
+			assert(src_channels == 1);
 			chan[0] = value;
 		}
 		elemtype = LLVMTypeOf(value);
 	}
 
-	while (num_channels < 4)
-		chan[num_channels++] = LLVMGetUndef(elemtype);
+	for (unsigned i = src_channels; i < dst_channels; i++)
+		chan[i] = LLVMGetUndef(elemtype);
+
+	return ac_build_gather_values(ctx, chan, dst_channels);
+}
 
-	return ac_build_gather_values(ctx, chan, 4);
+/* Expand a scalar or vector to <4 x type> by filling the remaining channels
+ * with undef. Extract at most num_channels components from the input.
+ */
+LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
+				     LLVMValueRef value,
+				     unsigned num_channels)
+{
+	return ac_build_expand(ctx, value, num_channels, 4);
 }
 
 LLVMValueRef
@@ -555,7 +567,15 @@ ac_build_fdiv(struct ac_llvm_context *ctx,
 	      LLVMValueRef num,
 	      LLVMValueRef den)
 {
-	LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");
+	/* If we do (num / den), LLVM >= 7.0 does:
+	 *    return num * v_rcp_f32(den * (fabs(den) > 0x1.0p+96f ? 0x1.0p-32f : 1.0f));
+	 *
+	 * If we do (num * (1 / den)), LLVM does:
+	 *    return num * v_rcp_f32(den);
+	 */
+	LLVMValueRef one = LLVMTypeOf(num) == ctx->f64 ? ctx->f64_1 : ctx->f32_1;
+	LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, one, den, "");
+	LLVMValueRef ret = LLVMBuildFMul(ctx->builder, num, rcp, "");
 
 	/* Use v_rcp_f32 instead of precise division. */
 	if (!LLVMIsConstant(ret))
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index c5753037e7b..92d72ae4764 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -161,6 +161,9 @@ LLVMValueRef
 ac_build_gather_values(struct ac_llvm_context *ctx,
 		       LLVMValueRef *values,
 		       unsigned value_count);
+LLVMValueRef ac_build_expand(struct ac_llvm_context *ctx,
+			     LLVMValueRef value,
+			     unsigned src_channels, unsigned dst_channels);
 LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
 				     LLVMValueRef value,
 				     unsigned num_channels);
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index 10e1ca99d41..cd3525187a0 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -149,15 +149,13 @@ static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
 	char features[256];
 	const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
 	LLVMTargetRef target = ac_get_llvm_target(triple);
-	bool barrier_does_waitcnt = family != CHIP_VEGA20;
 
 	snprintf(features, sizeof(features),
-		 "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s%s",
+		 "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s",
 		 tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
 		 tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
 		 tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
-		 tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "",
-		 barrier_does_waitcnt ? ",+auto-waitcnt-before-barrier" : "");
+		 tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "");
 	
 	LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
 	                             target,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index cffc980e51f..2cb08be2b3f 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1171,7 +1171,8 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx,
 					  struct ac_image_args *args,
 					  const nir_tex_instr *instr)
 {
-	enum glsl_base_type stype = glsl_get_sampler_result_type(var->type);
+	const struct glsl_type *type = glsl_without_array(var->type);
+	enum glsl_base_type stype = glsl_get_sampler_result_type(type);
 	LLVMValueRef half_texel[2];
 	LLVMValueRef compare_cube_wa = NULL;
 	LLVMValueRef result;
@@ -1356,7 +1357,8 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
 	if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= VI) {
 		nir_deref_instr *texture_deref_instr = get_tex_texture_deref(instr);
 		nir_variable *var = nir_deref_instr_get_variable(texture_deref_instr);
-		enum glsl_base_type stype = glsl_get_sampler_result_type(var->type);
+		const struct glsl_type *type = glsl_without_array(var->type);
+		enum glsl_base_type stype = glsl_get_sampler_result_type(type);
 		if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
 			return lower_gather4_integer(&ctx->ac, var, args, instr);
 		}
@@ -1398,7 +1400,7 @@ static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx,
 
 	if (instr->dest.ssa.bit_size == 16) {
 		unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
-		LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16Type(), 2 * load_dwords);
+		LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16TypeInContext(ctx->ac.context), 2 * load_dwords);
 		ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
 		LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
 		res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
@@ -1671,7 +1673,7 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
 			};
 			results[idx] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
 			unsigned num_elems = ac_get_type_size(data_type) / elem_size_bytes;
-			LLVMTypeRef resTy = LLVMVectorType(LLVMIntType(instr->dest.ssa.bit_size), num_elems);
+			LLVMTypeRef resTy = LLVMVectorType(LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size), num_elems);
 			results[idx] = LLVMBuildBitCast(ctx->ac.builder, results[idx], resTy, "");
 		}
 	}
@@ -1685,8 +1687,8 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
 		};
 
 		if (num_bytes > 16 && num_components == 3) {
-			/* we end up with a v4f32 and v2f32 but shuffle fails on that */
-			results[1] = ac_build_expand_to_vec4(&ctx->ac, results[1], 2);
+			/* we end up with a v2i64 and i64 but shuffle fails on that */
+			results[1] = ac_build_expand(&ctx->ac, results[1], 1, 2);
 		}
 
 		LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 2f4f0f8884f..94723dc9c09 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -588,8 +588,8 @@ void ac_compute_cmask(const struct radeon_info *info,
 
 	unsigned base_align = num_pipes * pipe_interleave_bytes;
 
-	unsigned width = align(config->info.width, cl_width*8);
-	unsigned height = align(config->info.height, cl_height*8);
+	unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width*8);
+	unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height*8);
 	unsigned slice_elements = (width * height) / (8*8);
 
 	/* Each element of CMASK is a nibble. */
diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index 3c3bc541b4f..303c036fab4 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -9123,7 +9123,9 @@
 #define    CIK_SDMA_PACKET_SEMAPHORE               0x7
 #define    CIK_SDMA_PACKET_CONSTANT_FILL           0xb
 #define    CIK_SDMA_PACKET_SRBM_WRITE              0xe
-#define    CIK_SDMA_COPY_MAX_SIZE                  0x3fffe0
+/* There is apparently an undocumented HW "feature" that
+   prevents the HW from copying past 256 bytes of (1 << 22) */
+#define    CIK_SDMA_COPY_MAX_SIZE                  0x3fff00
 
 enum amd_cmp_class_flags {
 	S_NAN = 1 << 0,        // Signaling NaN
diff --git a/src/amd/vulkan/Android.mk b/src/amd/vulkan/Android.mk
index cee3744f40b..51b03561fa7 100644
--- a/src/amd/vulkan/Android.mk
+++ b/src/amd/vulkan/Android.mk
@@ -62,6 +62,7 @@ LOCAL_SRC_FILES := \
 	$(VULKAN_FILES)
 
 LOCAL_CFLAGS += -DFORCE_BUILD_AMDGPU   # instructs LLVM to declare LLVMInitializeAMDGPU* functions
+LOCAL_CFLAGS += -DVK_USE_PLATFORM_ANDROID_KHR
 
 $(call mesa-build-with-llvm)
 
@@ -140,6 +141,7 @@ LOCAL_SRC_FILES := \
 	$(VULKAN_ANDROID_FILES)
 
 LOCAL_CFLAGS += -DFORCE_BUILD_AMDGPU   # instructs LLVM to declare LLVMInitializeAMDGPU* functions
+LOCAL_CFLAGS += -DVK_USE_PLATFORM_ANDROID_KHR
 
 $(call mesa-build-with-llvm)
 
diff --git a/src/amd/vulkan/Makefile.am b/src/amd/vulkan/Makefile.am
index e7ccc58a28e..e28f032cbee 100644
--- a/src/amd/vulkan/Makefile.am
+++ b/src/amd/vulkan/Makefile.am
@@ -124,7 +124,7 @@ VULKAN_LIB_DEPS += \
 endif
 
 if HAVE_PLATFORM_ANDROID
-AM_CPPFLAGS += $(ANDROID_CPPFLAGS)
+AM_CPPFLAGS += $(ANDROID_CPPFLAGS) -DVK_USE_PLATFORM_ANDROID_KHR
 AM_CFLAGS += $(ANDROID_CFLAGS)
 VULKAN_LIB_DEPS += $(ANDROID_LIBS)
 VULKAN_SOURCES += $(VULKAN_ANDROID_FILES)
diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build
index 9f2842182e7..7998ba8cf91 100644
--- a/src/amd/vulkan/meson.build
+++ b/src/amd/vulkan/meson.build
@@ -144,6 +144,7 @@ libvulkan_radeon = shared_library(
     idep_nir,
   ],
   c_args : [c_vis_args, no_override_init_args, radv_flags],
+  cpp_args : [cpp_vis_args, radv_flags],
   link_args : [ld_args_bsymbolic, ld_args_gc_sections],
   install : true,
 )
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 7785ece8ce6..dae64406896 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1657,7 +1657,8 @@ radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,
 {
 	struct radv_descriptor_state *descriptors_state =
 		radv_get_descriptors_state(cmd_buffer, bind_point);
-	uint32_t size = MAX_SETS * 2 * 4;
+	uint8_t ptr_size = HAVE_32BIT_POINTERS ? 1 : 2;
+	uint32_t size = MAX_SETS * 4 * ptr_size;
 	uint32_t offset;
 	void *ptr;
 	
@@ -1666,13 +1667,14 @@ radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,
 		return;
 
 	for (unsigned i = 0; i < MAX_SETS; i++) {
-		uint32_t *uptr = ((uint32_t *)ptr) + i * 2;
+		uint32_t *uptr = ((uint32_t *)ptr) + i * ptr_size;
 		uint64_t set_va = 0;
 		struct radv_descriptor_set *set = descriptors_state->sets[i];
 		if (descriptors_state->valid & (1u << i))
 			set_va = set->va;
 		uptr[0] = set_va & 0xffffffff;
-		uptr[1] = set_va >> 32;
+		if (ptr_size == 2)
+			uptr[1] = set_va >> 32;
 	}
 
 	uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
@@ -1714,6 +1716,8 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
 					 VK_PIPELINE_BIND_POINT_GRAPHICS;
 	struct radv_descriptor_state *descriptors_state =
 		radv_get_descriptors_state(cmd_buffer, bind_point);
+	struct radv_cmd_state *state = &cmd_buffer->state;
+	bool flush_indirect_descriptors;
 
 	if (!descriptors_state->dirty)
 		return;
@@ -1721,10 +1725,14 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
 	if (descriptors_state->push_dirty)
 		radv_flush_push_descriptors(cmd_buffer, bind_point);
 
-	if ((cmd_buffer->state.pipeline && cmd_buffer->state.pipeline->need_indirect_descriptor_sets) ||
-	    (cmd_buffer->state.compute_pipeline && cmd_buffer->state.compute_pipeline->need_indirect_descriptor_sets)) {
+	flush_indirect_descriptors =
+		(bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS &&
+		 state->pipeline && state->pipeline->need_indirect_descriptor_sets) ||
+		(bind_point == VK_PIPELINE_BIND_POINT_COMPUTE &&
+		 state->compute_pipeline && state->compute_pipeline->need_indirect_descriptor_sets);
+
+	if (flush_indirect_descriptors)
 		radv_flush_indirect_descriptor_sets(cmd_buffer, bind_point);
-	}
 
 	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
 	                                                   cmd_buffer->cs,
@@ -2307,6 +2315,7 @@ VkResult radv_BeginCommandBuffer(
 	cmd_buffer->state.last_num_instances = -1;
 	cmd_buffer->state.last_vertex_offset = -1;
 	cmd_buffer->state.last_first_instance = -1;
+	cmd_buffer->state.predication_type = -1;
 	cmd_buffer->usage_flags = pBeginInfo->flags;
 
 	/* setup initial configuration into command buffer */
@@ -4126,15 +4135,18 @@ static void radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer,
 
 	if (radv_image_has_dcc(image)) {
 		uint32_t value = 0xffffffffu; /* Fully expanded mode. */
+		bool need_decompress_pass = false;
 
 		if (radv_layout_dcc_compressed(image, dst_layout,
 					       dst_queue_mask)) {
 			value = 0x20202020u;
+			need_decompress_pass = true;
 		}
 
 		radv_initialize_dcc(cmd_buffer, image, value);
 
-		radv_set_dcc_need_cmask_elim_pred(cmd_buffer, image, false);
+		radv_set_dcc_need_cmask_elim_pred(cmd_buffer, image,
+						  need_decompress_pass);
 	}
 
 	if (radv_image_has_cmask(image) || radv_image_has_dcc(image)) {
@@ -4335,6 +4347,8 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer,
 	struct radeon_cmdbuf *cs = cmd_buffer->cs;
 	uint64_t va = radv_buffer_get_va(event->bo);
 
+	si_emit_cache_flush(cmd_buffer);
+
 	radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo);
 
 	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 18);
@@ -4439,29 +4453,37 @@ void radv_CmdSetDeviceMask(VkCommandBuffer commandBuffer,
 }
 
 /* VK_EXT_conditional_rendering */
-void vkCmdBeginConditionalRenderingEXT(
+void radv_CmdBeginConditionalRenderingEXT(
 	VkCommandBuffer                             commandBuffer,
 	const VkConditionalRenderingBeginInfoEXT*   pConditionalRenderingBegin)
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 	RADV_FROM_HANDLE(radv_buffer, buffer, pConditionalRenderingBegin->buffer);
-	bool inverted;
+	bool draw_visible = true;
 	uint64_t va;
 
 	va = radv_buffer_get_va(buffer->bo) + pConditionalRenderingBegin->offset;
 
-	inverted = pConditionalRenderingBegin->flags & VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
+	/* By default, if the 32-bit value at offset in buffer memory is zero,
+	 * then the rendering commands are discarded, otherwise they are
+	 * executed as normal. If the inverted flag is set, all commands are
+	 * discarded if the value is non zero.
+	 */
+	if (pConditionalRenderingBegin->flags &
+	    VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT) {
+		draw_visible = false;
+	}
 
 	/* Enable predication for this command buffer. */
-	si_emit_set_predication_state(cmd_buffer, inverted, va);
+	si_emit_set_predication_state(cmd_buffer, draw_visible, va);
 	cmd_buffer->state.predicating = true;
 
 	/* Store conditional rendering user info. */
-	cmd_buffer->state.predication_type = inverted;
+	cmd_buffer->state.predication_type = draw_visible;
 	cmd_buffer->state.predication_va = va;
 }
 
-void vkCmdEndConditionalRenderingEXT(
+void radv_CmdEndConditionalRenderingEXT(
 	VkCommandBuffer                             commandBuffer)
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 33f24b9d302..a72cf261f66 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -45,22 +45,29 @@
 #include "sid.h"
 #include "gfx9d.h"
 #include "addrlib/gfx9/chip/gfx9_enum.h"
+#include "util/build_id.h"
 #include "util/debug.h"
+#include "util/mesa-sha1.h"
 
 static int
 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
 {
-	uint32_t mesa_timestamp, llvm_timestamp;
-	uint16_t f = family;
+	struct mesa_sha1 ctx;
+	unsigned char sha1[20];
+	unsigned ptr_size = sizeof(void*);
+
 	memset(uuid, 0, VK_UUID_SIZE);
-	if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
-	    !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
+	_mesa_sha1_init(&ctx);
+
+	if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx) ||
+	    !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
 		return -1;
 
-	memcpy(uuid, &mesa_timestamp, 4);
-	memcpy((char*)uuid + 4, &llvm_timestamp, 4);
-	memcpy((char*)uuid + 8, &f, 2);
-	snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
+	_mesa_sha1_update(&ctx, &family, sizeof(family));
+	_mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
+	_mesa_sha1_final(&ctx, sha1);
+
+	memcpy(uuid, sha1, VK_UUID_SIZE);
 	return 0;
 }
 
@@ -459,7 +466,7 @@ static const struct debug_control radv_perftest_options[] = {
 const char *
 radv_get_perftest_option_name(int id)
 {
-	assert(id < ARRAY_SIZE(radv_debug_options) - 1);
+	assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
 	return radv_perftest_options[id].string;
 }
 
@@ -480,6 +487,9 @@ radv_handle_per_app_options(struct radv_instance *instance,
 			 */
 			instance->perftest_flags |= RADV_PERFTEST_SISCHED;
 		}
+	} else if (!strcmp(name, "DOOM_VFR")) {
+		/* Work around a Doom VFR game bug */
+		instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
 	}
 }
 
@@ -818,6 +828,13 @@ void radv_GetPhysicalDeviceFeatures2(
 			features->inheritedConditionalRendering = false;
 			break;
 		}
+		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
+			VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
+				(VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
+			features->vertexAttributeInstanceRateDivisor = VK_TRUE;
+			features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE;
+			break;
+		}
 		default:
 			break;
 		}
@@ -1030,6 +1047,7 @@ void radv_GetPhysicalDeviceProperties2(
 							VK_SUBGROUP_FEATURE_VOTE_BIT;
 			if (pdevice->rad_info.chip_class >= VI) {
 				properties->supportedOperations |=
+							VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
 							VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
 							VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
 			}
@@ -1892,10 +1910,30 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff
 		device->physical_device->rad_info.family != CHIP_CARRIZO &&
 		device->physical_device->rad_info.family != CHIP_STONEY;
 	unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
-	unsigned max_offchip_buffers = max_offchip_buffers_per_se *
-		device->physical_device->rad_info.max_se;
+	unsigned max_offchip_buffers;
 	unsigned offchip_granularity;
 	unsigned hs_offchip_param;
+
+	/*
+	 * Per RadeonSI:
+	 * This must be one less than the maximum number due to a hw limitation.
+         * Various hardware bugs in SI, CIK, and GFX9 need this.
+	 *
+	 * Per AMDVLK:
+	 * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
+	 * Gfx7 should limit max_offchip_buffers to 508
+	 * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
+	 *
+	 * Follow AMDVLK here.
+	 */
+	if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
+	    device->physical_device->rad_info.chip_class == CIK ||
+	    device->physical_device->rad_info.chip_class == SI)
+		--max_offchip_buffers_per_se;
+
+	max_offchip_buffers = max_offchip_buffers_per_se *
+		device->physical_device->rad_info.max_se;
+
 	switch (device->tess_offchip_block_dw_size) {
 	default:
 		assert(0);
diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
index 15d29becfd4..028d10f5fae 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -105,7 +105,7 @@ def __init__(self, name, ext_version, enable):
     Extension('VK_EXT_sampler_filter_minmax',             1, 'device->rad_info.chip_class >= CIK'),
     Extension('VK_EXT_shader_viewport_index_layer',       1, True),
     Extension('VK_EXT_shader_stencil_export',             1, True),
-    Extension('VK_EXT_vertex_attribute_divisor',          1, True),
+    Extension('VK_EXT_vertex_attribute_divisor',          3, True),
     Extension('VK_AMD_draw_indirect_count',               1, True),
     Extension('VK_AMD_gcn_shader',                        1, True),
     Extension('VK_AMD_rasterization_order',               1, 'device->has_out_of_order_rast'),
diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c
index f23ebfb2ad7..6253c27b95d 100644
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -612,7 +612,8 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
 	}
 
 	if (desc->layout == VK_FORMAT_LAYOUT_ETC &&
-	    physical_device->rad_info.chip_class < GFX9 &&
+	    physical_device->rad_info.family != CHIP_VEGA10 &&
+	    physical_device->rad_info.family != CHIP_RAVEN &&
 	    physical_device->rad_info.family != CHIP_STONEY) {
 		out_properties->linearTilingFeatures = linear;
 		out_properties->optimalTilingFeatures = tiled;
@@ -1111,6 +1112,25 @@ static VkResult radv_get_image_format_properties(struct radv_physical_device *ph
 		}
 	}
 
+	if (info->usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) {
+		if (!(format_feature_flags & VK_FORMAT_FEATURE_TRANSFER_SRC_BIT)) {
+			goto unsupported;
+		}
+	}
+
+	if (info->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) {
+		if (!(format_feature_flags & VK_FORMAT_FEATURE_TRANSFER_DST_BIT)) {
+			goto unsupported;
+		}
+	}
+
+	if (info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
+		if (!(format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
+		                              VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT))) {
+			goto unsupported;
+		}
+	}
+
 	*pImageFormatProperties = (VkImageFormatProperties) {
 		.maxExtent = maxExtent,
 		.maxMipLevels = maxMipLevels,
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index f1c78e8115d..b316242dc5a 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -801,8 +801,8 @@ radv_image_get_cmask_info(struct radv_device *device,
 
 	unsigned base_align = num_pipes * pipe_interleave_bytes;
 
-	unsigned width = align(image->info.width, cl_width*8);
-	unsigned height = align(image->info.height, cl_height*8);
+	unsigned width = align(image->surface.u.legacy.level[0].nblk_x, cl_width*8);
+	unsigned height = align(image->surface.u.legacy.level[0].nblk_y, cl_height*8);
 	unsigned slice_elements = (width * height) / (8*8);
 
 	/* Each element of CMASK is a nibble. */
diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c
index b42a6783fd2..74868d5a2bb 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -603,7 +603,7 @@ radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer,
                pipeline = cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
 	}
 
-	if (radv_image_has_dcc(image)) {
+	if (!decompress_dcc && radv_image_has_dcc(image)) {
 		old_predicating = cmd_buffer->state.predicating;
 
 		radv_emit_set_predication_state_from_image(cmd_buffer, image, true);
@@ -671,7 +671,7 @@ radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer,
 					&cmd_buffer->pool->alloc);
 
 	}
-	if (radv_image_has_dcc(image)) {
+	if (!decompress_dcc && radv_image_has_dcc(image)) {
 		cmd_buffer->state.predicating = old_predicating;
 
 		radv_emit_set_predication_state_from_image(cmd_buffer, image, false);
diff --git a/src/amd/vulkan/radv_meta_resolve.c b/src/amd/vulkan/radv_meta_resolve.c
index b049237ba65..2c8ba5306c0 100644
--- a/src/amd/vulkan/radv_meta_resolve.c
+++ b/src/amd/vulkan/radv_meta_resolve.c
@@ -358,7 +358,8 @@ static void radv_pick_resolve_method_images(struct radv_image *src_image,
 		*method = RESOLVE_COMPUTE;
 	else if (vk_format_is_int(src_image->vk_format))
 		*method = RESOLVE_COMPUTE;
-	else if (src_image->info.array_size > 1)
+	else if (src_image->info.array_size > 1 ||
+		 dest_image->info.array_size > 1)
 		*method = RESOLVE_COMPUTE;
 	
 	if (radv_layout_dcc_compressed(dest_image, dest_image_layout, queue_mask)) {
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 64b6522cd93..ac3d80618c2 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -422,7 +422,7 @@ get_tcs_out_current_patch_data_offset(struct radv_shader_context *ctx)
 			    "");
 }
 
-#define MAX_ARGS 23
+#define MAX_ARGS 64
 struct arg_info {
 	LLVMTypeRef types[MAX_ARGS];
 	LLVMValueRef *assign[MAX_ARGS];
@@ -545,13 +545,12 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
 
 
 static void
-set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs,
-	uint32_t indirect_offset)
+set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx,
+	uint8_t num_sgprs, bool indirect)
 {
 	ud_info->sgpr_idx = *sgpr_idx;
 	ud_info->num_sgprs = num_sgprs;
-	ud_info->indirect = indirect_offset > 0;
-	ud_info->indirect_offset = indirect_offset;
+	ud_info->indirect = indirect;
 	*sgpr_idx += num_sgprs;
 }
 
@@ -563,7 +562,7 @@ set_loc_shader(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx,
 		&ctx->shader_info->user_sgprs_locs.shader_data[idx];
 	assert(ud_info);
 
-	set_loc(ud_info, sgpr_idx, num_sgprs, 0);
+	set_loc(ud_info, sgpr_idx, num_sgprs, false);
 }
 
 static void
@@ -577,15 +576,16 @@ set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx)
 
 static void
 set_loc_desc(struct radv_shader_context *ctx, int idx,  uint8_t *sgpr_idx,
-	     uint32_t indirect_offset)
+	     bool indirect)
 {
 	struct radv_userdata_locations *locs =
 		&ctx->shader_info->user_sgprs_locs;
 	struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
 	assert(ud_info);
 
-	set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 1 : 2, indirect_offset);
-	if (indirect_offset == 0)
+	set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 1 : 2, indirect);
+
+	if (!indirect)
 		locs->descriptor_sets_enabled |= 1 << idx;
 }
 
@@ -695,7 +695,7 @@ static void allocate_user_sgprs(struct radv_shader_context *ctx,
 	if (ctx->shader_info->info.loads_push_constants)
 		user_sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2;
 
-	uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 : 16;
+	uint32_t available_sgprs = ctx->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16;
 	uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
 	uint32_t num_desc_set =
 		util_bitcount(ctx->shader_info->info.desc_set_used_mask);
@@ -806,7 +806,7 @@ set_global_input_locs(struct radv_shader_context *ctx, gl_shader_stage stage,
 		for (unsigned i = 0; i < num_sets; ++i) {
 			if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) &&
 			    ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
-				set_loc_desc(ctx, i, user_sgpr_idx, 0);
+				set_loc_desc(ctx, i, user_sgpr_idx, false);
 			} else
 				ctx->descriptor_sets[i] = NULL;
 		}
@@ -817,7 +817,6 @@ set_global_input_locs(struct radv_shader_context *ctx, gl_shader_stage stage,
 		for (unsigned i = 0; i < num_sets; ++i) {
 			if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) &&
 			    ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
-				set_loc_desc(ctx, i, user_sgpr_idx, i * 8);
 				ctx->descriptor_sets[i] =
 					ac_build_load_to_sgpr(&ctx->ac,
 							      desc_sets,
@@ -1991,8 +1990,7 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
 			uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[attrib_index];
 
 			if (divisor) {
-				buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id,
-				                            ctx->abi.start_instance, "");
+				buffer_index = ctx->abi.instance_id;
 
 				if (divisor != 1) {
 					buffer_index = LLVMBuildUDiv(ctx->ac.builder, buffer_index,
@@ -2009,6 +2007,8 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
 			} else {
 				buffer_index = ctx->ac.i32_0;
 			}
+
+			buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.start_instance, buffer_index, "");
 		} else
 			buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
 			                            ctx->abi.base_vertex, "");
@@ -2105,9 +2105,10 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
 	int idx = variable->data.location;
 	unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
 	LLVMValueRef interp = NULL;
+	uint64_t mask;
 
 	variable->data.driver_location = idx * 4;
-	ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
+	mask = ((1ull << attrib_count) - 1) << variable->data.location;
 
 	if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
 		unsigned interp_type;
@@ -2128,6 +2129,15 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
 	for (unsigned i = 0; i < attrib_count; ++i)
 		ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
 
+	if (idx == VARYING_SLOT_CLIP_DIST0) {
+		/* Do not account for the number of components inside the array
+		 * of clip/cull distances because this might wrongly set other
+		 * bits like primitive ID or layer.
+		 */
+		mask = 1ull << VARYING_SLOT_CLIP_DIST0;
+	}
+
+	ctx->input_mask |= mask;
 }
 
 static void
@@ -2194,6 +2204,17 @@ handle_fs_inputs(struct radv_shader_context *ctx,
 			if (LLVMIsUndef(interp_param))
 				ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
 			++index;
+		} else if (i == VARYING_SLOT_CLIP_DIST0) {
+			int length = ctx->shader_info->info.ps.num_input_clips_culls;
+
+			for (unsigned j = 0; j < length; j += 4) {
+				inputs = ctx->inputs + ac_llvm_reg_index_soa(i, j);
+
+				interp_param = *inputs;
+				interp_fs_input(ctx, index, interp_param,
+						ctx->abi.prim_mask, inputs);
+				++index;
+			}
 		} else if (i == VARYING_SLOT_POS) {
 			for(int i = 0; i < 3; ++i)
 				inputs[i] = ctx->abi.frag_pos[i];
@@ -2489,6 +2510,13 @@ handle_vs_outputs_post(struct radv_shader_context *ctx,
 		memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
 		       &args, sizeof(args));
 
+		/* Export the clip/cull distances values to the next stage. */
+		radv_export_param(ctx, param_count, &slots[0], 0xf);
+		outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = param_count++;
+		if (ctx->num_output_clips + ctx->num_output_culls > 4) {
+			radv_export_param(ctx, param_count, &slots[4], 0xf);
+			outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++;
+		}
 	}
 
 	LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_1};
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index eb58e8a1c0a..6a51efa2980 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2514,6 +2514,7 @@ radv_pipeline_generate_binning_state(struct radeon_cmdbuf *cs,
 	switch (pipeline->device->physical_device->rad_info.family) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 		context_states_per_bin = 1;
 		persistent_states_per_bin = 1;
 		fpovs_per_batch = 63;
@@ -3027,6 +3028,23 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs,
 		ps_offset++;
 	}
 
+	if (ps->info.info.ps.num_input_clips_culls) {
+		unsigned vs_offset;
+
+		vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
+		if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
+			ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+			++ps_offset;
+		}
+
+		vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
+		if (vs_offset != AC_EXP_PARAM_UNDEFINED &&
+		    ps->info.info.ps.num_input_clips_culls > 4) {
+			ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+			++ps_offset;
+		}
+	}
+
 	for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
 		unsigned vs_offset;
 		bool flat_shade;
@@ -3319,6 +3337,17 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline,
 		}
 	}
 
+	/* Workaround for a VGT hang when strip primitive types are used with
+	 * primitive restart.
+	 */
+	if (pipeline->graphics.prim_restart_enable &&
+	    (prim == V_008958_DI_PT_LINESTRIP ||
+	     prim == V_008958_DI_PT_TRISTRIP ||
+	     prim == V_008958_DI_PT_LINESTRIP_ADJ ||
+	     prim == V_008958_DI_PT_TRISTRIP_ADJ)) {
+		ia_multi_vgt_param.partial_vs_wave = true;
+	}
+
 	ia_multi_vgt_param.base =
 		S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) |
 		/* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index e3229ab59bb..427e677cc5f 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -648,12 +648,19 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
 {
 	struct radv_device *device = cmd_buffer->device;
 	struct radv_meta_saved_state saved_state;
+	bool old_predicating;
 
 	radv_meta_save(&saved_state, cmd_buffer,
 		       RADV_META_SAVE_COMPUTE_PIPELINE |
 		       RADV_META_SAVE_CONSTANTS |
 		       RADV_META_SAVE_DESCRIPTORS);
 
+	/* VK_EXT_conditional_rendering says that copy commands should not be
+	 * affected by conditional rendering.
+	 */
+	old_predicating = cmd_buffer->state.predicating;
+	cmd_buffer->state.predicating = false;
+
 	struct radv_buffer dst_buffer = {
 		.bo = dst_bo,
 		.offset = dst_offset,
@@ -736,6 +743,9 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
 	                                RADV_CMD_FLAG_INV_VMEM_L1 |
 	                                RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
 
+	/* Restore conditional rendering. */
+	cmd_buffer->state.predicating = old_predicating;
+
 	radv_meta_restore(&saved_state, cmd_buffer);
 }
 
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 7d4265cfdad..4093d36c4de 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -259,9 +259,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
 		 */
 		NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_shader_out);
 
-		NIR_PASS_V(nir, nir_remove_dead_variables,
-		           nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
-
 		/* Now that we've deleted all but the main function, we can go ahead and
 		 * lower the rest of the constant initializers.
 		 */
@@ -273,6 +270,9 @@ radv_shader_compile_to_nir(struct radv_device *device,
 		NIR_PASS_V(nir, nir_split_var_copies);
 		NIR_PASS_V(nir, nir_split_per_member_structs);
 
+		NIR_PASS_V(nir, nir_remove_dead_variables,
+		           nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
+
 		NIR_PASS_V(nir, nir_lower_system_values);
 		NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
 	}
@@ -409,6 +409,7 @@ radv_fill_shader_variant(struct radv_device *device,
 
 	variant->code_size = radv_get_shader_binary_size(binary);
 	variant->rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) |
+			 S_00B12C_USER_SGPR_MSB(variant->info.num_user_sgprs >> 5) |
 			 S_00B12C_SCRATCH_EN(scratch_enabled);
 
 	variant->rsrc1 = S_00B848_VGPRS((variant->config.num_vgprs - 1) / 4) |
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 03760b689c3..c490b69f52b 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -174,6 +174,7 @@ struct radv_shader_info {
 		bool has_pcoord;
 		bool prim_id_input;
 		bool layer_input;
+		uint8_t num_input_clips_culls;
 	} ps;
 	struct {
 		bool uses_grid_size;
@@ -191,7 +192,6 @@ struct radv_userdata_info {
 	int8_t sgpr_idx;
 	uint8_t num_sgprs;
 	bool indirect;
-	uint32_t indirect_offset;
 };
 
 struct radv_userdata_locations {
diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c
index 8026cca46c8..a45c847c46c 100644
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -341,6 +341,7 @@ static void
 gather_info_input_decl_ps(const nir_shader *nir, const nir_variable *var,
 			  struct radv_shader_info *info)
 {
+	unsigned attrib_count = glsl_count_attribute_slots(var->type, false);
 	const struct glsl_type *type = glsl_without_array(var->type);
 	int idx = var->data.location;
 
@@ -354,6 +355,9 @@ gather_info_input_decl_ps(const nir_shader *nir, const nir_variable *var,
 	case VARYING_SLOT_LAYER:
 		info->ps.layer_input = true;
 		break;
+	case VARYING_SLOT_CLIP_DIST0:
+		info->ps.num_input_clips_culls = attrib_count;
+		break;
 	default:
 		break;
 	}
diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c
index 20484177135..6479bea070b 100644
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -206,23 +206,38 @@ VkResult radv_GetSwapchainImagesKHR(
 }
 
 VkResult radv_AcquireNextImageKHR(
-	VkDevice                                     _device,
+	VkDevice                                     device,
 	VkSwapchainKHR                               swapchain,
 	uint64_t                                     timeout,
 	VkSemaphore                                  semaphore,
-	VkFence                                      _fence,
+	VkFence                                      fence,
+	uint32_t*                                    pImageIndex)
+{
+	VkAcquireNextImageInfoKHR acquire_info = {
+		.sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR,
+		.swapchain = swapchain,
+		.timeout = timeout,
+		.semaphore = semaphore,
+		.fence = fence,
+		.deviceMask = 0,
+	};
+
+	return radv_AcquireNextImage2KHR(device, &acquire_info, pImageIndex);
+}
+
+VkResult radv_AcquireNextImage2KHR(
+	VkDevice                                     _device,
+	const VkAcquireNextImageInfoKHR*             pAcquireInfo,
 	uint32_t*                                    pImageIndex)
 {
 	RADV_FROM_HANDLE(radv_device, device, _device);
 	struct radv_physical_device *pdevice = device->physical_device;
-	RADV_FROM_HANDLE(radv_fence, fence, _fence);
-
-	VkResult result = wsi_common_acquire_next_image(&pdevice->wsi_device,
-							_device,
-							swapchain,
-							timeout,
-							semaphore,
-							pImageIndex);
+	RADV_FROM_HANDLE(radv_fence, fence, pAcquireInfo->fence);
+
+	VkResult result = wsi_common_acquire_next_image2(&pdevice->wsi_device,
+							 _device,
+                                                         pAcquireInfo,
+							 pImageIndex);
 
 	if (fence && (result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR)) {
 		fence->submitted = true;
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index 2337036c67a..63e07e457c1 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -342,6 +342,7 @@ si_emit_config(struct radv_physical_device *physical_device,
 		switch (physical_device->rad_info.family) {
 		case CHIP_VEGA10:
 		case CHIP_VEGA12:
+		case CHIP_VEGA20:
 			pc_lines = 4096;
 			break;
 		case CHIP_RAVEN:
@@ -531,16 +532,16 @@ si_write_scissors(struct radeon_cmdbuf *cs, int first,
 		VkRect2D scissor = si_intersect_scissor(&scissors[i], &viewport_scissor);
 
 		get_viewport_xform(viewports + i, scale, translate);
-		scale[0] = abs(scale[0]);
-		scale[1] = abs(scale[1]);
+		scale[0] = fabsf(scale[0]);
+		scale[1] = fabsf(scale[1]);
 
 		if (scale[0] < 0.5)
 			scale[0] = 0.5;
 		if (scale[1] < 0.5)
 			scale[1] = 0.5;
 
-		guardband_x = MIN2(guardband_x, (max_range - abs(translate[0])) / scale[0]);
-		guardband_y = MIN2(guardband_y, (max_range - abs(translate[1])) / scale[1]);
+		guardband_x = MIN2(guardband_x, (max_range - fabsf(translate[0])) / scale[0]);
+		guardband_y = MIN2(guardband_y, (max_range - fabsf(translate[1])) / scale[1]);
 
 		radeon_emit(cs, S_028250_TL_X(scissor.offset.x) |
 			    S_028250_TL_Y(scissor.offset.y) |
@@ -698,7 +699,7 @@ void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs,
 		 * counters) must immediately precede every timestamp event to
 		 * prevent a GPU hang on GFX9.
 		 */
-		if (chip_class == GFX9) {
+		if (chip_class == GFX9 && !is_mec) {
 			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 			radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
 			radeon_emit(cs, gfx9_eop_bug_va);
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index c94c0f339fd..149c2562187 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -673,7 +673,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
 			if (!cs->num_buffers)
 				continue;
 
-			if (unique_bo_count == 0) {
+			if (unique_bo_count == 0 && !cs->num_virtual_buffers) {
 				memcpy(handles, cs->handles, cs->num_buffers * sizeof(amdgpu_bo_handle));
 				unique_bo_count = cs->num_buffers;
 				continue;
diff --git a/src/broadcom/Android.cle.mk b/src/broadcom/Android.cle.mk
index 9b728424fa1..5634a8d4ad3 100644
--- a/src/broadcom/Android.cle.mk
+++ b/src/broadcom/Android.cle.mk
@@ -29,6 +29,10 @@ LOCAL_SRC_FILES := $(BROADCOM_DECODER_FILES)
 
 LOCAL_STATIC_LIBRARIES := libmesa_broadcom_genxml
 
+LOCAL_C_INCLUDES += $(MESA_TOP)/src/gallium/include
+
+LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)
+
 LOCAL_SHARED_LIBRARIES := libexpat libz
 
 include $(MESA_COMMON_MK)
diff --git a/src/broadcom/Android.genxml.mk b/src/broadcom/Android.genxml.mk
index eb5d142fe09..91e0de05d98 100644
--- a/src/broadcom/Android.genxml.mk
+++ b/src/broadcom/Android.genxml.mk
@@ -39,7 +39,7 @@ $(intermediates)/dummy.c:
 # This is the list of auto-generated files headers
 LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/broadcom/, $(BROADCOM_GENXML_GENERATED_FILES))
 
-define header-gen
+define pack-header-gen
 	@mkdir -p $(dir $@)
 	@echo "Gen Header: $(PRIVATE_MODULE) <= $(notdir $(@))"
 	$(hide) $(PRIVATE_SCRIPT) $(PRIVATE_SCRIPT_FLAGS) $(PRIVATE_XML) $(PRIVATE_VER) > $@
@@ -49,25 +49,25 @@ $(intermediates)/broadcom/cle/v3d_packet_v21_pack.h: PRIVATE_SCRIPT := $(MESA_PY
 $(intermediates)/broadcom/cle/v3d_packet_v21_pack.h: PRIVATE_XML := $(LOCAL_PATH)/cle/v3d_packet_v21.xml
 $(intermediates)/broadcom/cle/v3d_packet_v21_pack.h: PRIVATE_VER := 21
 $(intermediates)/broadcom/cle/v3d_packet_v21_pack.h: $(LOCAL_PATH)/cle/v3d_packet_v21.xml $(LOCAL_PATH)/cle/gen_pack_header.py
-	$(call header-gen)
+	$(call pack-header-gen)
 
 $(intermediates)/broadcom/cle/v3d_packet_v33_pack.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/cle/gen_pack_header.py
 $(intermediates)/broadcom/cle/v3d_packet_v33_pack.h: PRIVATE_XML := $(LOCAL_PATH)/cle/v3d_packet_v33.xml
 $(intermediates)/broadcom/cle/v3d_packet_v33_pack.h: PRIVATE_VER := 33
 $(intermediates)/broadcom/cle/v3d_packet_v33_pack.h: $(LOCAL_PATH)/cle/v3d_packet_v33.xml $(LOCAL_PATH)/cle/gen_pack_header.py
-	$(call header-gen)
+	$(call pack-header-gen)
 
 $(intermediates)/broadcom/cle/v3d_packet_v41_pack.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/cle/gen_pack_header.py
 $(intermediates)/broadcom/cle/v3d_packet_v41_pack.h: PRIVATE_XML := $(LOCAL_PATH)/cle/v3d_packet_v33.xml
 $(intermediates)/broadcom/cle/v3d_packet_v41_pack.h: PRIVATE_VER := 41
 $(intermediates)/broadcom/cle/v3d_packet_v41_pack.h: $(LOCAL_PATH)/cle/v3d_packet_v33.xml $(LOCAL_PATH)/cle/gen_pack_header.py
-	$(call header-gen)
+	$(call pack-header-gen)
 
 $(intermediates)/broadcom/cle/v3d_packet_v42_pack.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/cle/gen_pack_header.py
 $(intermediates)/broadcom/cle/v3d_packet_v42_pack.h: PRIVATE_XML := $(LOCAL_PATH)/cle/v3d_packet_v33.xml
 $(intermediates)/broadcom/cle/v3d_packet_v42_pack.h: PRIVATE_VER := 42
 $(intermediates)/broadcom/cle/v3d_packet_v42_pack.h: $(LOCAL_PATH)/cle/v3d_packet_v33.xml $(LOCAL_PATH)/cle/gen_pack_header.py
-	$(call header-gen)
+	$(call pack-header-gen)
 
 $(intermediates)/broadcom/cle/v3d_xml.h: $(addprefix $(MESA_TOP)/src/broadcom/,$(BROADCOM_GENXML_XML_FILES)) $(MESA_TOP)/src/intel/genxml/gen_zipped_file.py
 	@mkdir -p $(dir $@)
diff --git a/src/broadcom/cle/v3d_packet_v33.xml b/src/broadcom/cle/v3d_packet_v33.xml
index 6ce8299e26b..f471d542c56 100644
--- a/src/broadcom/cle/v3d_packet_v33.xml
+++ b/src/broadcom/cle/v3d_packet_v33.xml
@@ -528,6 +528,16 @@
     <field name="number of attribute arrays" size="5" start="0" type="uint"/>
   </packet>
 
+  <packet code="71" name="VCM Cache Size" min_ver="41">
+    <field name="Number of 16-vertex batches for rendering" size="4" start="4" type="uint"/>
+    <field name="Number of 16-vertex batches for binning" size="4" start="0" type="uint"/>
+  </packet>
+
+  <packet code="73" name="VCM Cache Size" max_ver="33">
+    <field name="Number of 16-vertex batches for rendering" size="4" start="4" type="uint"/>
+    <field name="Number of 16-vertex batches for binning" size="4" start="0" type="uint"/>
+  </packet>
+
   <packet code="73" name="Transform Feedback Buffer" min_ver="41">
     <field name="Buffer Address" size="32" start="32" type="address"/>
     <field name="Buffer Size in 32-bit words" size="30" start="2" type="uint"/>
diff --git a/src/broadcom/common/v3d_device_info.h b/src/broadcom/common/v3d_device_info.h
index 5685c7a2161..b0a2a02154c 100644
--- a/src/broadcom/common/v3d_device_info.h
+++ b/src/broadcom/common/v3d_device_info.h
@@ -27,13 +27,14 @@
 #include <stdint.h>
 
 /**
- * Struct for tracking features of the V3D chip. This is where we'll store
- * boolean flags for features in a specific version, but for now it's just the
- * version
+ * Struct for tracking features of the V3D chip across driver and compiler.
  */
 struct v3d_device_info {
         /** Simple V3D version: major * 10 + minor */
         uint8_t ver;
+
+        /** Size of the VPM, in bytes. */
+        int vpm_size;
 };
 
 #endif
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c
index fb5ecd6410c..4f3b621fd29 100644
--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@@ -462,6 +462,7 @@ struct choose_scoreboard {
         int last_magic_sfu_write_tick;
         int last_ldvary_tick;
         int last_uniforms_reset_tick;
+        int last_thrsw_tick;
         bool tlb_locked;
 };
 
@@ -1095,10 +1096,16 @@ qpu_instruction_valid_in_thrend_slot(struct v3d_compile *c,
 }
 
 static bool
-valid_thrsw_sequence(struct v3d_compile *c,
+valid_thrsw_sequence(struct v3d_compile *c, struct choose_scoreboard *scoreboard,
                      struct qinst *qinst, int instructions_in_sequence,
                      bool is_thrend)
 {
+        /* No emitting our thrsw while the previous thrsw hasn't happened yet. */
+        if (scoreboard->last_thrsw_tick + 3 >
+            scoreboard->tick - instructions_in_sequence) {
+                return false;
+        }
+
         for (int slot = 0; slot < instructions_in_sequence; slot++) {
                 /* No scheduling SFU when the result would land in the other
                  * thread.  The simulator complains for safety, though it
@@ -1159,7 +1166,8 @@ emit_thrsw(struct v3d_compile *c,
                 if (!v3d_qpu_sig_pack(c->devinfo, &sig, &packed_sig))
                         break;
 
-                if (!valid_thrsw_sequence(c, prev_inst, slots_filled + 1,
+                if (!valid_thrsw_sequence(c, scoreboard,
+                                          prev_inst, slots_filled + 1,
                                           is_thrend)) {
                         break;
                 }
@@ -1173,7 +1181,9 @@ emit_thrsw(struct v3d_compile *c,
         if (merge_inst) {
                 merge_inst->qpu.sig.thrsw = true;
                 needs_free = true;
+                scoreboard->last_thrsw_tick = scoreboard->tick - slots_filled;
         } else {
+                scoreboard->last_thrsw_tick = scoreboard->tick;
                 insert_scheduled_instruction(c, block, scoreboard, inst);
                 time++;
                 slots_filled++;
@@ -1475,6 +1485,7 @@ v3d_qpu_schedule_instructions(struct v3d_compile *c)
         scoreboard.last_ldvary_tick = -10;
         scoreboard.last_magic_sfu_write_tick = -10;
         scoreboard.last_uniforms_reset_tick = -10;
+        scoreboard.last_thrsw_tick = -10;
 
         if (debug) {
                 fprintf(stderr, "Pre-schedule instructions\n");
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index 33a9942734d..070e6a3aa59 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -648,6 +648,9 @@ struct v3d_vs_prog_data {
 
         /* Total number of components written, for the shader state record. */
         uint32_t vpm_output_size;
+
+        /* Value to be programmed in VCM_CACHE_SIZE. */
+        uint8_t vcm_cache_size;
 };
 
 struct v3d_fs_prog_data {
@@ -928,7 +931,7 @@ VIR_A_ALU2(OR)
 VIR_A_ALU2(XOR)
 VIR_A_ALU2(VADD)
 VIR_A_ALU2(VSUB)
-VIR_A_ALU2(STVPMV)
+VIR_A_NODST_2(STVPMV)
 VIR_A_ALU1(NOT)
 VIR_A_ALU1(NEG)
 VIR_A_ALU1(FLAPUSH)
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 86379faa5bb..6b55b0e03bc 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -452,6 +452,16 @@ vir_emit_def(struct v3d_compile *c, struct qinst *inst)
 {
         assert(inst->dst.file == QFILE_NULL);
 
+        /* If we're emitting an instruction that's a def, it had better be
+         * writing a register.
+         */
+        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
+                assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP ||
+                       v3d_qpu_add_op_has_dst(inst->qpu.alu.add.op));
+                assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP ||
+                       v3d_qpu_mul_op_has_dst(inst->qpu.alu.mul.op));
+        }
+
         inst->dst = vir_get_temp(c);
 
         if (inst->dst.file == QFILE_TEMP)
@@ -746,10 +756,29 @@ uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler,
         if (prog_data->uses_iid)
                 prog_data->vpm_input_size++;
 
-        /* Input/output segment size are in 8x32-bit multiples. */
+        /* Input/output segment size are in sectors (8 rows of 32 bits per
+         * channel).
+         */
         prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
         prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8;
 
+        /* Compute VCM cache size.  We set up our program to take up less than
+         * half of the VPM, so that any set of bin and render programs won't
+         * run out of space.  We need space for at least one input segment,
+         * and then allocate the rest to output segments (one for the current
+         * program, the rest to VCM).  The valid range of the VCM cache size
+         * field is 1-4 16-vertex batches, but GFXH-1744 limits us to 2-4
+         * batches.
+         */
+        assert(c->devinfo->vpm_size);
+        int sector_size = 16 * sizeof(uint32_t) * 8;
+        int vpm_size_in_sectors = c->devinfo->vpm_size / sector_size;
+        int half_vpm = vpm_size_in_sectors / 2;
+        int vpm_output_sectors = half_vpm - prog_data->vpm_input_size;
+        int vpm_output_batches = vpm_output_sectors / prog_data->vpm_output_size;
+        assert(vpm_output_batches >= 2);
+        prog_data->vcm_cache_size = CLAMP(vpm_output_batches - 1, 2, 4);
+
         return v3d_return_qpu_insts(c, final_assembly_size);
 }
 
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
index 5a856acd7ed..61d273575d9 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
+++ b/src/broadcom/compiler/vir_register_allocate.c
@@ -94,6 +94,15 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g,
                                 }
                         }
 
+                        /* Refuse to spill a ldvary's dst, because that means
+                         * that ldvary's r5 would end up being used across a
+                         * thrsw.
+                         */
+                        if (inst->qpu.sig.ldvary) {
+                                assert(inst->dst.file == QFILE_TEMP);
+                                BITSET_CLEAR(c->spillable, inst->dst.index);
+                        }
+
                         if (inst->is_last_thrsw)
                                 started_last_seg = true;
 
@@ -102,7 +111,7 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g,
                                 started_last_seg = true;
 
                         /* Track when we're in between a TMU setup and the
-                         * final LDTMU from that TMU setup.  We can't
+                         * final LDTMU or TMUWT from that TMU setup.  We can't
                          * spill/fill any temps during that time, because that
                          * involves inserting a new TMU setup/LDTMU sequence.
                          */
@@ -110,6 +119,10 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g,
                             is_last_ldtmu(inst, block))
                                 in_tmu_operation = false;
 
+                        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
+                            inst->qpu.alu.add.op == V3D_QPU_A_TMUWT)
+                                in_tmu_operation = false;
+
                         if (v3d_qpu_writes_tmu(&inst->qpu))
                                 in_tmu_operation = true;
                 }
@@ -206,6 +219,7 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)
                                      inst->dst);
                         v3d_emit_spill_tmua(c, spill_offset);
                         vir_emit_thrsw(c);
+                        vir_TMUWT(c);
                         c->spills++;
                 }
 
diff --git a/src/compiler/Android.glsl.mk b/src/compiler/Android.glsl.mk
index 0aabafa2673..37b3cb80251 100644
--- a/src/compiler/Android.glsl.mk
+++ b/src/compiler/Android.glsl.mk
@@ -48,7 +48,7 @@ LOCAL_STATIC_LIBRARIES := \
 	libmesa_nir
 
 LOCAL_MODULE := libmesa_glsl
-
+LOCAL_CFLAGS += -Wno-error
 include $(LOCAL_PATH)/Android.glsl.gen.mk
 include $(MESA_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
diff --git a/src/compiler/Android.nir.mk b/src/compiler/Android.nir.mk
index 75a247a245d..59da5dbdc1c 100644
--- a/src/compiler/Android.nir.mk
+++ b/src/compiler/Android.nir.mk
@@ -41,6 +41,9 @@ LOCAL_C_INCLUDES := \
 	$(MESA_TOP)/src/gallium/include \
 	$(MESA_TOP)/src/gallium/auxiliary
 
+LOCAL_CFLAGS := \
+        -Wno-missing-braces
+
 LOCAL_STATIC_LIBRARIES := libmesa_compiler
 
 LOCAL_MODULE := libmesa_nir
diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 74160ec142b..d3f7a0fbdd6 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -1683,6 +1683,12 @@ ast_expression::do_hir(exec_list *instructions,
       op[1] = this->subexpressions[1]->hir(instructions, state);
 
       orig_type = op[0]->type;
+
+      /* Break out if operand types were not parsed successfully. */
+      if ((op[0]->type == glsl_type::error_type ||
+           op[1]->type == glsl_type::error_type))
+         break;
+
       type = arithmetic_result_type(op[0], op[1],
                                     (this->oper == ast_mul_assign),
                                     state, & loc);
@@ -1928,6 +1934,11 @@ ast_expression::do_hir(exec_list *instructions,
 
       error_emitted = op[0]->type->is_error() || op[1]->type->is_error();
 
+      if (error_emitted) {
+         result = ir_rvalue::error_value(ctx);
+         break;
+      }
+
       type = arithmetic_result_type(op[0], op[1], false, state, & loc);
 
       ir_rvalue *temp_rhs;
diff --git a/src/compiler/glsl/glcpp/glcpp-lex.l b/src/compiler/glsl/glcpp/glcpp-lex.l
index 9cfcc120222..f7003da0cc8 100644
--- a/src/compiler/glsl/glcpp/glcpp-lex.l
+++ b/src/compiler/glsl/glcpp/glcpp-lex.l
@@ -289,6 +289,7 @@ HEXADECIMAL_INTEGER	0[xX][0-9a-fA-F]+[uU]?
          * token. */
 	if (parser->first_non_space_token_this_line) {
 		BEGIN HASH;
+		yyextra->in_define = false;
 	}
 
 	RETURN_TOKEN_NEVER_SKIP (HASH_TOKEN);
@@ -336,43 +337,55 @@ HEXADECIMAL_INTEGER	0[xX][0-9a-fA-F]+[uU]?
 	/* For the pre-processor directives, we return these tokens
 	 * even when we are otherwise skipping. */
 <HASH>ifdef {
-	BEGIN INITIAL;
-	yyextra->lexing_directive = 1;
-	yyextra->space_tokens = 0;
-	RETURN_TOKEN_NEVER_SKIP (IFDEF);
+	if (!yyextra->in_define) {
+		BEGIN INITIAL;
+		yyextra->lexing_directive = 1;
+		yyextra->space_tokens = 0;
+		RETURN_TOKEN_NEVER_SKIP (IFDEF);
+	}
 }
 
 <HASH>ifndef {
-	BEGIN INITIAL;
-	yyextra->lexing_directive = 1;
-	yyextra->space_tokens = 0;
-	RETURN_TOKEN_NEVER_SKIP (IFNDEF);
+	if (!yyextra->in_define) {
+		BEGIN INITIAL;
+		yyextra->lexing_directive = 1;
+		yyextra->space_tokens = 0;
+		RETURN_TOKEN_NEVER_SKIP (IFNDEF);
+	}
 }
 
 <HASH>if/[^_a-zA-Z0-9] {
-	BEGIN INITIAL;
-	yyextra->lexing_directive = 1;
-	yyextra->space_tokens = 0;
-	RETURN_TOKEN_NEVER_SKIP (IF);
+	if (!yyextra->in_define) {
+		BEGIN INITIAL;
+		yyextra->lexing_directive = 1;
+		yyextra->space_tokens = 0;
+		RETURN_TOKEN_NEVER_SKIP (IF);
+	}
 }
 
 <HASH>elif/[^_a-zA-Z0-9] {
-	BEGIN INITIAL;
-	yyextra->lexing_directive = 1;
-	yyextra->space_tokens = 0;
-	RETURN_TOKEN_NEVER_SKIP (ELIF);
+	if (!yyextra->in_define) {
+		BEGIN INITIAL;
+		yyextra->lexing_directive = 1;
+		yyextra->space_tokens = 0;
+		RETURN_TOKEN_NEVER_SKIP (ELIF);
+	}
 }
 
 <HASH>else {
-	BEGIN INITIAL;
-	yyextra->space_tokens = 0;
-	RETURN_TOKEN_NEVER_SKIP (ELSE);
+	if (!yyextra->in_define) {
+		BEGIN INITIAL;
+		yyextra->space_tokens = 0;
+		RETURN_TOKEN_NEVER_SKIP (ELSE);
+	}
 }
 
 <HASH>endif {
-	BEGIN INITIAL;
-	yyextra->space_tokens = 0;
-	RETURN_TOKEN_NEVER_SKIP (ENDIF);
+	if (!yyextra->in_define) {
+		BEGIN INITIAL;
+		yyextra->space_tokens = 0;
+		RETURN_TOKEN_NEVER_SKIP (ENDIF);
+	}
 }
 
 <HASH>error[^\r\n]* {
@@ -399,7 +412,8 @@ HEXADECIMAL_INTEGER	0[xX][0-9a-fA-F]+[uU]?
 	 *	  and not whitespace). This will generate an error.
 	 */
 <HASH>define{HSPACE}* {
-	if (! parser->skipping) {
+	yyextra->in_define = true;
+	if (!parser->skipping) {
 		BEGIN DEFINE;
 		yyextra->space_tokens = 0;
 		RETURN_TOKEN (DEFINE_TOKEN);
@@ -420,8 +434,10 @@ HEXADECIMAL_INTEGER	0[xX][0-9a-fA-F]+[uU]?
 
 	/* This will catch any non-directive garbage after a HASH */
 <HASH>{NONSPACE} {
-	BEGIN INITIAL;
-	RETURN_TOKEN (GARBAGE);
+	if (!parser->skipping) {
+		BEGIN INITIAL;
+		RETURN_TOKEN (GARBAGE);
+	}
 }
 
 	/* An identifier immediately followed by '(' */
diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y
index 4be5cfa3d54..c951d9526ac 100644
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -224,10 +224,12 @@ expanded_line:
 			glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro);
 		_glcpp_parser_skip_stack_change_if (parser, & @1, "elif", $2.value);
 	}
-|	LINE_EXPANDED integer_constant NEWLINE {
+|	LINE_EXPANDED expression NEWLINE {
+		if (parser->is_gles && $2.undefined_macro)
+			glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro);
 		parser->has_new_line_number = 1;
-		parser->new_line_number = $2;
-		_mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", $2);
+		parser->new_line_number = $2.value;
+		_mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", $2.value);
 	}
 |	LINE_EXPANDED integer_constant integer_constant NEWLINE {
 		parser->has_new_line_number = 1;
@@ -238,6 +240,17 @@ expanded_line:
 					   "#line %" PRIiMAX " %" PRIiMAX "\n",
 					    $2, $3);
 	}
+|	LINE_EXPANDED '(' expression ')' '(' expression ')' NEWLINE {
+		if (parser->is_gles && $3.undefined_macro)
+			glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $3.undefined_macro);
+		if (parser->is_gles && $6.undefined_macro)
+			glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $6.undefined_macro);
+		parser->has_new_line_number = 1;
+		parser->new_line_number = $3.value;
+		parser->has_new_source_number = 1;
+		parser->new_source_number = $6.value;
+		_mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX " %" PRIiMAX "\n", $3.value, $6.value);
+	}
 ;
 
 define:
@@ -1074,6 +1087,20 @@ _token_list_equal_ignoring_space(token_list_t *a, token_list_t *b)
 
    while (1)
    {
+      if (node_a == NULL && node_b == NULL)
+         break;
+
+      /* Ignore trailing whitespace */
+      if (node_a == NULL && node_b->token->type == SPACE) {
+         while (node_b && node_b->token->type == SPACE)
+            node_b = node_b->next;
+      }
+
+      if (node_b == NULL && node_a->token->type == SPACE) {
+         while (node_a && node_a->token->type == SPACE)
+            node_a = node_a->next;
+      }
+
       if (node_a == NULL && node_b == NULL)
          break;
 
diff --git a/src/compiler/glsl/glcpp/glcpp.h b/src/compiler/glsl/glcpp/glcpp.h
index c7e382ed30c..e786b24b132 100644
--- a/src/compiler/glsl/glcpp/glcpp.h
+++ b/src/compiler/glsl/glcpp/glcpp.h
@@ -197,6 +197,7 @@ struct glcpp_parser {
 	int first_non_space_token_this_line;
 	int newline_as_space;
 	int in_control_line;
+	bool in_define;
 	int paren_count;
 	int commented_newlines;
 	skip_node_t *skip_stack;
diff --git a/src/compiler/glsl/glcpp/meson.build b/src/compiler/glsl/glcpp/meson.build
index 09d44ddd687..287da35006d 100644
--- a/src/compiler/glsl/glcpp/meson.build
+++ b/src/compiler/glsl/glcpp/meson.build
@@ -64,8 +64,9 @@ if with_tests
   foreach m : modes
     test(
       'glcpp test (@0@)'.format(m),
-      find_program('tests/glcpp_test.py'),
+      prog_python2,
       args : [
+        join_paths(meson.current_source_dir(), 'tests/glcpp_test.py'),
         glcpp, join_paths(meson.current_source_dir(), 'tests'),
         '--@0@'.format(m),
       ],
diff --git a/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c b/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c
index ae7ea09f67e..2b084e0960a 100644
--- a/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c
+++ b/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c
@@ -2,6 +2,7 @@
 #define TWO  ( 1+1 )
 #define FOUR (2 + 2)
 #define SIX  (3 + 3)
+#define EIGHT (8 + 8)
 
 /* Redefinitions with whitespace in same places, but different amounts, (so no
  * error). */
@@ -9,6 +10,9 @@
 #define FOUR    (2	+  2)
 #define SIX	(3/*comment is whitespace*/+   /* collapsed */ /* to */ /* one */ /* space */  3)
 
+/* Trailing whitespace (no error) */
+#define EIGHT (8 + 8)       
+
 /* Redefinitions with whitespace in different places. Each of these should
  * trigger an error. */
 #define TWO  (1 + 1)
diff --git a/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c.expected b/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c.expected
index 602bdef94c2..766849e34a9 100644
--- a/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c.expected
+++ b/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c.expected
@@ -1,14 +1,15 @@
-0:14(9): preprocessor error: Redefinition of macro TWO
+0:18(9): preprocessor error: Redefinition of macro TWO
 
-0:15(9): preprocessor error: Redefinition of macro FOUR
+0:19(9): preprocessor error: Redefinition of macro FOUR
 
-0:16(9): preprocessor error: Redefinition of macro SIX
+0:20(9): preprocessor error: Redefinition of macro SIX
 
  
 
 
 
 
+
  
 
 
@@ -18,5 +19,8 @@
  
 
 
+ 
+
+
 
 
diff --git a/src/compiler/glsl/glcpp/tests/glcpp_test.py b/src/compiler/glsl/glcpp/tests/glcpp_test.py
old mode 100755
new mode 100644
index 8ac5d7cb0a1..8c7552124a6
--- a/src/compiler/glsl/glcpp/tests/glcpp_test.py
+++ b/src/compiler/glsl/glcpp/tests/glcpp_test.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python2
 # encoding=utf-8
 # Copyright © 2018 Intel Corporation
 
diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h
index 67b38f48eff..d05d1998a50 100644
--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -667,8 +667,8 @@ class ir_variable : public ir_instruction {
        * variable has been used.  For example, it is an error to redeclare a
        * variable as invariant after it has been used.
        *
-       * This is only maintained in the ast_to_hir.cpp path, not in
-       * Mesa's fixed function or ARB program paths.
+       * This is maintained in the ast_to_hir.cpp path and during linking,
+       * but not in Mesa's fixed function or ARB program paths.
        */
       unsigned used:1;
 
diff --git a/src/compiler/glsl/ir_constant_expression.cpp b/src/compiler/glsl/ir_constant_expression.cpp
index 4a0aff72c6f..c9788c70535 100644
--- a/src/compiler/glsl/ir_constant_expression.cpp
+++ b/src/compiler/glsl/ir_constant_expression.cpp
@@ -826,7 +826,7 @@ ir_dereference_array::constant_expression_value(void *mem_ctx,
          const unsigned component = idx->value.u[0];
 
          return new(mem_ctx) ir_constant(array, component);
-      } else {
+      } else if (array->type->is_array()) {
          const unsigned index = idx->value.u[0];
          return array->get_array_element(index)->clone(mem_ctx, NULL);
       }
diff --git a/src/compiler/glsl/link_interface_blocks.cpp b/src/compiler/glsl/link_interface_blocks.cpp
index e5eca9460e3..801fbcd5d9f 100644
--- a/src/compiler/glsl/link_interface_blocks.cpp
+++ b/src/compiler/glsl/link_interface_blocks.cpp
@@ -417,9 +417,15 @@ validate_interstage_inout_blocks(struct gl_shader_program *prog,
        * write to any of the pre-defined outputs (e.g. if the vertex shader
        * does not write to gl_Position, etc), which is allowed and results in
        * undefined behavior.
+       *
+       * From Section 4.3.4 (Inputs) of the GLSL 1.50 spec:
+       *
+       *    "Only the input variables that are actually read need to be written
+       *     by the previous stage; it is allowed to have superfluous
+       *     declarations of input variables."
        */
       if (producer_def == NULL &&
-          !is_builtin_gl_in_block(var, consumer->Stage)) {
+          !is_builtin_gl_in_block(var, consumer->Stage) && var->data.used) {
          linker_error(prog, "Input block `%s' is not an output of "
                       "the previous stage\n", var->get_interface_type()->name);
          return;
diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 3ce78fe6428..b7260aca4ec 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -2187,6 +2187,41 @@ link_cs_input_layout_qualifiers(struct gl_shader_program *prog,
    }
 }
 
+/**
+ * Link all out variables on a single stage which are not
+ * directly used in a shader with the main function.
+ */
+static void
+link_output_variables(struct gl_linked_shader *linked_shader,
+                      struct gl_shader **shader_list,
+                      unsigned num_shaders)
+{
+   struct glsl_symbol_table *symbols = linked_shader->symbols;
+
+   for (unsigned i = 0; i < num_shaders; i++) {
+
+      /* Skip shader object with main function */
+      if (shader_list[i]->symbols->get_function("main"))
+         continue;
+
+      foreach_in_list(ir_instruction, ir, shader_list[i]->ir) {
+         if (ir->ir_type != ir_type_variable)
+            continue;
+
+         ir_variable *var = (ir_variable *) ir;
+
+         if (var->data.mode == ir_var_shader_out &&
+               !symbols->get_variable(var->name)) {
+            var = var->clone(linked_shader, NULL);
+            symbols->add_variable(var);
+            linked_shader->ir->push_head(var);
+         }
+      }
+   }
+
+   return;
+}
+
 
 /**
  * Combine a group of shaders for a single stage to generate a linked shader
@@ -2352,6 +2387,9 @@ link_intrastage_shaders(void *mem_ctx,
       return NULL;
    }
 
+   if (linked->Stage != MESA_SHADER_FRAGMENT)
+      link_output_variables(linked, shader_list, num_shaders);
+
    /* Make a pass over all variable declarations to ensure that arrays with
     * unspecified sizes have a size specified.  The size is inferred from the
     * max_array_access field.
diff --git a/src/compiler/glsl/lower_vector_derefs.cpp b/src/compiler/glsl/lower_vector_derefs.cpp
index 7583d1fdd3e..6cd9a2d819a 100644
--- a/src/compiler/glsl/lower_vector_derefs.cpp
+++ b/src/compiler/glsl/lower_vector_derefs.cpp
@@ -59,8 +59,7 @@ vector_deref_visitor::visit_enter(ir_assignment *ir)
    if (!deref->array->type->is_vector())
       return ir_rvalue_enter_visitor::visit_enter(ir);
 
-   ir_dereference *const new_lhs = (ir_dereference *) deref->array;
-   ir->set_lhs(new_lhs);
+   ir_rvalue *const new_lhs = deref->array;
 
    void *mem_ctx = ralloc_parent(ir);
    ir_constant *old_index_constant =
@@ -72,8 +71,16 @@ vector_deref_visitor::visit_enter(ir_assignment *ir)
                                            ir->rhs,
                                            deref->array_index);
       ir->write_mask = (1 << new_lhs->type->vector_elements) - 1;
+      ir->set_lhs(new_lhs);
+   } else if (new_lhs->ir_type != ir_type_swizzle) {
+      ir->set_lhs(new_lhs);
+      ir->write_mask = 1 << old_index_constant->get_uint_component(0);
    } else {
-      ir->write_mask = 1 << old_index_constant->get_int_component(0);
+      /* If the "new" LHS is a swizzle, use the set_lhs helper to instead
+       * swizzle the RHS.
+       */
+      unsigned component[1] = { old_index_constant->get_uint_component(0) };
+      ir->set_lhs(new(mem_ctx) ir_swizzle(new_lhs, component, 1));
    }
 
    return ir_rvalue_enter_visitor::visit_enter(ir);
diff --git a/src/compiler/glsl/tests/meson.build b/src/compiler/glsl/tests/meson.build
index fc7b863a278..821760e962d 100644
--- a/src/compiler/glsl/tests/meson.build
+++ b/src/compiler/glsl/tests/meson.build
@@ -84,8 +84,10 @@ test(
 )
 
 test(
-  'glsl compiler warnings', find_program('warnings_test.py'),
+  'glsl compiler warnings',
+  prog_python2,
   args : [
+    join_paths(meson.current_source_dir(), 'warnings_test.py'),
     '--glsl-compiler', glsl_compiler,
     '--test-directory', join_paths(
       meson.source_root(), 'src', 'compiler', 'glsl', 'tests', 'warnings'
@@ -94,6 +96,9 @@ test(
 )
 test(
   'glsl optimization',
-  find_program('optimization_test.py'),
-  args : ['--test-runner', glsl_test],
+  prog_python2,
+  args : [
+    join_paths(meson.current_source_dir(), 'optimization_test.py'),
+    '--test-runner', glsl_test
+  ],
 )
diff --git a/src/compiler/glsl/tests/optimization_test.py b/src/compiler/glsl/tests/optimization_test.py
old mode 100755
new mode 100644
index 577d2dfc20f..f8518a168e0
--- a/src/compiler/glsl/tests/optimization_test.py
+++ b/src/compiler/glsl/tests/optimization_test.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python2
 # encoding=utf-8
 # Copyright © 2018 Intel Corporation
 
diff --git a/src/compiler/glsl/tests/warnings_test.py b/src/compiler/glsl/tests/warnings_test.py
old mode 100755
new mode 100644
index 2e0f23180f3..2c4fa5a0d5a
--- a/src/compiler/glsl/tests/warnings_test.py
+++ b/src/compiler/glsl/tests/warnings_test.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # encoding=utf-8
 # Copyright © 2017 Intel Corporation
 
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index bca6a32c956..b7c8754b4cf 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2926,6 +2926,7 @@ bool nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only);
 
 bool nir_lower_phis_to_regs_block(nir_block *block);
 bool nir_lower_ssa_defs_to_regs_block(nir_block *block);
+bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl);
 
 bool nir_opt_algebraic(nir_shader *shader);
 bool nir_opt_algebraic_before_ffma(nir_shader *shader);
diff --git a/src/compiler/nir/nir_control_flow.c b/src/compiler/nir/nir_control_flow.c
index 1622b35a6c9..0617c6a905d 100644
--- a/src/compiler/nir/nir_control_flow.c
+++ b/src/compiler/nir/nir_control_flow.c
@@ -444,6 +444,23 @@ nearest_loop(nir_cf_node *node)
    return nir_cf_node_as_loop(node);
 }
 
+static void
+remove_phi_src(nir_block *block, nir_block *pred)
+{
+   nir_foreach_instr(instr, block) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      nir_foreach_phi_src_safe(src, phi) {
+         if (src->pred == pred) {
+            list_del(&src->src.use_link);
+            exec_node_remove(&src->node);
+         }
+      }
+   }
+}
+
 /*
  * update the CFG after a jump instruction has been added to the end of a block
  */
@@ -454,6 +471,10 @@ nir_handle_add_jump(nir_block *block)
    nir_instr *instr = nir_block_last_instr(block);
    nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
 
+   if (block->successors[0])
+      remove_phi_src(block->successors[0], block);
+   if (block->successors[1])
+      remove_phi_src(block->successors[1], block);
    unlink_block_successors(block);
 
    nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
@@ -477,23 +498,6 @@ nir_handle_add_jump(nir_block *block)
    }
 }
 
-static void
-remove_phi_src(nir_block *block, nir_block *pred)
-{
-   nir_foreach_instr(instr, block) {
-      if (instr->type != nir_instr_type_phi)
-         break;
-
-      nir_phi_instr *phi = nir_instr_as_phi(instr);
-      nir_foreach_phi_src_safe(src, phi) {
-         if (src->pred == pred) {
-            list_del(&src->src.use_link);
-            exec_node_remove(&src->node);
-         }
-      }
-   }
-}
-
 /* Removes the successor of a block with a jump. Note that the jump to be
  * eliminated may be free-floating.
  */
diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c
index c03acf83597..6f788ad1aa5 100644
--- a/src/compiler/nir/nir_deref.c
+++ b/src/compiler/nir/nir_deref.c
@@ -24,6 +24,7 @@
 #include "nir.h"
 #include "nir_builder.h"
 #include "nir_deref.h"
+#include "util/hash_table.h"
 
 void
 nir_deref_path_init(nir_deref_path *path,
@@ -270,3 +271,135 @@ nir_fixup_deref_modes(nir_shader *shader)
       }
    }
 }
+
+struct rematerialize_deref_state {
+   bool progress;
+   nir_builder builder;
+   nir_block *block;
+   struct hash_table *cache;
+};
+
+static nir_deref_instr *
+rematerialize_deref_in_block(nir_deref_instr *deref,
+                             struct rematerialize_deref_state *state)
+{
+   if (deref->instr.block == state->block)
+      return deref;
+
+   if (!state->cache) {
+      state->cache = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+   }
+
+   struct hash_entry *cached = _mesa_hash_table_search(state->cache, deref);
+   if (cached)
+      return cached->data;
+
+   nir_builder *b = &state->builder;
+   nir_deref_instr *new_deref =
+      nir_deref_instr_create(b->shader, deref->deref_type);
+   new_deref->mode = deref->mode;
+   new_deref->type = deref->type;
+
+   if (deref->deref_type == nir_deref_type_var) {
+      new_deref->var = deref->var;
+   } else {
+      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
+      if (parent) {
+         parent = rematerialize_deref_in_block(parent, state);
+         new_deref->parent = nir_src_for_ssa(&parent->dest.ssa);
+      } else {
+         nir_src_copy(&new_deref->parent, &deref->parent, new_deref);
+      }
+   }
+
+   switch (deref->deref_type) {
+   case nir_deref_type_var:
+   case nir_deref_type_array_wildcard:
+   case nir_deref_type_cast:
+      /* Nothing more to do */
+      break;
+
+   case nir_deref_type_array:
+      assert(!nir_src_as_deref(deref->arr.index));
+      nir_src_copy(&new_deref->arr.index, &deref->arr.index, new_deref);
+      break;
+
+   case nir_deref_type_struct:
+      new_deref->strct.index = deref->strct.index;
+      break;
+
+   default:
+      unreachable("Invalid deref instruction type");
+   }
+
+   nir_ssa_dest_init(&new_deref->instr, &new_deref->dest,
+                     deref->dest.ssa.num_components,
+                     deref->dest.ssa.bit_size,
+                     deref->dest.ssa.name);
+   nir_builder_instr_insert(b, &new_deref->instr);
+
+   return new_deref;
+}
+
+static bool
+rematerialize_deref_src(nir_src *src, void *_state)
+{
+   struct rematerialize_deref_state *state = _state;
+
+   nir_deref_instr *deref = nir_src_as_deref(*src);
+   if (!deref)
+      return true;
+
+   nir_deref_instr *block_deref = rematerialize_deref_in_block(deref, state);
+   if (block_deref != deref) {
+      nir_instr_rewrite_src(src->parent_instr, src,
+                            nir_src_for_ssa(&block_deref->dest.ssa));
+      nir_deref_instr_remove_if_unused(deref);
+      state->progress = true;
+   }
+
+   return true;
+}
+
+/** Re-materialize derefs in every block
+ *
+ * This pass re-materializes deref instructions in every block in which it is
+ * used.  After this pass has been run, every use of a deref will be of a
+ * deref in the same block as the use.  Also, all unused derefs will be
+ * deleted as a side-effect.
+ */
+bool
+nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl)
+{
+   struct rematerialize_deref_state state = { 0 };
+   nir_builder_init(&state.builder, impl);
+
+   nir_foreach_block(block, impl) {
+      state.block = block;
+
+      /* Start each block with a fresh cache */
+      if (state.cache)
+         _mesa_hash_table_clear(state.cache, NULL);
+
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type == nir_instr_type_deref) {
+            nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr));
+            continue;
+         }
+
+         state.builder.cursor = nir_before_instr(instr);
+         nir_foreach_src(instr, rematerialize_deref_src, &state);
+      }
+
+#ifndef NDEBUG
+      nir_if *following_if = nir_block_get_following_if(block);
+      if (following_if)
+         assert(!nir_src_as_deref(following_if->condition));
+#endif
+   }
+
+   _mesa_hash_table_destroy(state.cache, NULL);
+
+   return state.progress;
+}
diff --git a/src/compiler/nir/nir_format_convert.h b/src/compiler/nir/nir_format_convert.h
index 45532b74884..e5cc653c44e 100644
--- a/src/compiler/nir/nir_format_convert.h
+++ b/src/compiler/nir/nir_format_convert.h
@@ -191,7 +191,7 @@ nir_format_unpack_11f11f10f(nir_builder *b, nir_ssa_def *packed)
 {
    nir_ssa_def *chans[3];
    chans[0] = nir_mask_shift(b, packed, 0x000007ff, 4);
-   chans[1] = nir_mask_shift(b, packed, 0x003ff100, -7);
+   chans[1] = nir_mask_shift(b, packed, 0x003ff800, -7);
    chans[2] = nir_mask_shift(b, packed, 0xffc00000, -17);
 
    for (unsigned i = 0; i < 3; i++)
diff --git a/src/compiler/nir/nir_from_ssa.c b/src/compiler/nir/nir_from_ssa.c
index 1aa35509b11..413807ff28d 100644
--- a/src/compiler/nir/nir_from_ssa.c
+++ b/src/compiler/nir/nir_from_ssa.c
@@ -974,6 +974,12 @@ nir_lower_ssa_defs_to_regs_block(nir_block *block)
          mov->dest.dest = nir_dest_for_reg(reg);
          mov->dest.write_mask = (1 << reg->num_components) - 1;
          nir_instr_insert(nir_after_instr(&load->instr), &mov->instr);
+      } else if (instr->type == nir_instr_type_deref) {
+         /* Derefs should always be SSA values, don't rewrite them. */
+         nir_deref_instr *deref = nir_instr_as_deref(instr);
+         nir_foreach_use_safe(use, &deref->dest.ssa)
+            assert(use->parent_instr->block == block);
+         assert(list_empty(&deref->dest.ssa.if_uses));
       } else {
          nir_foreach_dest(instr, dest_replace_ssa_with_reg, &state);
       }
diff --git a/src/compiler/nir/nir_linking_helpers.c b/src/compiler/nir/nir_linking_helpers.c
index 85712a7cb1c..6e6655dfc9d 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -176,9 +176,12 @@ nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
 }
 
 static uint8_t
-get_interp_type(nir_variable *var, bool default_to_smooth_interp)
+get_interp_type(nir_variable *var, const struct glsl_type *type,
+                bool default_to_smooth_interp)
 {
-   if (var->data.interpolation != INTERP_MODE_NONE)
+   if (glsl_type_is_integer(type))
+      return INTERP_MODE_FLAT;
+   else if (var->data.interpolation != INTERP_MODE_NONE)
       return var->data.interpolation;
    else if (default_to_smooth_interp)
       return INTERP_MODE_SMOOTH;
@@ -233,7 +236,7 @@ get_slot_component_masks_and_interp_types(struct exec_list *var_list,
          unsigned comps_slot2 = 0;
          for (unsigned i = 0; i < slots; i++) {
             interp_type[location + i] =
-               get_interp_type(var, default_to_smooth_interp);
+               get_interp_type(var, type, default_to_smooth_interp);
             interp_loc[location + i] = get_interp_loc(var);
 
             if (dual_slot) {
@@ -405,7 +408,7 @@ compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps,
             continue;
 
          bool found_new_offset = false;
-         uint8_t interp = get_interp_type(var, default_to_smooth_interp);
+         uint8_t interp = get_interp_type(var, type, default_to_smooth_interp);
          for (; cursor[interp] < 32; cursor[interp]++) {
             uint8_t cursor_used_comps = comps[cursor[interp]];
 
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index 742c8d8ee66..0be3aba9456 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -107,11 +107,11 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
       if (!b->shader->options->lower_pack_half_2x16)
          return false;
 
+      nir_ssa_def *src_vec2 = nir_ssa_for_alu_src(b, instr, 0);
+
       nir_ssa_def *val =
-         nir_pack_half_2x16_split(b, nir_channel(b, instr->src[0].src.ssa,
-                                                 instr->src[0].swizzle[0]),
-                                     nir_channel(b, instr->src[0].src.ssa,
-                                                 instr->src[0].swizzle[1]));
+         nir_pack_half_2x16_split(b, nir_channel(b, src_vec2, 0),
+                                     nir_channel(b, src_vec2, 1));
 
       nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
       nir_instr_remove(&instr->instr);
@@ -130,9 +130,11 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
       if (!b->shader->options->lower_unpack_half_2x16)
          return false;
 
+      nir_ssa_def *packed = nir_ssa_for_alu_src(b, instr, 0);
+
       nir_ssa_def *comps[2];
-      comps[0] = nir_unpack_half_2x16_split_x(b, instr->src[0].src.ssa);
-      comps[1] = nir_unpack_half_2x16_split_y(b, instr->src[0].src.ssa);
+      comps[0] = nir_unpack_half_2x16_split_x(b, packed);
+      comps[1] = nir_unpack_half_2x16_split_y(b, packed);
       nir_ssa_def *vec = nir_vec(b, comps, 2);
 
       nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec));
@@ -144,8 +146,8 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
       assert(b->shader->options->lower_pack_snorm_2x16 ||
              b->shader->options->lower_pack_unorm_2x16);
 
-      nir_ssa_def *word =
-         nir_extract_u16(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
+      nir_ssa_def *word = nir_extract_u16(b, nir_ssa_for_alu_src(b, instr, 0),
+                                             nir_imm_int(b, 0));
       nir_ssa_def *val =
          nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)),
                                 nir_channel(b, word, 0));
@@ -159,8 +161,8 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
       assert(b->shader->options->lower_pack_snorm_4x8 ||
              b->shader->options->lower_pack_unorm_4x8);
 
-      nir_ssa_def *byte =
-         nir_extract_u8(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
+      nir_ssa_def *byte = nir_extract_u8(b, nir_ssa_for_alu_src(b, instr, 0),
+                                            nir_imm_int(b, 0));
       nir_ssa_def *val =
          nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)),
                                nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))),
@@ -173,14 +175,15 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
    }
 
    case nir_op_fdph: {
+      nir_ssa_def *src0_vec = nir_ssa_for_alu_src(b, instr, 0);
+      nir_ssa_def *src1_vec = nir_ssa_for_alu_src(b, instr, 1);
+
       nir_ssa_def *sum[4];
       for (unsigned i = 0; i < 3; i++) {
-         sum[i] = nir_fmul(b, nir_channel(b, instr->src[0].src.ssa,
-                                          instr->src[0].swizzle[i]),
-                              nir_channel(b, instr->src[1].src.ssa,
-                                          instr->src[1].swizzle[i]));
+         sum[i] = nir_fmul(b, nir_channel(b, src0_vec, i),
+                              nir_channel(b, src1_vec, i));
       }
-      sum[3] = nir_channel(b, instr->src[1].src.ssa, instr->src[1].swizzle[3]);
+      sum[3] = nir_channel(b, src1_vec, 3);
 
       nir_ssa_def *val = nir_fadd(b, nir_fadd(b, sum[0], sum[1]),
                                      nir_fadd(b, sum[2], sum[3]));
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 7fc4ff1d407..19526d86d43 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -462,13 +462,13 @@
                            ('unpack_64_2x32_split_y', a)), a),
 
    # Byte extraction
-   (('ushr', a, 24), ('extract_u8', a, 3), '!options->lower_extract_byte'),
+   (('ushr', 'a@32', 24), ('extract_u8', a, 3), '!options->lower_extract_byte'),
    (('iand', 0xff, ('ushr', a, 16)), ('extract_u8', a, 2), '!options->lower_extract_byte'),
    (('iand', 0xff, ('ushr', a,  8)), ('extract_u8', a, 1), '!options->lower_extract_byte'),
    (('iand', 0xff, a), ('extract_u8', a, 0), '!options->lower_extract_byte'),
 
     # Word extraction
-   (('ushr', a, 16), ('extract_u16', a, 1), '!options->lower_extract_word'),
+   (('ushr', 'a@32', 16), ('extract_u16', a, 1), '!options->lower_extract_word'),
    (('iand', 0xffff, a), ('extract_u16', a, 0), '!options->lower_extract_word'),
 
    # Subtracts
diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index b3d0bf1decb..d8e03d6ccbb 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -180,6 +180,13 @@ opt_peel_loop_initial_if(nir_loop *loop)
       }
    }
 
+   /* We're about to re-arrange a bunch of blocks so make sure that we don't
+    * have deref uses which cross block boundaries.  We don't want a deref
+    * accidentally ending up in a phi.
+    */
+   nir_rematerialize_derefs_in_use_blocks_impl(
+      nir_cf_node_get_function(&loop->cf_node));
+
    /* Before we do anything, convert the loop to LCSSA.  We're about to
     * replace a bunch of SSA defs with registers and this will prevent any of
     * it from leaking outside the loop.
@@ -423,12 +430,6 @@ nir_opt_if(nir_shader *shader)
           */
          nir_lower_regs_to_ssa_impl(function->impl);
 
-         /* Calling nir_convert_loop_to_lcssa() in opt_peel_loop_initial_if()
-          * adds extra phi nodes which may not be valid if they're used for
-          * something such as a deref.  Remove any unneeded phis.
-          */
-         nir_opt_remove_phis_impl(function->impl);
-
          progress = true;
       }
    }
diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c
index 955dfede694..161c4ba04e9 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -49,6 +49,9 @@
 static void
 loop_prepare_for_unroll(nir_loop *loop)
 {
+   nir_rematerialize_derefs_in_use_blocks_impl(
+      nir_cf_node_get_function(&loop->cf_node));
+
    nir_convert_loop_to_lcssa(loop);
 
    /* Lower phis at the top level of the loop body */
diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp
index c8a29404969..e00273995db 100644
--- a/src/compiler/nir_types.cpp
+++ b/src/compiler/nir_types.cpp
@@ -279,6 +279,11 @@ glsl_type_is_boolean(const struct glsl_type *type)
 {
    return type->is_boolean();
 }
+bool
+glsl_type_is_integer(const struct glsl_type *type)
+{
+   return type->is_integer();
+}
 
 const glsl_type *
 glsl_void_type(void)
diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h
index db3a4dee2d9..7db32e3e008 100644
--- a/src/compiler/nir_types.h
+++ b/src/compiler/nir_types.h
@@ -139,6 +139,7 @@ bool glsl_type_is_image(const struct glsl_type *type);
 bool glsl_type_is_dual_slot(const struct glsl_type *type);
 bool glsl_type_is_numeric(const struct glsl_type *type);
 bool glsl_type_is_boolean(const struct glsl_type *type);
+bool glsl_type_is_integer(const struct glsl_type *type);
 bool glsl_sampler_type_is_shadow(const struct glsl_type *type);
 bool glsl_sampler_type_is_array(const struct glsl_type *type);
 bool glsl_contains_atomic(const struct glsl_type *type);
diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c
index 32ebdd78a1f..688b33e6607 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1771,11 +1771,17 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
          nir_const_value src[4];
 
          for (unsigned i = 0; i < count - 4; i++) {
-            nir_constant *c =
-               vtn_value(b, w[4 + i], vtn_value_type_constant)->constant;
+            struct vtn_value *src_val =
+               vtn_value(b, w[4 + i], vtn_value_type_constant);
+
+            /* If this is an unsized source, pull the bit size from the
+             * source; otherwise, we'll use the bit size from the destination.
+             */
+            if (!nir_alu_type_get_type_size(nir_op_infos[op].input_types[i]))
+               bit_size = glsl_get_bit_size(src_val->type->type);
 
             unsigned j = swap ? 1 - i : i;
-            src[j] = c->values[0];
+            src[j] = src_val->constant->values[0];
          }
 
          val->constant->values[0] =
diff --git a/src/egl/Android.mk b/src/egl/Android.mk
index 11128ded93c..e775b176082 100644
--- a/src/egl/Android.mk
+++ b/src/egl/Android.mk
@@ -46,7 +46,10 @@ LOCAL_CFLAGS := \
 LOCAL_C_INCLUDES := \
 	$(MESA_TOP)/include/drm-uapi \
 	$(MESA_TOP)/src/egl/main \
-	$(MESA_TOP)/src/egl/drivers/dri2
+	$(MESA_TOP)/src/egl/drivers/dri2 \
+	frameworks/native/libs/nativebase/include \
+	frameworks/native/libs/nativewindow/include \
+	frameworks/native/libs/arect/include
 
 LOCAL_STATIC_LIBRARIES := \
 	libmesa_util \
@@ -65,6 +68,10 @@ ifeq ($(BOARD_USES_DRM_GRALLOC),true)
 	LOCAL_SHARED_LIBRARIES += libgralloc_drm
 endif
 
+ifeq ($(strip $(BOARD_USES_GRALLOC1)),true)
+LOCAL_CFLAGS += -DHAVE_GRALLOC1
+endif
+
 ifeq ($(filter $(MESA_ANDROID_MAJOR_VERSION), 4 5 6 7),)
 LOCAL_SHARED_LIBRARIES += libnativewindow
 endif
@@ -80,8 +87,12 @@ ifneq ($(MESA_BUILD_GALLIUM),)
 LOCAL_REQUIRED_MODULES += gallium_dri
 endif
 
+ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0)
+LOCAL_HEADER_LIBRARIES += libnativebase_headers
+endif
+
 LOCAL_MODULE := libGLES_mesa
 LOCAL_MODULE_RELATIVE_PATH := egl
-
+LOCAL_CFLAGS += -Wno-error
 include $(MESA_COMMON_MK)
 include $(BUILD_SHARED_LIBRARY)
diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index c3024795a10..81337d20920 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -65,6 +65,38 @@
 #include "util/u_vector.h"
 #include "mapi/glapi/glapi.h"
 
+/* The kernel header drm_fourcc.h defines the DRM formats below.  We duplicate
+ * some of the definitions here so that building Mesa won't bleeding-edge
+ * kernel headers.
+ */
+#ifndef DRM_FORMAT_R8
+#define DRM_FORMAT_R8            fourcc_code('R', '8', ' ', ' ') /* [7:0] R */
+#endif
+
+#ifndef DRM_FORMAT_RG88
+#define DRM_FORMAT_RG88          fourcc_code('R', 'G', '8', '8') /* [15:0] R:G 8:8 little endian */
+#endif
+
+#ifndef DRM_FORMAT_GR88
+#define DRM_FORMAT_GR88          fourcc_code('G', 'R', '8', '8') /* [15:0] G:R 8:8 little endian */
+#endif
+
+#ifndef DRM_FORMAT_R16
+#define DRM_FORMAT_R16           fourcc_code('R', '1', '6', ' ') /* [15:0] R 16 little endian */
+#endif
+
+#ifndef DRM_FORMAT_GR1616
+#define DRM_FORMAT_GR1616        fourcc_code('G', 'R', '3', '2') /* [31:0] R:G 16:16 little endian */
+#endif
+
+#ifndef DRM_FORMAT_P010
+#define DRM_FORMAT_P010 	 fourcc_code('P', '0', '1', '0') /* 2x2 subsampled Cb:Cr plane 10 bits per channel */
+#endif
+
+#ifndef DRM_FORMAT_MOD_INVALID
+#define DRM_FORMAT_MOD_INVALID ((1ULL<<56) - 1)
+#endif
+
 #define NUM_ATTRIBS 12
 
 static void
@@ -669,7 +701,7 @@ dri2_setup_screen(_EGLDisplay *disp)
       dri2_renderer_query_integer(dri2_dpy,
                                   __DRI2_RENDERER_HAS_CONTEXT_PRIORITY);
 
-   disp->Extensions.EXT_pixel_format_float = EGL_TRUE;
+   disp->Extensions.EXT_pixel_format_float = EGL_FALSE;
 
    if (dri2_renderer_query_integer(dri2_dpy,
                                    __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB))
@@ -2195,13 +2227,13 @@ dri2_check_dma_buf_attribs(const _EGLImageAttribs *attrs)
    return EGL_TRUE;
 }
 
-/* Returns the total number of file descriptors. Zero indicates an error. */
+/* Returns the total number of planes for the format or zero if it isn't a
+ * valid fourcc format.
+ */
 static unsigned
-dri2_check_dma_buf_format(const _EGLImageAttribs *attrs)
+dri2_num_fourcc_format_planes(EGLint format)
 {
-   unsigned plane_n;
-
-   switch (attrs->DMABufFourCC.Value) {
+   switch (format) {
    case DRM_FORMAT_R8:
    case DRM_FORMAT_RG88:
    case DRM_FORMAT_GR88:
@@ -2249,14 +2281,15 @@ dri2_check_dma_buf_format(const _EGLImageAttribs *attrs)
    case DRM_FORMAT_YVYU:
    case DRM_FORMAT_UYVY:
    case DRM_FORMAT_VYUY:
-      plane_n = 1;
-      break;
+      return 1;
+
    case DRM_FORMAT_NV12:
    case DRM_FORMAT_NV21:
    case DRM_FORMAT_NV16:
    case DRM_FORMAT_NV61:
-      plane_n = 2;
-      break;
+   case DRM_FORMAT_P010:
+      return 2;
+
    case DRM_FORMAT_YUV410:
    case DRM_FORMAT_YVU410:
    case DRM_FORMAT_YUV411:
@@ -2267,9 +2300,19 @@ dri2_check_dma_buf_format(const _EGLImageAttribs *attrs)
    case DRM_FORMAT_YVU422:
    case DRM_FORMAT_YUV444:
    case DRM_FORMAT_YVU444:
-      plane_n = 3;
-      break;
+      return 3;
+
    default:
+      return 0;
+   }
+}
+
+/* Returns the total number of file descriptors. Zero indicates an error. */
+static unsigned
+dri2_check_dma_buf_format(const _EGLImageAttribs *attrs)
+{
+   unsigned plane_n = dri2_num_fourcc_format_planes(attrs->DMABufFourCC.Value);
+   if (plane_n == 0) {
       _eglError(EGL_BAD_ATTRIBUTE, "invalid format");
       return 0;
    }
@@ -2343,6 +2386,18 @@ dri2_query_dma_buf_formats(_EGLDriver *drv, _EGLDisplay *disp,
                                             formats, count))
       return EGL_FALSE;
 
+   if (max > 0) {
+      /* Assert that all of the formats returned are actually fourcc formats.
+       * Some day, if we want the internal interface function to be able to
+       * return the fake fourcc formats defined in dri_interface.h, we'll have
+       * to do something more clever here to pair the list down to just real
+       * fourcc formats so that we don't leak the fake internal ones.
+       */
+      for (int i = 0; i < *count; i++) {
+         assert(dri2_num_fourcc_format_planes(formats[i]) > 0);
+      }
+   }
+
    return EGL_TRUE;
 }
 
@@ -2353,6 +2408,9 @@ dri2_query_dma_buf_modifiers(_EGLDriver *drv, _EGLDisplay *disp, EGLint format,
 {
    struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
 
+   if (dri2_num_fourcc_format_planes(format) == 0)
+      return _eglError(EGL_BAD_PARAMETER, "invalid fourcc format");
+
    if (max < 0)
       return _eglError(EGL_BAD_PARAMETER, "invalid value for max count of formats");
 
diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index f8001ec4b66..3ce4a3e04ea 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -69,6 +69,10 @@ struct zwp_linux_dmabuf_v1;
 #include <hardware/gralloc.h>
 #endif /* HAVE_ANDROID_PLATFORM */
 
+#ifdef HAVE_GRALLOC1
+#include <hardware/gralloc1.h>
+#endif
+
 #include "eglconfig.h"
 #include "eglcontext.h"
 #include "egldisplay.h"
@@ -229,7 +233,14 @@ struct dri2_egl_display
 #endif
 
 #ifdef HAVE_ANDROID_PLATFORM
-   const gralloc_module_t *gralloc;
+   const hw_module_t *gralloc;
+   uint16_t gralloc_version;
+#ifdef HAVE_GRALLOC1
+   gralloc1_device_t *gralloc1_dvc;
+   GRALLOC1_PFN_LOCK_FLEX pfn_lockflex;
+   GRALLOC1_PFN_GET_FORMAT pfn_getFormat;
+   GRALLOC1_PFN_UNLOCK pfn_unlock;
+#endif
 #endif
 
    bool                      is_render_node;
@@ -290,6 +301,7 @@ struct dri2_egl_surface
    struct {
 #ifdef HAVE_WAYLAND_PLATFORM
       struct wl_buffer   *wl_buffer;
+      bool                wl_release;
       __DRIimage         *dri_image;
       /* for is_different_gpu case. NULL else */
       __DRIimage         *linear_copy;
diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c
index cc16fd8118f..f6499ce2bb0 100644
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -49,6 +49,8 @@
 
 #define ALIGN(val, align)	(((val) + (align) - 1) & ~((align) - 1))
 
+#define GRALLOC_DRM_GET_FORMAT   1
+
 struct droid_yuv_format {
    /* Lookup keys */
    int native; /* HAL_PIXEL_FORMAT_ */
@@ -59,14 +61,26 @@ struct droid_yuv_format {
    int fourcc; /* __DRI_IMAGE_FOURCC_ */
 };
 
+/* This enumeration can be deleted if Android defined it in
+ * system/core/include/system/graphics.h
+ */
+enum {
+   HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL = 0x100,
+   HAL_PIXEL_FORMAT_NV12 = 0x10F,
+   HAL_PIXEL_FORMAT_P010_INTEL = 0x110
+};
+
 /* The following table is used to look up a DRI image FourCC based
  * on native format and information contained in android_ycbcr struct. */
 static const struct droid_yuv_format droid_yuv_formats[] = {
    /* Native format, YCrCb, Chroma step, DRI image FourCC */
    { HAL_PIXEL_FORMAT_YCbCr_420_888,   0, 2, __DRI_IMAGE_FOURCC_NV12 },
+   { HAL_PIXEL_FORMAT_P010_INTEL,      0, 4, __DRI_IMAGE_FOURCC_P010 },
    { HAL_PIXEL_FORMAT_YCbCr_420_888,   0, 1, __DRI_IMAGE_FOURCC_YUV420 },
    { HAL_PIXEL_FORMAT_YCbCr_420_888,   1, 1, __DRI_IMAGE_FOURCC_YVU420 },
    { HAL_PIXEL_FORMAT_YV12,            1, 1, __DRI_IMAGE_FOURCC_YVU420 },
+   { HAL_PIXEL_FORMAT_NV12,            0, 2, __DRI_IMAGE_FOURCC_NV12 },
+   { HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL, 0, 2, __DRI_IMAGE_FOURCC_NV12 },
    /* HACK: See droid_create_image_from_prime_fd() and
     * https://issuetracker.google.com/32077885. */
    { HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED,   0, 2, __DRI_IMAGE_FOURCC_NV12 },
@@ -248,6 +262,51 @@ droid_window_dequeue_buffer(struct dri2_egl_surface *dri2_surf)
    return EGL_TRUE;
 }
 
+static int
+droid_resolve_format(struct dri2_egl_display *dri2_dpy,
+                     struct ANativeWindowBuffer *buf)
+{
+   int format = -1;
+   int ret;
+
+   if (buf->format != HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED)
+      return buf->format;
+#ifdef HAVE_GRALLOC1
+   if(dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) {
+
+     if (!dri2_dpy->pfn_getFormat) {
+        _eglLog(_EGL_WARNING, "Gralloc does not support getFormat");
+        return -1;
+     }
+     ret = dri2_dpy->pfn_getFormat(dri2_dpy->gralloc1_dvc, buf->handle,
+                                       &format);
+     if (ret) {
+        _eglLog(_EGL_WARNING, "gralloc->getFormat failed: %d", ret);
+        return -1;
+     }
+   } else {
+#else
+     const gralloc_module_t *gralloc0;
+     gralloc0 = dri2_dpy->gralloc;
+
+     if (!gralloc0->perform) {
+       _eglLog(_EGL_WARNING, "gralloc->perform not supported");
+       return -1;
+     }
+     ret = gralloc0->perform(dri2_dpy->gralloc,
+                                    GRALLOC_DRM_GET_FORMAT,
+                                    buf->handle, &format);
+     if (ret){
+       _eglLog(_EGL_WARNING, "gralloc->perform failed with error: %d", ret);
+       return -1;
+     }
+#endif
+#ifdef HAVE_GRALLOC1
+   }
+#endif
+   return format;
+}
+
 static EGLBoolean
 droid_window_enqueue_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_surf)
 {
@@ -436,7 +495,7 @@ droid_swap_interval(_EGLDriver *drv, _EGLDisplay *dpy,
    struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
    struct ANativeWindow *window = dri2_surf->window;
 
-   if (window->setSwapInterval(window, interval))
+   if (window && window->setSwapInterval(window, interval))
       return EGL_FALSE;
 
    surf->SwapInterval = interval;
@@ -622,11 +681,18 @@ droid_query_buffer_age(_EGLDriver *drv,
 {
    struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surface);
 
+   /* To avoid blocking other EGL calls, release the display mutex before
+    * we enter droid_window_dequeue_buffer() and re-acquire the mutex upon
+    * return.
+    */
+   mtx_unlock(&disp->Mutex);
    if (update_buffers(dri2_surf) < 0) {
       _eglError(EGL_BAD_ALLOC, "droid_query_buffer_age");
+      mtx_lock(&disp->Mutex);
       return -1;
    }
 
+   mtx_lock(&disp->Mutex);
    return dri2_surf->back ? dri2_surf->back->age : 0;
 }
 
@@ -666,6 +732,31 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
    return EGL_TRUE;
 }
 
+static int get_ycbcr_from_flexlayout(struct android_flex_layout *outFlexLayout, struct android_ycbcr *ycbcr)
+{
+
+    for( int i = 0; i < outFlexLayout->num_planes; i++) {
+       switch(outFlexLayout->planes[i].component){
+         case FLEX_COMPONENT_Y:
+             ycbcr->y = outFlexLayout->planes[i].top_left;
+             ycbcr->ystride = outFlexLayout->planes[i].v_increment;
+         break;
+         case FLEX_COMPONENT_Cb:
+             ycbcr->cb = outFlexLayout->planes[i].top_left;
+             ycbcr->cstride = outFlexLayout->planes[i].v_increment;
+         break;
+         case FLEX_COMPONENT_Cr:
+             ycbcr->cr = outFlexLayout->planes[i].top_left;
+             ycbcr->chroma_step = outFlexLayout->planes[i].h_increment;
+         break;
+         default:
+             _eglLog(_EGL_WARNING,"unknown component 0x%x", __func__, outFlexLayout->planes[i].component);
+         break;
+       }
+  }
+  return 0;
+}
+
 #if ANDROID_API_LEVEL >= 23
 static EGLBoolean
 droid_set_damage_region(_EGLDriver *drv,
@@ -709,30 +800,70 @@ droid_create_image_from_prime_fd_yuv(_EGLDisplay *disp, _EGLContext *ctx,
 {
    struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
    struct android_ycbcr ycbcr;
+#ifdef HAVE_GRALLOC1
+   struct android_flex_layout outFlexLayout;
+   gralloc1_rect_t accessRegion;
+#endif
    size_t offsets[3];
    size_t pitches[3];
    int is_ycrcb;
    int fourcc;
    int ret;
 
-   if (!dri2_dpy->gralloc->lock_ycbcr) {
-      _eglLog(_EGL_WARNING, "Gralloc does not support lock_ycbcr");
+   int format = droid_resolve_format(dri2_dpy, buf);
+   if (format < 0) {
+      _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR");
       return NULL;
    }
 
    memset(&ycbcr, 0, sizeof(ycbcr));
-   ret = dri2_dpy->gralloc->lock_ycbcr(dri2_dpy->gralloc, buf->handle,
-                                       0, 0, 0, 0, 0, &ycbcr);
-   if (ret) {
-      /* HACK: See droid_create_image_from_prime_fd() and
-       * https://issuetracker.google.com/32077885.*/
-      if (buf->format == HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED)
-         return NULL;
-
-      _eglLog(_EGL_WARNING, "gralloc->lock_ycbcr failed: %d", ret);
-      return NULL;
-   }
-   dri2_dpy->gralloc->unlock(dri2_dpy->gralloc, buf->handle);
+#ifdef HAVE_GRALLOC1
+   if(dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) {
+     if (!dri2_dpy->pfn_lockflex) {
+        _eglLog(_EGL_WARNING, "Gralloc does not support lockflex");
+        return NULL;
+     }
+
+     ret = dri2_dpy->pfn_lockflex(dri2_dpy->gralloc1_dvc, buf->handle,
+                                       0, 0, &accessRegion, &outFlexLayout, -1);
+     if (ret) {
+        _eglLog(_EGL_WARNING, "gralloc->lockflex failed: %d", ret);
+        return NULL;
+     }
+     ret = get_ycbcr_from_flexlayout(&outFlexLayout, &ycbcr);
+     if (ret) {
+        _eglLog(_EGL_WARNING, "gralloc->lockflex failed: %d", ret);
+        return NULL;
+     }
+     int outReleaseFence = 0;
+     dri2_dpy->pfn_unlock(dri2_dpy->gralloc1_dvc, buf->handle, &outReleaseFence);
+   } else {
+#endif
+     const gralloc_module_t *gralloc0;
+     gralloc0 = dri2_dpy->gralloc;
+
+     if (!gralloc0->lock_ycbcr) {
+        _eglLog(_EGL_WARNING, "Gralloc does not support lock_ycbcr");
+        return NULL;
+     }
+
+     ret = gralloc0->lock_ycbcr(gralloc0, buf->handle,
+                                        0, 0, 0, 0, 0, &ycbcr);
+
+     if (ret) {
+        /* HACK: See droid_create_image_from_prime_fd() and
+         * https://issuetracker.google.com/32077885.*/
+        if (buf->format == HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED)
+           return NULL;
+
+        _eglLog(_EGL_WARNING, "gralloc->lock_ycbcr failed: %d", ret);
+        return NULL;
+     }
+
+     gralloc0->unlock(dri2_dpy->gralloc, buf->handle);
+#ifdef HAVE_GRALLOC1
+  }
+#endif
 
    /* When lock_ycbcr's usage argument contains no SW_READ/WRITE flags
     * it will return the .y/.cb/.cr pointers based on a NULL pointer,
@@ -757,14 +888,15 @@ droid_create_image_from_prime_fd_yuv(_EGLDisplay *disp, _EGLContext *ctx,
 
    /* .chroma_step is the byte distance between the same chroma channel
     * values of subsequent pixels, assumed to be the same for Cb and Cr. */
-   fourcc = get_fourcc_yuv(buf->format, is_ycrcb, ycbcr.chroma_step);
+   fourcc = get_fourcc_yuv(format, is_ycrcb, ycbcr.chroma_step);
    if (fourcc == -1) {
       _eglLog(_EGL_WARNING, "unsupported YUV format, native = %x, is_ycrcb = %d, chroma_step = %d",
-              buf->format, is_ycrcb, ycbcr.chroma_step);
+              format, is_ycrcb, ycbcr.chroma_step);
       return NULL;
    }
 
-   if (ycbcr.chroma_step == 2) {
+   /* FIXME? we should not rely on chroma_step */
+   if (ycbcr.chroma_step == 2 || ycbcr.chroma_step == 4) {
       /* Semi-planar Y + CbCr or Y + CrCb format. */
       const EGLint attr_list_2plane[] = {
          EGL_WIDTH, buf->width,
@@ -806,9 +938,16 @@ static _EGLImage *
 droid_create_image_from_prime_fd(_EGLDisplay *disp, _EGLContext *ctx,
                                  struct ANativeWindowBuffer *buf, int fd)
 {
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
    unsigned int pitch;
 
-   if (is_yuv(buf->format)) {
+   int format = droid_resolve_format(dri2_dpy, buf);
+   if (format < 0) {
+      _eglLog(_EGL_WARNING, "Could not resolve buffer format");
+      return NULL;
+   }
+
+   if (is_yuv(format)) {
       _EGLImage *image;
 
       image = droid_create_image_from_prime_fd_yuv(disp, ctx, buf, fd);
@@ -823,13 +962,13 @@ droid_create_image_from_prime_fd(_EGLDisplay *disp, _EGLContext *ctx,
          return image;
    }
 
-   const int fourcc = get_fourcc(buf->format);
+   const int fourcc = get_fourcc(format);
    if (fourcc == -1) {
       _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR");
       return NULL;
    }
 
-   pitch = buf->stride * get_format_bpp(buf->format);
+   pitch = buf->stride * get_format_bpp(format);
    if (pitch == 0) {
       _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR");
       return NULL;
@@ -1134,6 +1273,25 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
    return (config_count != 0);
 }
 
+#ifdef HAVE_DRM_GRALLOC
+static int
+droid_open_device_drm_gralloc(struct dri2_egl_display *dri2_dpy)
+{
+   int fd = -1, err = -EINVAL;
+
+   if (dri2_dpy->gralloc->perform)
+         err = dri2_dpy->gralloc->perform(dri2_dpy->gralloc,
+                                          GRALLOC_MODULE_PERFORM_GET_DRM_FD,
+                                          &fd);
+   if (err || fd < 0) {
+      _eglLog(_EGL_WARNING, "fail to get drm fd");
+      fd = -1;
+   }
+
+   return (fd >= 0) ? fcntl(fd, F_DUPFD_CLOEXEC, 3) : -1;
+}
+#endif /* HAVE_DRM_GRALLOC */
+
 static const struct dri2_egl_display_vtbl droid_display_vtbl = {
    .authenticate = NULL,
    .create_window_surface = droid_create_window_surface,
@@ -1207,7 +1365,7 @@ droid_load_driver(_EGLDisplay *disp)
    dri2_dpy->is_render_node = drmGetNodeTypeFromFd(dri2_dpy->fd) == DRM_NODE_RENDER;
 
    if (!dri2_dpy->is_render_node) {
-   #ifdef HAVE_DRM_GRALLOC
+#ifdef HAVE_DRM_GRALLOC
        /* Handle control nodes using __DRI_DRI2_LOADER extension and GEM names
         * for backwards compatibility with drm_gralloc. (Do not use on new
         * systems.) */
@@ -1216,10 +1374,10 @@ droid_load_driver(_EGLDisplay *disp)
           err = "DRI2: failed to load driver";
           goto error;
        }
-   #else
+#else
        err = "DRI2: handle is not for a render node";
        goto error;
-   #endif
+#endif
    } else {
        dri2_dpy->loader_extensions = droid_image_loader_extensions;
        if (!dri2_load_driver_dri3(disp)) {
@@ -1362,6 +1520,7 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *disp)
 {
    struct dri2_egl_display *dri2_dpy;
    const char *err;
+   hw_device_t *device;
    int ret;
 
    /* Not supported yet */
@@ -1381,10 +1540,35 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *disp)
       err = "DRI2: failed to get gralloc module";
       goto cleanup;
    }
+   dri2_dpy->gralloc_version = dri2_dpy->gralloc->module_api_version;
+#ifdef HAVE_GRALLOC1
+   if (dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) {
+      ret = dri2_dpy->gralloc->methods->open(dri2_dpy->gralloc, GRALLOC_HARDWARE_MODULE_ID, &device);
+      if (ret) {
+        err = "Failed to open hw_device device";
+        goto cleanup;
+      } else {
+        dri2_dpy->gralloc1_dvc = (gralloc1_device_t *)device;
+
+        dri2_dpy->pfn_lockflex = (GRALLOC1_PFN_LOCK_FLEX)\
+             dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_LOCK_FLEX);
+
+        dri2_dpy->pfn_getFormat = (GRALLOC1_PFN_GET_FORMAT)\
+             dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_GET_FORMAT);
+
+        dri2_dpy->pfn_unlock = (GRALLOC1_PFN_UNLOCK)\
+             dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_UNLOCK);
+      }
+   }
+#endif
 
    disp->DriverData = (void *) dri2_dpy;
 
+#ifdef HAVE_DRM_GRALLOC
+   dri2_dpy->fd = droid_open_device_drm_gralloc(dri2_dpy);
+#else
    dri2_dpy->fd = droid_open_device(disp);
+#endif
    if (dri2_dpy->fd < 0) {
       err = "DRI2: failed to open device";
       goto cleanup;
diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c
index dca099500a8..15eeee5d686 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -182,9 +182,12 @@ wl_buffer_release(void *data, struct wl_buffer *buffer)
       if (dri2_surf->color_buffers[i].wl_buffer == buffer)
          break;
 
-   if (i == ARRAY_SIZE(dri2_surf->color_buffers)) {
+   assert (i < ARRAY_SIZE(dri2_surf->color_buffers));
+
+   if (dri2_surf->color_buffers[i].wl_release) {
       wl_buffer_destroy(buffer);
-      return;
+      dri2_surf->color_buffers[i].wl_release = false;
+      dri2_surf->color_buffers[i].wl_buffer = NULL;
    }
 
    dri2_surf->color_buffers[i].locked = false;
@@ -201,6 +204,17 @@ resize_callback(struct wl_egl_window *wl_win, void *data)
    struct dri2_egl_display *dri2_dpy =
       dri2_egl_display(dri2_surf->base.Resource.Display);
 
+   /* Update the surface size as soon as native window is resized; from user
+    * pov, this makes the effect that resize is done inmediately after native
+    * window resize, without requiring to wait until the first draw.
+    *
+    * A more detailed and lengthy explanation can be found at
+    * https://lists.freedesktop.org/archives/mesa-dev/2018-June/196474.html
+    */
+   if (!dri2_surf->back) {
+      dri2_surf->base.Width = wl_win->width;
+      dri2_surf->base.Height = wl_win->height;
+   }
    dri2_dpy->flush->invalidate(dri2_surf->dri_drawable);
 }
 
@@ -258,6 +272,9 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
       goto cleanup_surf;
    }
 
+   dri2_surf->base.Width = window->width;
+   dri2_surf->base.Height = window->height;
+
    visual_idx = dri2_wl_visual_idx_from_config(dri2_dpy, config);
    assert(visual_idx != -1);
 
@@ -411,9 +428,14 @@ dri2_wl_release_buffers(struct dri2_egl_surface *dri2_surf)
       dri2_egl_display(dri2_surf->base.Resource.Display);
 
    for (int i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) {
-      if (dri2_surf->color_buffers[i].wl_buffer &&
-          !dri2_surf->color_buffers[i].locked)
-         wl_buffer_destroy(dri2_surf->color_buffers[i].wl_buffer);
+      if (dri2_surf->color_buffers[i].wl_buffer) {
+         if (dri2_surf->color_buffers[i].locked) {
+            dri2_surf->color_buffers[i].wl_release = true;
+         } else {
+            wl_buffer_destroy(dri2_surf->color_buffers[i].wl_buffer);
+            dri2_surf->color_buffers[i].wl_buffer = NULL;
+         }
+      }
       if (dri2_surf->color_buffers[i].dri_image)
          dri2_dpy->image->destroyImage(dri2_surf->color_buffers[i].dri_image);
       if (dri2_surf->color_buffers[i].linear_copy)
@@ -422,11 +444,9 @@ dri2_wl_release_buffers(struct dri2_egl_surface *dri2_surf)
          munmap(dri2_surf->color_buffers[i].data,
                 dri2_surf->color_buffers[i].data_size);
 
-      dri2_surf->color_buffers[i].wl_buffer = NULL;
       dri2_surf->color_buffers[i].dri_image = NULL;
       dri2_surf->color_buffers[i].linear_copy = NULL;
       dri2_surf->color_buffers[i].data = NULL;
-      dri2_surf->color_buffers[i].locked = false;
    }
 
    if (dri2_dpy->dri2)
@@ -577,8 +597,8 @@ update_buffers(struct dri2_egl_surface *dri2_surf)
    struct dri2_egl_display *dri2_dpy =
       dri2_egl_display(dri2_surf->base.Resource.Display);
 
-   if (dri2_surf->base.Width != dri2_surf->wl_win->width ||
-       dri2_surf->base.Height != dri2_surf->wl_win->height) {
+   if (dri2_surf->base.Width != dri2_surf->wl_win->attached_width ||
+       dri2_surf->base.Height != dri2_surf->wl_win->attached_height) {
 
       dri2_wl_release_buffers(dri2_surf);
 
@@ -955,6 +975,8 @@ dri2_wl_swap_buffers_with_damage(_EGLDriver *drv,
       dri2_surf->current->wl_buffer =
          create_wl_buffer(dri2_dpy, dri2_surf, image);
 
+      dri2_surf->current->wl_release = false;
+
       wl_buffer_add_listener(dri2_surf->current->wl_buffer,
                              &wl_buffer_listener, dri2_surf);
    }
diff --git a/src/egl/drivers/dri2/platform_x11_dri3.c b/src/egl/drivers/dri2/platform_x11_dri3.c
index c3c9c2dd45d..e1967422f0a 100644
--- a/src/egl/drivers/dri2/platform_x11_dri3.c
+++ b/src/egl/drivers/dri2/platform_x11_dri3.c
@@ -107,12 +107,17 @@ static const struct loader_dri3_vtable egl_dri3_vtable = {
 static EGLBoolean
 dri3_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
 {
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
    struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf);
+   xcb_drawable_t drawable = dri3_surf->loader_drawable.drawable;
 
    (void) drv;
 
    loader_dri3_drawable_fini(&dri3_surf->loader_drawable);
 
+   if (surf->Type == EGL_PBUFFER_BIT)
+      xcb_free_pixmap (dri2_dpy->conn, drawable);
+
    dri2_fini_surface(surf);
    free(surf);
 
diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
index deb479b6d56..fadb2b16871 100644
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -1208,6 +1208,9 @@ eglSwapInterval(EGLDisplay dpy, EGLint interval)
    if (_eglGetSurfaceHandle(surf) == EGL_NO_SURFACE)
       RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE);
 
+   if (surf->Type != EGL_WINDOW_BIT)
+      RETURN_EGL_EVAL(disp, EGL_TRUE);
+
    interval = CLAMP(interval,
                     surf->Config->MinSwapInterval,
                     surf->Config->MaxSwapInterval);
@@ -1243,6 +1246,9 @@ eglSwapBuffers(EGLDisplay dpy, EGLSurface surface)
       RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE);
    #endif
 
+   if (surf->Type != EGL_WINDOW_BIT)
+      RETURN_EGL_EVAL(disp, EGL_TRUE);
+
    /* From the EGL 1.5 spec:
     *
     *    If eglSwapBuffers is called and the native window associated with
@@ -1282,6 +1288,9 @@ _eglSwapBuffersWithDamageCommon(_EGLDisplay *disp, _EGLSurface *surf,
        surf != ctx->DrawSurface)
       RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE);
 
+   if (surf->Type != EGL_WINDOW_BIT)
+      RETURN_EGL_EVAL(disp, EGL_TRUE);
+
    if ((n_rects > 0 && rects == NULL) || n_rects < 0)
       RETURN_EGL_ERROR(disp, EGL_BAD_PARAMETER, EGL_FALSE);
 
diff --git a/src/egl/main/eglcurrent.c b/src/egl/main/eglcurrent.c
index 7af3011b757..545697e5662 100644
--- a/src/egl/main/eglcurrent.c
+++ b/src/egl/main/eglcurrent.c
@@ -137,13 +137,37 @@ _eglDestroyThreadInfo(_EGLThreadInfo *t)
 }
 
 
+/**
+ * Delete/free a _EGLThreadInfo object.
+ */
+static void
+_eglDestroyThreadInfoCallback(_EGLThreadInfo *t)
+{
+   /* If this callback is called on thread termination then try to also give a
+    * chance to cleanup to the client drivers. If called for module termination
+    * then just release the thread information as calling eglReleaseThread
+    * would result in a deadlock.
+    */
+   if (_egl_TSDInitialized) {
+      /* The callback handler has replaced the TLS entry, which is passed in as
+       * 't', with NULL. Restore it here so that the release thread finds it in
+       * the TLS entry.
+       */
+      _eglSetTSD(t);
+      eglReleaseThread();
+   } else {
+      _eglDestroyThreadInfo(t);
+   }
+}
+
+
 /**
  * Make sure TSD is initialized and return current value.
  */
 static inline _EGLThreadInfo *
 _eglCheckedGetTSD(void)
 {
-   if (_eglInitTSD(&_eglDestroyThreadInfo) != EGL_TRUE) {
+   if (_eglInitTSD(&_eglDestroyThreadInfoCallback) != EGL_TRUE) {
       _eglLog(_EGL_FATAL, "failed to initialize \"current\" system");
       return NULL;
    }
diff --git a/src/egl/main/egldispatchstubs.c b/src/egl/main/egldispatchstubs.c
index bfc3195c779..96708aeb0dc 100644
--- a/src/egl/main/egldispatchstubs.c
+++ b/src/egl/main/egldispatchstubs.c
@@ -59,6 +59,11 @@ static __eglMustCastToProperFunctionPointerType FetchVendorFunc(__EGLvendorInfo
     }
     if (func == NULL) {
         if (errorCode != EGL_SUCCESS) {
+            // Since we have no vendor, the follow-up eglGetError() call will
+            // end up using the GLVND error code. Set it here.
+            if (vendor == NULL) {
+                exports->setEGLError(errorCode);
+            }
             _eglError(errorCode, __EGL_DISPATCH_FUNC_NAMES[index]);
         }
         return NULL;
diff --git a/src/egl/meson.build b/src/egl/meson.build
index 89a84fd8908..1e0b1d33af5 100644
--- a/src/egl/meson.build
+++ b/src/egl/meson.build
@@ -99,10 +99,10 @@ endif
 
 if with_platform_x11
   files_egl += files('drivers/dri2/platform_x11.c')
+  incs_for_egl += inc_loader
   if with_dri3
     files_egl += files('drivers/dri2/platform_x11_dri3.c')
     link_for_egl += libloader_dri3_helper
-    incs_for_egl += inc_loader
   endif
   deps_for_egl += [dep_x11_xcb, dep_xcb_dri2, dep_xcb_xfixes]
 endif
@@ -114,6 +114,7 @@ if with_platform_drm
 endif
 if with_platform_surfaceless
   files_egl += files('drivers/dri2/platform_surfaceless.c')
+  incs_for_egl += [inc_loader]
 endif
 if with_platform_wayland
   deps_for_egl += [dep_wayland_client, dep_wayland_server, dep_wayland_egl_headers]
@@ -129,6 +130,7 @@ endif
 if with_platform_android
   deps_for_egl += dep_android
   files_egl += files('drivers/dri2/platform_android.c')
+  incs_for_egl += [inc_loader]
 endif
 if with_platform_haiku
   incs_for_egl += inc_haikugl
diff --git a/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h b/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h
index 90dbf658a6d..284e07386dc 100644
--- a/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h
+++ b/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h
@@ -30,6 +30,7 @@ DRI_CONF_SECTION_DEBUG
    DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION("false")
    DRI_CONF_FORCE_GLSL_ABS_SQRT("false")
    DRI_CONF_GLSL_CORRECT_DERIVATIVES_AFTER_DISCARD("false")
+   DRI_CONF_FORCE_COMPAT_PROFILE("false")
 DRI_CONF_SECTION_END
 
 DRI_CONF_SECTION_MISCELLANEOUS
diff --git a/src/gallium/auxiliary/pipe-loader/meson.build b/src/gallium/auxiliary/pipe-loader/meson.build
index 32e8188c68b..c0b9a53cf65 100644
--- a/src/gallium/auxiliary/pipe-loader/meson.build
+++ b/src/gallium/auxiliary/pipe-loader/meson.build
@@ -31,6 +31,9 @@ libpipe_loader_defines = []
 if dep_libdrm.found()
   files_pipe_loader += files('pipe_loader_drm.c')
 endif
+if with_dri
+  libpipe_loader_defines += '-DHAVE_PIPE_LOADER_DRI'
+endif
 if with_gallium_drisw_kms
   libpipe_loader_defines += '-DHAVE_PIPE_LOADER_KMS'
 endif
@@ -42,10 +45,7 @@ libpipe_loader_static = static_library(
     inc_util, inc_loader, inc_gallium, inc_include, inc_src, inc_gallium_aux,
     inc_gallium_winsys,
   ],
-  c_args : [
-    c_vis_args, '-DHAVE_PIPE_LOADER_DRI', '-DGALLIUM_STATIC_TARGETS=1',
-    libpipe_loader_defines,
-  ],
+  c_args : [c_vis_args, libpipe_loader_defines, '-DGALLIUM_STATIC_TARGETS=1'],
   link_with : [libloader, libxmlconfig],
   dependencies : [dep_libdrm],
   build_by_default : false,
@@ -59,7 +59,7 @@ libpipe_loader_dynamic = static_library(
     inc_gallium_winsys,
   ],
   c_args : [
-    c_vis_args, libpipe_loader_defines, '-DHAVE_PIPE_LOADER_DRI',
+    c_vis_args, libpipe_loader_defines,
     '-DPIPE_SEARCH_DIR="@0@"'.format(
       join_paths(get_option('prefix'), get_option('libdir'), 'gallium-pipe')
     )
diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h
index 53c5a7e8c4c..245a8af2ac5 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.h
+++ b/src/gallium/auxiliary/util/u_threaded_context.h
@@ -408,14 +408,6 @@ threaded_transfer(struct pipe_transfer *transfer)
    return (struct threaded_transfer*)transfer;
 }
 
-static inline struct pipe_context *
-threaded_context_unwrap_unsync(struct pipe_context *pipe)
-{
-   if (!pipe || !pipe->priv)
-      return pipe;
-   return (struct pipe_context*)pipe->priv;
-}
-
 static inline void
 tc_unflushed_batch_token_reference(struct tc_unflushed_batch_token **dst,
                                    struct tc_unflushed_batch_token *src)
diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c
index 746ff1085ce..f721613cbc5 100644
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -1131,6 +1131,31 @@ static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr)
    mgr->dirty_real_vb_mask = 0;
 }
 
+static void
+u_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info *info,
+                               unsigned *indirect_data, unsigned stride,
+                               unsigned draw_count)
+{
+   assert(info->index_size);
+   info->indirect = NULL;
+
+   for (unsigned i = 0; i < draw_count; i++) {
+      unsigned offset = i * stride / 4;
+
+      info->count = indirect_data[offset + 0];
+      info->instance_count = indirect_data[offset + 1];
+
+      if (!info->count || !info->instance_count)
+         continue;
+
+      info->start = indirect_data[offset + 2];
+      info->index_bias = indirect_data[offset + 3];
+      info->start_instance = indirect_data[offset + 4];
+
+      u_vbuf_draw_vbo(mgr, info);
+   }
+}
+
 void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
 {
    struct pipe_context *pipe = mgr->pipe;
@@ -1160,33 +1185,164 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
 
    new_info = *info;
 
-   /* Fallback. We need to know all the parameters. */
+   /* Handle indirect (multi)draws. */
    if (new_info.indirect) {
-      struct pipe_transfer *transfer = NULL;
-      int *data;
-
-      if (new_info.index_size) {
-         data = pipe_buffer_map_range(pipe, new_info.indirect->buffer,
-                                      new_info.indirect->offset, 20,
-                                      PIPE_TRANSFER_READ, &transfer);
-         new_info.index_bias = data[3];
-         new_info.start_instance = data[4];
-      }
-      else {
-         data = pipe_buffer_map_range(pipe, new_info.indirect->buffer,
-                                      new_info.indirect->offset, 16,
-                                      PIPE_TRANSFER_READ, &transfer);
-         new_info.start_instance = data[3];
+      const struct pipe_draw_indirect_info *indirect = new_info.indirect;
+      unsigned draw_count = 0;
+
+      /* Get the number of draws. */
+      if (indirect->indirect_draw_count) {
+         pipe_buffer_read(pipe, indirect->indirect_draw_count,
+                          indirect->indirect_draw_count_offset,
+                          4, &draw_count);
+      } else {
+         draw_count = indirect->draw_count;
       }
 
-      new_info.count = data[0];
-      new_info.instance_count = data[1];
-      new_info.start = data[2];
-      pipe_buffer_unmap(pipe, transfer);
-      new_info.indirect = NULL;
-
-      if (!new_info.count)
+      if (!draw_count)
          return;
+
+      unsigned data_size = (draw_count - 1) * indirect->stride +
+                           (new_info.index_size ? 20 : 16);
+      unsigned *data = malloc(data_size);
+      if (!data)
+         return; /* report an error? */
+
+      /* Read the used buffer range only once, because the read can be
+       * uncached.
+       */
+      pipe_buffer_read(pipe, indirect->buffer, indirect->offset, data_size,
+                       data);
+
+      if (info->index_size) {
+         /* Indexed multidraw. */
+         unsigned index_bias0 = data[3];
+         bool index_bias_same = true;
+
+         /* If we invoke the translate path, we have to split the multidraw. */
+         if (incompatible_vb_mask ||
+             mgr->ve->incompatible_elem_mask) {
+            u_vbuf_split_indexed_multidraw(mgr, &new_info, data,
+                                           indirect->stride, draw_count);
+            free(data);
+            return;
+         }
+
+         /* See if index_bias is the same for all draws. */
+         for (unsigned i = 1; i < draw_count; i++) {
+            if (data[i * indirect->stride / 4 + 3] != index_bias0) {
+               index_bias_same = false;
+               break;
+            }
+         }
+
+         /* Split the multidraw if index_bias is different. */
+         if (!index_bias_same) {
+            u_vbuf_split_indexed_multidraw(mgr, &new_info, data,
+                                           indirect->stride, draw_count);
+            free(data);
+            return;
+         }
+
+         /* If we don't need to use the translate path and index_bias is
+          * the same, we can process the multidraw with the time complexity
+          * equal to 1 draw call (except for the index range computation).
+          * We only need to compute the index range covering all draw calls
+          * of the multidraw.
+          *
+          * The driver will not look at these values because indirect != NULL.
+          * These values determine the user buffer bounds to upload.
+          */
+         new_info.index_bias = index_bias0;
+         new_info.min_index = ~0u;
+         new_info.max_index = 0;
+         new_info.start_instance = ~0u;
+         unsigned end_instance = 0;
+
+         struct pipe_transfer *transfer = NULL;
+         const uint8_t *indices;
+
+         if (info->has_user_indices) {
+            indices = (uint8_t*)info->index.user;
+         } else {
+            indices = (uint8_t*)pipe_buffer_map(pipe, info->index.resource,
+                                                PIPE_TRANSFER_READ, &transfer);
+         }
+
+         for (unsigned i = 0; i < draw_count; i++) {
+            unsigned offset = i * indirect->stride / 4;
+            unsigned start = data[offset + 2];
+            unsigned count = data[offset + 0];
+            unsigned start_instance = data[offset + 4];
+            unsigned instance_count = data[offset + 1];
+
+            if (!count || !instance_count)
+               continue;
+
+            /* Update the ranges of instances. */
+            new_info.start_instance = MIN2(new_info.start_instance,
+                                           start_instance);
+            end_instance = MAX2(end_instance, start_instance + instance_count);
+
+            /* Update the index range. */
+            unsigned min, max;
+            new_info.count = count; /* only used by get_minmax_index */
+            u_vbuf_get_minmax_index_mapped(&new_info,
+                                           indices +
+                                           new_info.index_size * start,
+                                           &min, &max);
+
+            new_info.min_index = MIN2(new_info.min_index, min);
+            new_info.max_index = MAX2(new_info.max_index, max);
+         }
+         free(data);
+
+         if (transfer)
+            pipe_buffer_unmap(pipe, transfer);
+
+         /* Set the final instance count. */
+         new_info.instance_count = end_instance - new_info.start_instance;
+
+         if (new_info.start_instance == ~0u || !new_info.instance_count)
+            return;
+      } else {
+         /* Non-indexed multidraw.
+          *
+          * Keep the draw call indirect and compute minimums & maximums,
+          * which will determine the user buffer bounds to upload, but
+          * the driver will not look at these values because indirect != NULL.
+          *
+          * This efficiently processes the multidraw with the time complexity
+          * equal to 1 draw call.
+          */
+         new_info.start = ~0u;
+         new_info.start_instance = ~0u;
+         unsigned end_vertex = 0;
+         unsigned end_instance = 0;
+
+         for (unsigned i = 0; i < draw_count; i++) {
+            unsigned offset = i * indirect->stride / 4;
+            unsigned start = data[offset + 2];
+            unsigned count = data[offset + 0];
+            unsigned start_instance = data[offset + 3];
+            unsigned instance_count = data[offset + 1];
+
+            new_info.start = MIN2(new_info.start, start);
+            new_info.start_instance = MIN2(new_info.start_instance,
+                                           start_instance);
+
+            end_vertex = MAX2(end_vertex, start + count);
+            end_instance = MAX2(end_instance, start_instance + instance_count);
+         }
+         free(data);
+
+         /* Set the final counts. */
+         new_info.count = end_vertex - new_info.start;
+         new_info.instance_count = end_instance - new_info.start_instance;
+
+         if (new_info.start == ~0u || !new_info.count || !new_info.instance_count)
+            return;
+      }
    }
 
    if (new_info.index_size) {
@@ -1211,7 +1367,8 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
           * We would have to break this drawing operation into several ones. */
          /* Use some heuristic to see if unrolling indices improves
           * performance. */
-         if (!new_info.primitive_restart &&
+         if (!info->indirect &&
+             !new_info.primitive_restart &&
              num_vertices > new_info.count*2 &&
              num_vertices - new_info.count > 32 &&
              !u_vbuf_mapping_vertex_buffer_blocks(mgr)) {
diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri3.c b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
index 8e3c4a0e04d..df2c9c0e50c 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri3.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
@@ -712,7 +712,6 @@ vl_dri3_screen_destroy(struct vl_screen *vscreen)
    if (scrn->front_buffer) {
       dri3_free_front_buffer(scrn, scrn->front_buffer);
       scrn->front_buffer = NULL;
-      return;
    }
 
    for (i = 0; i < BACK_BUFFER_NUM; ++i) {
diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
index 88f6fb557d0..7d5ca25e686 100644
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -211,6 +211,15 @@ fd_set_framebuffer_state(struct pipe_context *pctx,
 	struct fd_context *ctx = fd_context(pctx);
 	struct pipe_framebuffer_state *cso;
 
+	cso = &ctx->batch->framebuffer;
+
+	if (util_framebuffer_state_equal(cso, framebuffer))
+		return;
+
+	util_copy_framebuffer_state(cso, framebuffer);
+
+	cso->samples = util_framebuffer_get_num_samples(cso);
+
 	if (ctx->screen->reorder) {
 		struct fd_batch *batch, *old_batch = NULL;
 
@@ -239,14 +248,9 @@ fd_set_framebuffer_state(struct pipe_context *pctx,
 		DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush,
 				framebuffer->cbufs[0], framebuffer->zsbuf);
 		fd_batch_flush(ctx->batch, false, false);
+		util_copy_framebuffer_state(&ctx->batch->framebuffer, cso);
 	}
 
-	cso = &ctx->batch->framebuffer;
-
-	util_copy_framebuffer_state(cso, framebuffer);
-
-	cso->samples = util_framebuffer_get_num_samples(cso);
-
 	ctx->dirty |= FD_DIRTY_FRAMEBUFFER;
 
 	ctx->disabled_scissor.minx = 0;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
index c7436e2e297..49a5f3b01f2 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
@@ -320,11 +320,11 @@ GM107LoweringPass::handleSUQ(TexInstruction *suq)
 
       if (mask & 0x1)
          bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(0), suq->getDef(0),
-                   loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), suq->tex.bindless));
+                   loadMsAdjInfo32(suq->tex.target, 0, slot, ind, suq->tex.bindless));
       if (mask & 0x2) {
          int d = util_bitcount(mask & 0x1);
          bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(d), suq->getDef(d),
-                   loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), suq->tex.bindless));
+                   loadMsAdjInfo32(suq->tex.target, 1, slot, ind, suq->tex.bindless));
       }
    }
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 1410cf26c87..3feb1fcf138 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1732,6 +1732,45 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless
                         prog->driver->io.suInfoBase);
 }
 
+Value *
+NVC0LoweringPass::loadMsAdjInfo32(TexInstruction::Target target, uint32_t index, int slot, Value *ind, bool bindless)
+{
+   if (!bindless || targ->getChipset() < NVISA_GM107_CHIPSET)
+      return loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(index), bindless);
+
+   assert(bindless);
+
+   Value *samples = bld.getSSA();
+   // this shouldn't be lowered because it's being inserted before the current instruction
+   TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
+   tex->tex.target = target;
+   tex->tex.query = TXQ_TYPE;
+   tex->tex.mask = 0x4;
+   tex->tex.r = 0xff;
+   tex->tex.s = 0x1f;
+   tex->tex.rIndirectSrc = 0;
+   tex->setDef(0, samples);
+   tex->setSrc(0, ind);
+   tex->setSrc(1, bld.loadImm(NULL, 0));
+   bld.insert(tex);
+
+   // doesn't work with sample counts other than 1/2/4/8 but they aren't supported
+   switch (index) {
+   case 0: {
+      Value *tmp = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), samples, bld.mkImm(2));
+      return bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), tmp, bld.mkImm(2));
+   }
+   case 1: {
+      Value *tmp = bld.mkCmp(OP_SET, CC_GT, TYPE_U32, bld.getSSA(), TYPE_U32, samples, bld.mkImm(2))->getDef(0);
+      return bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), tmp, bld.mkImm(1));
+   }
+   default: {
+      assert(false);
+      return NULL;
+   }
+   }
+}
+
 static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c)
 {
    switch (su->tex.target.getEnum()) {
@@ -1817,8 +1856,8 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex)
    Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
    Value *ind = tex->getIndirectR();
 
-   Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless);
-   Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless);
+   Value *ms_x = loadMsAdjInfo32(tex->tex.target, 0, slot, ind, tex->tex.bindless);
+   Value *ms_y = loadMsAdjInfo32(tex->tex.target, 1, slot, ind, tex->tex.bindless);
 
    bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x);
    bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y);
@@ -2151,13 +2190,36 @@ NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su)
    }
 }
 
+void
+NVC0LoweringPass::insertOOBSurfaceOpResult(TexInstruction *su)
+{
+   if (!su->getPredicate())
+      return;
+
+   bld.setPosition(su, true);
+
+   for (unsigned i = 0; su->defExists(i); ++i) {
+      ValueDef &def = su->def(i);
+
+      Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
+      assert(su->cc == CC_NOT_P);
+      mov->setPredicate(CC_P, su->getPredicate());
+      Instruction *uni = bld.mkOp2(OP_UNION, TYPE_U32, bld.getSSA(), NULL, mov->getDef(0));
+
+      def.replace(uni->getDef(0), false);
+      uni->setSrc(0, def.get());
+   }
+}
+
 void
 NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
 {
    processSurfaceCoordsNVE4(su);
 
-   if (su->op == OP_SULDP)
+   if (su->op == OP_SULDP) {
       convertSurfaceFormat(su);
+      insertOOBSurfaceOpResult(su);
+   }
 
    if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
       assert(su->getPredicate());
@@ -2267,8 +2329,10 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
 
    processSurfaceCoordsNVC0(su);
 
-   if (su->op == OP_SULDP)
+   if (su->op == OP_SULDP) {
       convertSurfaceFormat(su);
+      insertOOBSurfaceOpResult(su);
+   }
 
    if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
       const int dim = su->tex.target.getDim();
@@ -2370,8 +2434,10 @@ NVC0LoweringPass::handleSurfaceOpGM107(TexInstruction *su)
 {
    processSurfaceCoordsGM107(su);
 
-   if (su->op == OP_SULDP)
+   if (su->op == OP_SULDP) {
       convertSurfaceFormat(su);
+      insertOOBSurfaceOpResult(su);
+   }
 
    if (su->op == OP_SUREDP) {
       Value *def = su->getDef(0);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 8724c09afd9..4136b1ecfeb 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -148,7 +148,7 @@ class NVC0LoweringPass : public Pass
    void handlePIXLD(Instruction *);
 
    void checkPredicate(Instruction *);
-   Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
+   Value *loadMsAdjInfo32(TexInstruction::Target targ, uint32_t index, int slot, Value *ind, bool bindless);
 
    virtual bool visit(Instruction *);
 
@@ -161,6 +161,7 @@ class NVC0LoweringPass : public Pass
    Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base);
    Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base);
    Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base);
+   Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless);
    Value *loadBufInfo64(Value *ptr, uint32_t off);
    Value *loadBufLength32(Value *ptr, uint32_t off);
    Value *loadUboInfo64(Value *ptr, uint32_t off);
@@ -172,6 +173,7 @@ class NVC0LoweringPass : public Pass
    void processSurfaceCoordsNVE4(TexInstruction *);
    void processSurfaceCoordsNVC0(TexInstruction *);
    void convertSurfaceFormat(TexInstruction *);
+   void insertOOBSurfaceOpResult(TexInstruction *);
    Value *calculateSampleOffset(Value *sampleID);
 
 protected:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 16022e6f237..7bb12cdf4ce 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -3855,7 +3855,7 @@ Program::optimizeSSA(int level)
    RUN_PASS(2, AlgebraicOpt, run);
    RUN_PASS(2, ModifierFolding, run); // before load propagation -> less checks
    RUN_PASS(1, ConstantFolding, foldAll);
-   RUN_PASS(1, Split64BitOpPreRA, run);
+   RUN_PASS(0, Split64BitOpPreRA, run);
    RUN_PASS(2, LateAlgebraicOpt, run);
    RUN_PASS(1, LoadPropagation, run);
    RUN_PASS(1, IndirectPropagation, run);
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index c97b707955c..d7898ed58fe 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -148,20 +148,21 @@ nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
 static void
 nouveau_disk_cache_create(struct nouveau_screen *screen)
 {
-   uint32_t mesa_timestamp;
-   char *timestamp_str;
-   int res;
-
-   if (disk_cache_get_function_timestamp(nouveau_disk_cache_create,
-                                         &mesa_timestamp)) {
-      res = asprintf(&timestamp_str, "%u", mesa_timestamp);
-      if (res != -1) {
-         screen->disk_shader_cache =
-            disk_cache_create(nouveau_screen_get_name(&screen->base),
-                              timestamp_str, 0);
-         free(timestamp_str);
-      }
-   }
+   struct mesa_sha1 ctx;
+   unsigned char sha1[20];
+   char cache_id[20 * 2 + 1];
+
+   _mesa_sha1_init(&ctx);
+   if (!disk_cache_get_function_identifier(nouveau_disk_cache_create,
+                                           &ctx))
+      return;
+
+   _mesa_sha1_final(&ctx, sha1);
+   disk_cache_format_hex_id(cache_id, sha1, 20 * 2);
+
+   screen->disk_shader_cache =
+      disk_cache_create(nouveau_screen_get_name(&screen->base),
+                        cache_id, 0);
 }
 
 int
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
index a55adfa59f4..0a693d7b173 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
@@ -318,8 +318,8 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
    F3(A, R16G16B16X16_FLOAT, RGBX16_FLOAT, R, G, B, xx, FLOAT, R16_G16_B16_A16, TB),
    F3(A, R16G16B16X16_UNORM, RGBA16_UNORM, R, G, B, xx, UNORM, R16_G16_B16_A16, T),
    F3(A, R16G16B16X16_SNORM, RGBA16_SNORM, R, G, B, xx, SNORM, R16_G16_B16_A16, T),
-   I3(A, R16G16B16X16_SINT, RGBA16_SINT, R, G, B, xx, SINT, R16_G16_B16_A16, T),
-   I3(A, R16G16B16X16_UINT, RGBA16_UINT, R, G, B, xx, UINT, R16_G16_B16_A16, T),
+   I3(A, R16G16B16X16_SINT, RGBA16_SINT, R, G, B, xx, SINT, R16_G16_B16_A16, TR),
+   I3(A, R16G16B16X16_UINT, RGBA16_UINT, R, G, B, xx, UINT, R16_G16_B16_A16, TR),
 
    F2(A, R16G16_FLOAT, RG16_FLOAT, R, G, xx, xx, FLOAT, R16_G16, IB),
    F2(A, R16G16_UNORM, RG16_UNORM, R, G, xx, xx, UNORM, R16_G16, IC),
@@ -337,8 +337,8 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
    C4(A, R8G8B8A8_SINT, RGBA8_SINT, R, G, B, A, SINT, A8B8G8R8, IR),
    C4(A, R8G8B8A8_UINT, RGBA8_UINT, R, G, B, A, UINT, A8B8G8R8, IR),
    F3(A, R8G8B8X8_SNORM, RGBA8_SNORM, R, G, B, xx, SNORM, A8B8G8R8, T),
-   I3(A, R8G8B8X8_SINT, RGBA8_SINT, R, G, B, xx, SINT, A8B8G8R8, T),
-   I3(A, R8G8B8X8_UINT, RGBA8_UINT, R, G, B, xx, UINT, A8B8G8R8, T),
+   I3(A, R8G8B8X8_SINT, RGBA8_SINT, R, G, B, xx, SINT, A8B8G8R8, TR),
+   I3(A, R8G8B8X8_UINT, RGBA8_UINT, R, G, B, xx, UINT, A8B8G8R8, TR),
 
    F2(A, R8G8_UNORM, RG8_UNORM, R, G, xx, xx, UNORM, G8R8, IB),
    F2(A, R8G8_SNORM, RG8_SNORM, R, G, xx, xx, SNORM, G8R8, IC),
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index 1ef0f5030a5..de840eb531b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -1669,6 +1669,13 @@ nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    bool eng3d = FALSE;
 
+   if (info->src.box.width == 0 || info->src.box.height == 0 ||
+       info->dst.box.width == 0 || info->dst.box.height == 0) {
+      pipe_debug_message(&nv50->base.debug, ERROR,
+                         "Blit with zero-size src or dst box");
+      return;
+   }
+
    if (util_format_is_depth_or_stencil(info->dst.resource->format)) {
       if (!(info->mask & PIPE_MASK_ZS))
          return;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index df5723dc37c..726160d7c05 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -429,9 +429,9 @@ static const uint64_t nve4_read_hw_sm_counters_code[] =
     * mov b32 $r6 $pm6
     * mov b32 $r7 $pm7
     * set $p0 0x1 eq u32 $r8 0x0
-    * mov b32 $r10 c7[0x620]
+    * mov b32 $r10 c7[0x6a0]
     * ext u32 $r8 $r12 0x414
-    * mov b32 $r11 c7[0x624]
+    * mov b32 $r11 c7[0x6a4]
     * sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04
     * ext u32 $r9 $r12 0x208
     * (not $p0) exit
@@ -449,7 +449,7 @@ static const uint64_t nve4_read_hw_sm_counters_code[] =
     * add b32 $r12 $c $r12 $r9
     * st b128 wt g[$r10d] $r0q
     * sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00
-    * mov b32 $r0 c7[0x628]
+    * mov b32 $r0 c7[0x6a8]
     * add b32 $r13 $r13 0x0 $c
     * $p1 st b128 wt g[$r12d+0x40] $r4q
     * st b32 wt g[$r12d+0x50] $r0
@@ -467,9 +467,9 @@ static const uint64_t nve4_read_hw_sm_counters_code[] =
    0x2c00000028019c04ULL,
    0x2c0000002c01dc04ULL,
    0x190e0000fc81dc03ULL,
-   0x28005c1880029de4ULL,
+   0x28005c1a80029de4ULL,
    0x7000c01050c21c03ULL,
-   0x28005c189002dde4ULL,
+   0x28005c1a9002dde4ULL,
    0x204282020042e047ULL,
    0x7000c00820c25c03ULL,
    0x80000000000021e7ULL,
@@ -487,7 +487,7 @@ static const uint64_t nve4_read_hw_sm_counters_code[] =
    0x4801000024c31c03ULL,
    0x9400000000a01fc5ULL,
    0x200002e04202c047ULL,
-   0x28005c18a0001de4ULL,
+   0x28005c1aa0001de4ULL,
    0x0800000000d35c42ULL,
    0x9400000100c107c5ULL,
    0x9400000140c01f85ULL,
@@ -510,9 +510,9 @@ static const uint64_t nvf0_read_hw_sm_counters_code[] =
    0x86400000051c001aULL,
    0x86400000059c001eULL,
    0xdb201c007f9c201eULL,
-   0x64c03ce0c41c002aULL,
+   0x64c03ce0d41c002aULL,
    0xc00000020a1c3021ULL,
-   0x64c03ce0c49c002eULL,
+   0x64c03ce0d49c002eULL,
    0x0810a0808010b810ULL,
    0xc0000001041c3025ULL,
    0x180000000020003cULL,
@@ -530,7 +530,7 @@ static const uint64_t nvf0_read_hw_sm_counters_code[] =
    0xe0840000049c3032ULL,
    0xfe800000001c2800ULL,
    0x080000b81080b010ULL,
-   0x64c03ce0c51c0002ULL,
+   0x64c03ce0d51c0002ULL,
    0xe08040007f9c3436ULL,
    0xfe80000020043010ULL,
    0xfc800000281c3000ULL,
@@ -554,10 +554,10 @@ static const uint64_t gm107_read_hw_sm_counters_code[] =
    0x001f8401fc2007a1ULL, /* sched (st 0x1 wr 0x5) (st 0x1 wt 0x1) (st 0x1)         */
    0xf0c8000000b70007ULL, /* mov $r7 $pm7                                           */
    0x5b6403800087ff07ULL, /* isetp eq u32 and $p0 0x1 0x0 $r8 0x1                   */
-   0x4c98079c1887000aULL, /* mov $r10 c7[0x620] 0xf                                 */
+   0x4c98079c1a87000aULL, /* mov $r10 c7[0x6a0] 0xf                                 */
    0x001fa400fc2017e1ULL, /* sched (st 0x1 wt 0x2) (st 0x1) (st 0x9)                */
    0x3800000091470c08ULL, /* bfe u32 $r8 $r12 0x914                                 */
-   0x4c98079c1897000bULL, /* mov $r11 c7[0x624] 0xf                                 */
+   0x4c98079c1a97000bULL, /* mov $r11 c7[0x6a4] 0xf                                 */
    0x3800000020870c09ULL, /* bfe u32 $r9 $r12 0x208                                 */
    0x001c1800fc2007edULL, /* sched (st 0xd) (st 0x1) (st 0x6 wr 0x0)                */
    0xe30000000008000fULL, /* not $p0 exit                                           */
@@ -578,7 +578,7 @@ static const uint64_t gm107_read_hw_sm_counters_code[] =
    0x003f983c1c4007e1ULL, /* sched (st 0x1) (st 0x2 rd 0x0 wt 0x3c) (st 0x6 wt 0x1) */
    0x5c1008000ff70d0dULL, /* iadd x $r13 $r13 0x0                                   */
    0xbfd0000000070a00ULL, /* st e wt b128 g[$r10] $r0 0x1                           */
-   0x4c98079c18a70000ULL, /* mov $r0 c7[0x628] 0xf                                  */
+   0x4c98079c1aa70000ULL, /* mov $r0 c7[0x6a8] 0xf                                  */
    0x001fbc00fc2007e6ULL, /* sched (st 0x1) (st 0x1) (st 0xf)                       */
    0xbfd0000004010c04ULL, /* $p1 st e wt b128 g[$r12+0x40] $r4 0x1                  */
    0xbf90000005070c00ULL, /* st e wt b32 g[$r12+0x50] $r0 0x1                       */
@@ -1760,14 +1760,14 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] =
     * mov b32 $r6 $pm6
     * mov b32 $r7 $pm7
     * set $p0 0x1 eq u32 $r8 0x0
-    * mov b32 $r10 c15[0x620]
-    * mov b32 $r11 c15[0x624]
+    * mov b32 $r10 c15[0x6a0]
+    * mov b32 $r11 c15[0x6a4]
     * ext u32 $r8 $r9 0x414
     * (not $p0) exit
     * mul $r8 u32 $r8 u32 48
     * add b32 $r10 $c $r10 $r8
     * add b32 $r11 $r11 0x0 $c
-    * mov b32 $r8 c15[0x628]
+    * mov b32 $r8 c15[0x6a8]
     * st b128 wt g[$r10d+0x00] $r0q
     * st b128 wt g[$r10d+0x10] $r4q
     * st b32 wt g[$r10d+0x20] $r8
@@ -1783,14 +1783,14 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] =
    0x2c00000028019c04ULL,
    0x2c0000002c01dc04ULL,
    0x190e0000fc81dc03ULL,
-   0x28007c1880029de4ULL,
-   0x28007c189002dde4ULL,
+   0x28007c1a80029de4ULL,
+   0x28007c1a9002dde4ULL,
    0x7000c01050921c03ULL,
    0x80000000000021e7ULL,
    0x10000000c0821c02ULL,
    0x4801000020a29c03ULL,
    0x0800000000b2dc42ULL,
-   0x28007c18a0021de4ULL,
+   0x28007c1aa0021de4ULL,
    0x9400000000a01fc5ULL,
    0x9400000040a11fc5ULL,
    0x9400000080a21f85ULL,
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index 39b1369758a..03881c62785 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -1573,6 +1573,13 @@ nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    bool eng3d = false;
 
+   if (info->src.box.width == 0 || info->src.box.height == 0 ||
+       info->dst.box.width == 0 || info->dst.box.height == 0) {
+      pipe_debug_message(&nvc0->base.debug, ERROR,
+                         "Blit with zero-size src or dst box");
+      return;
+   }
+
    if (util_format_is_depth_or_stencil(info->dst.resource->format)) {
       if (!(info->mask & PIPE_MASK_ZS))
          return;
@@ -1610,6 +1617,10 @@ nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
             else
             if (util_format_is_alpha(info->src.format))
                eng3d = info->src.format != PIPE_FORMAT_A8_UNORM;
+            else
+            if (util_format_is_srgb(info->dst.format) &&
+                util_format_get_nr_components(info->src.format) == 1)
+               eng3d = true;
             else
                eng3d = !nv50_2d_format_supported(info->src.format);
          }
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 90eae1e2829..a77f58242e3 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -715,7 +715,6 @@ static void compute_emit_cs(struct r600_context *rctx,
 		rctx->cmd_buf_is_compute = true;
 	}
 
-	r600_need_cs_space(rctx, 0, true);
 	if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI) {
 		r600_shader_select(&rctx->b.b, rctx->cs_shader_state.shader->sel, &compute_dirty);
 		current = rctx->cs_shader_state.shader->sel->current;
@@ -742,16 +741,22 @@ static void compute_emit_cs(struct r600_context *rctx,
 		}
 		rctx->cs_block_grid_sizes[3] = rctx->cs_block_grid_sizes[7] = 0;
 		rctx->driver_consts[PIPE_SHADER_COMPUTE].cs_block_grid_size_dirty = true;
+
+		evergreen_emit_atomic_buffer_setup_count(rctx, current, combined_atomics, &atomic_used_mask);
+		r600_need_cs_space(rctx, 0, true, util_bitcount(atomic_used_mask));
+
 		if (need_buf_const) {
 			eg_setup_buffer_constants(rctx, PIPE_SHADER_COMPUTE);
 		}
 		r600_update_driver_const_buffers(rctx, true);
 
-		if (evergreen_emit_atomic_buffer_setup(rctx, current, combined_atomics, &atomic_used_mask)) {
+		evergreen_emit_atomic_buffer_setup(rctx, true, combined_atomics, atomic_used_mask);
+		if (atomic_used_mask) {
 			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 			radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
 		}
-	}
+	} else
+		r600_need_cs_space(rctx, 0, true, 0);
 
 	/* Initialize all the compute-related registers.
 	 *
diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index d3f3e227c1f..5e0e27b0f16 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -109,7 +109,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
 
 		r600_need_cs_space(rctx,
 				   10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) +
-				   R600_MAX_PFP_SYNC_ME_DWORDS, FALSE);
+				   R600_MAX_PFP_SYNC_ME_DWORDS, FALSE, 0);
 
 		/* Flush the caches for the first copy only. */
 		if (rctx->b.flags) {
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 57e81e30c27..cc41e114369 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -4030,7 +4030,6 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
 
 		if (!buffers || !buffers[idx].buffer) {
 			pipe_resource_reference(&abuf->buffer, NULL);
-			astate->enabled_mask &= ~(1 << i);
 			continue;
 		}
 		buf = &buffers[idx];
@@ -4038,7 +4037,6 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
 		pipe_resource_reference(&abuf->buffer, buf->buffer);
 		abuf->buffer_offset = buf->buffer_offset;
 		abuf->buffer_size = buf->buffer_size;
-		astate->enabled_mask |= (1 << i);
 	}
 }
 
@@ -4868,20 +4866,15 @@ static void cayman_write_count_to_gds(struct r600_context *rctx,
 	radeon_emit(cs, reloc);
 }
 
-bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
-					struct r600_pipe_shader *cs_shader,
-					struct r600_shader_atomic *combined_atomics,
-					uint8_t *atomic_used_mask_p)
+void evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx,
+					      struct r600_pipe_shader *cs_shader,
+					      struct r600_shader_atomic *combined_atomics,
+					      uint8_t *atomic_used_mask_p)
 {
-	struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
-	unsigned pkt_flags = 0;
 	uint8_t atomic_used_mask = 0;
 	int i, j, k;
 	bool is_compute = cs_shader ? true : false;
 
-	if (is_compute)
-		pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
-
 	for (i = 0; i < (is_compute ? 1 : EG_NUM_HW_STAGES); i++) {
 		uint8_t num_atomic_stage;
 		struct r600_pipe_shader *pshader;
@@ -4914,8 +4907,25 @@ bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
 			}
 		}
 	}
+	*atomic_used_mask_p = atomic_used_mask;
+}
+
+void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
+					bool is_compute,
+					struct r600_shader_atomic *combined_atomics,
+					uint8_t atomic_used_mask)
+{
+	struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
+	unsigned pkt_flags = 0;
+	uint32_t mask;
+
+	if (is_compute)
+		pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
+
+	mask = atomic_used_mask;
+	if (!mask)
+		return;
 
-	uint32_t mask = atomic_used_mask;
 	while (mask) {
 		unsigned atomic_index = u_bit_scan(&mask);
 		struct r600_shader_atomic *atomic = &combined_atomics[atomic_index];
@@ -4927,8 +4937,6 @@ bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
 		else
 			evergreen_emit_set_append_cnt(rctx, atomic, resource, pkt_flags);
 	}
-	*atomic_used_mask_p = atomic_used_mask;
-	return true;
 }
 
 void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
@@ -4940,7 +4948,7 @@ void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
 	struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
 	uint32_t pkt_flags = 0;
 	uint32_t event = EVENT_TYPE_PS_DONE;
-	uint32_t mask = astate->enabled_mask;
+	uint32_t mask;
 	uint64_t dst_offset;
 	unsigned reloc;
 
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 1cfc180ad6c..a2f5f637b20 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -31,7 +31,7 @@
 
 
 void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
-			boolean count_draw_in)
+			boolean count_draw_in, unsigned num_atomics)
 {
 	/* Flush the DMA IB if it's not empty. */
 	if (radeon_emitted(ctx->b.dma.cs, 0))
@@ -61,6 +61,9 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 		num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
 	}
 
+	/* add atomic counters, 8 pre + 8 post per counter + 16 post if any counters */
+	num_dw += (num_atomics * 16) + (num_atomics ? 16 : 0);
+
 	/* Count in r600_suspend_queries. */
 	num_dw += ctx->b.num_cs_dw_queries_suspend;
 
@@ -526,7 +529,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
 
 		r600_need_cs_space(rctx,
 				   10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) +
-				   3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE);
+				   3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE, 0);
 
 		/* Flush the caches for the first copy only. */
 		if (rctx->b.flags) {
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 6204e3c557b..239005cab7f 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -446,8 +446,6 @@ struct r600_shader_state {
 };
 
 struct r600_atomic_buffer_state {
-	uint32_t enabled_mask;
-	uint32_t dirty_mask;
 	struct pipe_shader_buffer buffer[EG_MAX_ATOMIC_BUFFERS];
 };
 
@@ -773,7 +771,7 @@ void r600_context_gfx_flush(void *context, unsigned flags,
 			    struct pipe_fence_handle **fence);
 void r600_begin_new_cs(struct r600_context *ctx);
 void r600_flush_emit(struct r600_context *ctx);
-void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in);
+void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in, unsigned num_atomics);
 void r600_emit_pfp_sync_me(struct r600_context *rctx);
 void r600_cp_dma_copy_buffer(struct r600_context *rctx,
 			     struct pipe_resource *dst, uint64_t dst_offset,
@@ -1067,10 +1065,14 @@ void r600_delete_shader_selector(struct pipe_context *ctx,
 				 struct r600_pipe_shader_selector *sel);
 
 struct r600_shader_atomic;
-bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
-					struct r600_pipe_shader *cs_shader,
+void evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx,
+					      struct r600_pipe_shader *cs_shader,
+					      struct r600_shader_atomic *combined_atomics,
+					      uint8_t *atomic_used_mask_p);
+void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
+					bool is_compute,
 					struct r600_shader_atomic *combined_atomics,
-					uint8_t *atomic_used_mask_p);
+					uint8_t atomic_used_mask);
 void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
 				       bool is_compute,
 				       struct r600_shader_atomic *combined_atomics,
diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c
index f7cfd0d46a6..e7c645611d7 100644
--- a/src/gallium/drivers/r600/r600_pipe_common.c
+++ b/src/gallium/drivers/r600/r600_pipe_common.c
@@ -854,27 +854,28 @@ static void r600_disk_cache_create(struct r600_common_screen *rscreen)
 	if (rscreen->debug_flags & DBG_ALL_SHADERS)
 		return;
 
-	uint32_t mesa_timestamp;
-	if (disk_cache_get_function_timestamp(r600_disk_cache_create,
-					      &mesa_timestamp)) {
-		char *timestamp_str;
-		int res = -1;
-
-		res = asprintf(&timestamp_str, "%u",mesa_timestamp);
-		if (res != -1) {
-			/* These flags affect shader compilation. */
-			uint64_t shader_debug_flags =
-				rscreen->debug_flags &
-				(DBG_FS_CORRECT_DERIVS_AFTER_KILL |
-				 DBG_UNSAFE_MATH);
-
-			rscreen->disk_shader_cache =
-				disk_cache_create(r600_get_family_name(rscreen),
-						  timestamp_str,
-						  shader_debug_flags);
-			free(timestamp_str);
-		}
-	}
+	struct mesa_sha1 ctx;
+	unsigned char sha1[20];
+	char cache_id[20 * 2 + 1];
+
+	_mesa_sha1_init(&ctx);
+	if (!disk_cache_get_function_identifier(r600_disk_cache_create,
+						&ctx))
+		return;
+
+	_mesa_sha1_final(&ctx, sha1);
+	disk_cache_format_hex_id(cache_id, sha1, 20 * 2);
+
+	/* These flags affect shader compilation. */
+	uint64_t shader_debug_flags =
+		rscreen->debug_flags &
+		(DBG_FS_CORRECT_DERIVS_AFTER_KILL |
+		 DBG_UNSAFE_MATH);
+
+	rscreen->disk_shader_cache =
+		disk_cache_create(r600_get_family_name(rscreen),
+				  cache_id,
+				  shader_debug_flags);
 }
 
 static struct disk_cache *r600_get_disk_shader_cache(struct pipe_screen *pscreen)
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 402d95838f0..e6c1b0be97c 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -2085,8 +2085,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		: (rctx->tes_shader)? rctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]
 		: info->mode;
 
-	if (rctx->b.chip_class >= EVERGREEN)
-		evergreen_emit_atomic_buffer_setup(rctx, NULL, combined_atomics, &atomic_used_mask);
+	if (rctx->b.chip_class >= EVERGREEN) {
+		evergreen_emit_atomic_buffer_setup_count(rctx, NULL, combined_atomics, &atomic_used_mask);
+	}
 
 	if (index_size) {
 		index_offset += info->start * index_size;
@@ -2172,7 +2173,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		evergreen_setup_tess_constants(rctx, info, &num_patches);
 
 	/* Emit states. */
-	r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE);
+	r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE, util_bitcount(atomic_used_mask));
 	r600_flush_emit(rctx);
 
 	mask = rctx->dirty_atoms;
@@ -2180,6 +2181,10 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		r600_emit_atom(rctx, rctx->atoms[u_bit_scan64(&mask)]);
 	}
 
+	if (rctx->b.chip_class >= EVERGREEN) {
+		evergreen_emit_atomic_buffer_setup(rctx, false, combined_atomics, atomic_used_mask);
+	}
+		
 	if (rctx->b.chip_class == CAYMAN) {
 		/* Copied from radeonsi. */
 		unsigned primgroup_size = 128; /* recommended without a GS */
@@ -3284,7 +3289,7 @@ static void r600_set_active_query_state(struct pipe_context *ctx, boolean enable
 static void r600_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
                                    bool include_draw_vbo)
 {
-	r600_need_cs_space((struct r600_context*)ctx, num_dw, include_draw_vbo);
+	r600_need_cs_space((struct r600_context*)ctx, num_dw, include_draw_vbo, 0);
 }
 
 /* keep this at the end of this file, please */
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 08db6bab04c..d08c6e5637c 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -774,8 +774,8 @@ static void r600_texture_get_htile_size(struct r600_common_screen *rscreen,
 		return;
 	}
 
-	width = align(rtex->resource.b.b.width0, cl_width * 8);
-	height = align(rtex->resource.b.b.height0, cl_height * 8);
+	width = align(rtex->surface.u.legacy.level[0].nblk_x, cl_width * 8);
+	height = align(rtex->surface.u.legacy.level[0].nblk_y, cl_height * 8);
 
 	slice_elements = (width * height) / (8 * 8);
 	slice_bytes = slice_elements * 4;
diff --git a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
index 5681fdc4425..b7d87eac9f4 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
@@ -567,7 +567,7 @@ int bc_builder::build_fetch_gds(fetch_node *n) {
 	const fetch_op_info *fop = bc.op_ptr;
 	unsigned gds_op = (ctx.fetch_opcode(bc.op) >> 8) & 0x3f;
 	unsigned mem_op = 4;
-	assert(fop->flags && FF_GDS);
+	assert(fop->flags & FF_GDS);
 
 	if (bc.op == FETCH_OP_TF_WRITE) {
 		mem_op = 5;
diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
index a7b828268b9..4b909f85f19 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -617,7 +617,7 @@ int bc_parser::decode_fetch_clause(cf_node* cf) {
 	int r;
 	unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;
 
-	if (cf->bc.op_ptr->flags && FF_GDS)
+	if (cf->bc.op_ptr->flags & FF_GDS)
 		cf->subtype = NST_GDS_CLAUSE;
 	else
 		cf->subtype = NST_TEX_CLAUSE;
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
index 923216d77f1..a7ef4252ee0 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -1003,25 +1003,35 @@ static void get_mjpeg_slice_header(struct ruvd_decoder *dec, struct pipe_mjpeg_p
 	size++;
 
 	for (i = 0; i < 2; ++i) {
+		int num = 0, j;
+
 		if (pic->huffman_table.load_huffman_table[i] == 0)
 			continue;
 
 		buf[size++] = 0x00 | i;
 		memcpy((buf + size), &pic->huffman_table.table[i].num_dc_codes, 16);
 		size += 16;
-		memcpy((buf + size), &pic->huffman_table.table[i].dc_values, 12);
-		size += 12;
+		for (j = 0; j < 16; ++j)
+			num += pic->huffman_table.table[i].num_dc_codes[j];
+		assert(num <= 12);
+		memcpy((buf + size), &pic->huffman_table.table[i].dc_values, num);
+		size += num;
 	}
 
 	for (i = 0; i < 2; ++i) {
+		int num = 0, j;
+
 		if (pic->huffman_table.load_huffman_table[i] == 0)
 			continue;
 
 		buf[size++] = 0x10 | i;
 		memcpy((buf + size), &pic->huffman_table.table[i].num_ac_codes, 16);
 		size += 16;
-		memcpy((buf + size), &pic->huffman_table.table[i].ac_values, 162);
-		size += 162;
+		for (j = 0; j < 16; ++j)
+			num += pic->huffman_table.table[i].num_ac_codes[j];
+		assert(num <= 162);
+		memcpy((buf + size), &pic->huffman_table.table[i].ac_values, num);
+		size += num;
 	}
 
 	bs = (uint16_t*)&buf[len_pos];
diff --git a/src/gallium/drivers/radeonsi/si_compute.h b/src/gallium/drivers/radeonsi/si_compute.h
index 3a4cdea25ef..d0a89820ddf 100644
--- a/src/gallium/drivers/radeonsi/si_compute.h
+++ b/src/gallium/drivers/radeonsi/si_compute.h
@@ -29,7 +29,7 @@
 
 #include "si_shader.h"
 
-#define MAX_GLOBAL_BUFFERS 22
+#define MAX_GLOBAL_BUFFERS 32
 
 struct si_compute {
 	struct pipe_reference reference;
diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c
index 186a785437d..abb7057f299 100644
--- a/src/gallium/drivers/radeonsi/si_fence.c
+++ b/src/gallium/drivers/radeonsi/si_fence.c
@@ -291,8 +291,12 @@ static boolean si_fence_finish(struct pipe_screen *screen,
 {
 	struct radeon_winsys *rws = ((struct si_screen*)screen)->ws;
 	struct si_multi_fence *rfence = (struct si_multi_fence *)fence;
+	struct si_context *sctx;
 	int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
 
+	ctx = threaded_context_unwrap_sync(ctx);
+	sctx = (struct si_context*)(ctx ? ctx : NULL);
+
 	if (!util_queue_fence_is_signalled(&rfence->ready)) {
 		if (rfence->tc_token) {
 			/* Ensure that si_flush_from_st will be called for
@@ -345,49 +349,43 @@ static boolean si_fence_finish(struct pipe_screen *screen,
 	}
 
 	/* Flush the gfx IB if it hasn't been flushed yet. */
-	if (ctx && rfence->gfx_unflushed.ctx) {
-		struct si_context *sctx;
-
-		sctx = (struct si_context *)threaded_context_unwrap_unsync(ctx);
-		if (rfence->gfx_unflushed.ctx == sctx &&
-		    rfence->gfx_unflushed.ib_index == sctx->num_gfx_cs_flushes) {
-			/* Section 4.1.2 (Signaling) of the OpenGL 4.6 (Core profile)
-			 * spec says:
-			 *
-			 *    "If the sync object being blocked upon will not be
-			 *     signaled in finite time (for example, by an associated
-			 *     fence command issued previously, but not yet flushed to
-			 *     the graphics pipeline), then ClientWaitSync may hang
-			 *     forever. To help prevent this behavior, if
-			 *     ClientWaitSync is called and all of the following are
-			 *     true:
-			 *
-			 *     * the SYNC_FLUSH_COMMANDS_BIT bit is set in flags,
-			 *     * sync is unsignaled when ClientWaitSync is called,
-			 *     * and the calls to ClientWaitSync and FenceSync were
-			 *       issued from the same context,
-			 *
-			 *     then the GL will behave as if the equivalent of Flush
-			 *     were inserted immediately after the creation of sync."
-			 *
-			 * This means we need to flush for such fences even when we're
-			 * not going to wait.
-			 */
-			threaded_context_unwrap_sync(ctx);
-			si_flush_gfx_cs(sctx,
-					(timeout ? 0 : PIPE_FLUSH_ASYNC) |
-					 RADEON_FLUSH_START_NEXT_GFX_IB_NOW,
-					NULL);
-			rfence->gfx_unflushed.ctx = NULL;
-
-			if (!timeout)
-				return false;
+	if (sctx && rfence->gfx_unflushed.ctx == sctx &&
+	    rfence->gfx_unflushed.ib_index == sctx->num_gfx_cs_flushes) {
+		/* Section 4.1.2 (Signaling) of the OpenGL 4.6 (Core profile)
+		 * spec says:
+		 *
+		 *    "If the sync object being blocked upon will not be
+		 *     signaled in finite time (for example, by an associated
+		 *     fence command issued previously, but not yet flushed to
+		 *     the graphics pipeline), then ClientWaitSync may hang
+		 *     forever. To help prevent this behavior, if
+		 *     ClientWaitSync is called and all of the following are
+		 *     true:
+		 *
+		 *     * the SYNC_FLUSH_COMMANDS_BIT bit is set in flags,
+		 *     * sync is unsignaled when ClientWaitSync is called,
+		 *     * and the calls to ClientWaitSync and FenceSync were
+		 *       issued from the same context,
+		 *
+		 *     then the GL will behave as if the equivalent of Flush
+		 *     were inserted immediately after the creation of sync."
+		 *
+		 * This means we need to flush for such fences even when we're
+		 * not going to wait.
+		 */
+		si_flush_gfx_cs(sctx,
+				(timeout ? 0 : PIPE_FLUSH_ASYNC) |
+				 RADEON_FLUSH_START_NEXT_GFX_IB_NOW,
+				NULL);
+		rfence->gfx_unflushed.ctx = NULL;
 
-			/* Recompute the timeout after all that. */
-			if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
-				int64_t time = os_time_get_nano();
-				timeout = abs_timeout > time ? abs_timeout - time : 0;
-			}
+		if (!timeout)
+			return false;
+
+		/* Recompute the timeout after all that. */
+		if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
+			int64_t time = os_time_get_nano();
+			timeout = abs_timeout > time ? abs_timeout - time : 0;
 		}
 	}
 
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 9dbd4c64f2a..c0688d448b9 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -133,12 +133,13 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
 
 	if (ctx->current_saved_cs) {
 		si_trace_emit(ctx);
-		si_log_hw_flush(ctx);
 
 		/* Save the IB for debug contexts. */
 		si_save_cs(ws, cs, &ctx->current_saved_cs->gfx, true);
 		ctx->current_saved_cs->flushed = true;
 		ctx->current_saved_cs->time_flush = os_time_get_nano();
+
+		si_log_hw_flush(ctx);
 	}
 
 	/* Flush the CS. */
@@ -146,8 +147,6 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
 	if (fence)
 		ws->fence_reference(fence, ctx->last_gfx_fence);
 
-	/* This must be after cs_flush returns, since the context's API
-	 * thread can concurrently read this value in si_fence_finish. */
 	ctx->num_gfx_cs_flushes++;
 
 	/* Check VM faults if needed. */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index cc05d2f8de3..6b36893698c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -751,42 +751,39 @@ static void si_disk_cache_create(struct si_screen *sscreen)
 	if (sscreen->debug_flags & DBG_ALL_SHADERS)
 		return;
 
-	uint32_t mesa_timestamp;
-	if (disk_cache_get_function_timestamp(si_disk_cache_create,
-					      &mesa_timestamp)) {
-		char *timestamp_str;
-		int res = -1;
-		uint32_t llvm_timestamp;
-
-		if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
-						      &llvm_timestamp)) {
-			res = asprintf(&timestamp_str, "%u_%u",
-				       mesa_timestamp, llvm_timestamp);
-		}
+	struct mesa_sha1 ctx;
+	unsigned char sha1[20];
+	char cache_id[20 * 2 + 1];
 
-		if (res != -1) {
-			/* These flags affect shader compilation. */
-			#define ALL_FLAGS (DBG(FS_CORRECT_DERIVS_AFTER_KILL) | \
-					   DBG(SI_SCHED) | \
-					   DBG(GISEL) | \
-					   DBG(UNSAFE_MATH) | \
-					   DBG(NIR))
-			uint64_t shader_debug_flags = sscreen->debug_flags &
-						      ALL_FLAGS;
-
-			/* Add the high bits of 32-bit addresses, which affects
-			 * how 32-bit addresses are expanded to 64 bits.
-			 */
-			STATIC_ASSERT(ALL_FLAGS <= UINT_MAX);
-			shader_debug_flags |= (uint64_t)sscreen->info.address32_hi << 32;
-
-			sscreen->disk_shader_cache =
-				disk_cache_create(si_get_family_name(sscreen),
-						  timestamp_str,
-						  shader_debug_flags);
-			free(timestamp_str);
-		}
-	}
+	_mesa_sha1_init(&ctx);
+
+	if (!disk_cache_get_function_identifier(si_disk_cache_create, &ctx) ||
+	    !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo,
+						&ctx))
+		return;
+
+	_mesa_sha1_final(&ctx, sha1);
+	disk_cache_format_hex_id(cache_id, sha1, 20 * 2);
+
+	/* These flags affect shader compilation. */
+	#define ALL_FLAGS (DBG(FS_CORRECT_DERIVS_AFTER_KILL) |	\
+			   DBG(SI_SCHED) |			\
+			   DBG(GISEL) |				\
+			   DBG(UNSAFE_MATH) |			\
+			   DBG(NIR))
+	uint64_t shader_debug_flags = sscreen->debug_flags &
+		ALL_FLAGS;
+
+	/* Add the high bits of 32-bit addresses, which affects
+	 * how 32-bit addresses are expanded to 64 bits.
+	 */
+	STATIC_ASSERT(ALL_FLAGS <= UINT_MAX);
+	shader_debug_flags |= (uint64_t)sscreen->info.address32_hi << 32;
+
+	sscreen->disk_shader_cache =
+		disk_cache_create(si_get_family_name(sscreen),
+				  cache_id,
+				  shader_debug_flags);
 }
 
 struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 405833d3ba7..0b25592093e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4388,9 +4388,12 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi,
 	gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex, ctx->i32_1, "");
 	LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]);
 
-	/* Signal vertex emission */
-	ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
-			 si_get_gs_wave_id(ctx));
+	/* Signal vertex emission if vertex data was written. */
+	if (offset) {
+		ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
+				 si_get_gs_wave_id(ctx));
+	}
+
 	if (!use_kill)
 		lp_build_endif(&if_state);
 }
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
index c51d057967c..0d292864425 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
@@ -36,7 +36,8 @@ static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
 
 	for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
 		LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
-		conds[i] = LLVMBuildFCmp(builder, LLVMRealOGE, value,
+		/* UGE because NaN shouldn't get killed */
+		conds[i] = LLVMBuildFCmp(builder, LLVMRealUGE, value,
 					ctx->ac.f32_0, "");
 	}
 
@@ -505,18 +506,37 @@ static void emit_bfe(const struct lp_build_tgsi_action *action,
 		     struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMValueRef bfe_sm5;
-	LLVMValueRef cond;
 
-	bfe_sm5 = ac_build_bfe(&ctx->ac, emit_data->args[0],
-			       emit_data->args[1], emit_data->args[2],
-			       emit_data->info->opcode == TGSI_OPCODE_IBFE);
+	if (HAVE_LLVM < 0x0700) {
+		LLVMValueRef bfe_sm5 =
+			ac_build_bfe(&ctx->ac, emit_data->args[0],
+				     emit_data->args[1], emit_data->args[2],
+				     emit_data->info->opcode == TGSI_OPCODE_IBFE);
 
-	/* Correct for GLSL semantics. */
-	cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
-			     LLVMConstInt(ctx->i32, 32, 0), "");
-	emit_data->output[emit_data->chan] =
-		LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
+		/* Correct for GLSL semantics. */
+		LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
+						  LLVMConstInt(ctx->i32, 32, 0), "");
+		emit_data->output[emit_data->chan] =
+			LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
+	} else {
+		/* FIXME: LLVM 7 returns incorrect result when count is 0.
+		 * https://bugs.freedesktop.org/show_bug.cgi?id=107276
+		 */
+		LLVMValueRef zero = ctx->i32_0;
+		LLVMValueRef bfe_sm5 =
+			ac_build_bfe(&ctx->ac, emit_data->args[0],
+				     emit_data->args[1], emit_data->args[2],
+				     emit_data->info->opcode == TGSI_OPCODE_IBFE);
+
+		/* Correct for GLSL semantics. */
+		LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
+						  LLVMConstInt(ctx->i32, 32, 0), "");
+		LLVMValueRef cond2 = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, emit_data->args[2],
+						   zero, "");
+		bfe_sm5 = LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
+		emit_data->output[emit_data->chan] =
+			LLVMBuildSelect(ctx->ac.builder, cond2, zero, bfe_sm5, "");
+	}
 }
 
 /* this is ffs in C */
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index d901401f0bb..4157e5ea3fc 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -383,7 +383,7 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen,
 		 * Polaris supports primitive restart with WD_SWITCH_ON_EOP=0
 		 * for points, line strips, and tri strips.
 		 */
-		if (sscreen->info.max_se < 4 ||
+		if (sscreen->info.max_se <= 2 ||
 		    key->u.prim == PIPE_PRIM_POLYGON ||
 		    key->u.prim == PIPE_PRIM_LINE_LOOP ||
 		    key->u.prim == PIPE_PRIM_TRIANGLE_FAN ||
@@ -414,7 +414,7 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen,
 			wd_switch_on_eop = true;
 
 		/* Required on CIK and later. */
-		if (sscreen->info.max_se > 2 && !wd_switch_on_eop)
+		if (sscreen->info.max_se == 4 && !wd_switch_on_eop)
 			ia_switch_on_eoi = true;
 
 		/* Required by Hawaii and, for some special cases, by VI. */
@@ -429,6 +429,12 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen,
 		    key->u.uses_instancing)
 			partial_vs_wave = true;
 
+		/* This only applies to Polaris10 and later 4 SE chips.
+		 * wd_switch_on_eop is already true on all other chips.
+		 */
+		if (!wd_switch_on_eop && key->u.primitive_restart)
+			partial_vs_wave = true;
+
 		/* If the WD switch is false, the IA switch must be false too. */
 		assert(wd_switch_on_eop || !ia_switch_on_eop);
 	}
diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c
index e55fd815264..bcff226a586 100644
--- a/src/gallium/drivers/radeonsi/si_texture.c
+++ b/src/gallium/drivers/radeonsi/si_texture.c
@@ -931,8 +931,8 @@ static void si_texture_get_htile_size(struct si_screen *sscreen,
 		return;
 	}
 
-	width = align(tex->buffer.b.b.width0, cl_width * 8);
-	height = align(tex->buffer.b.b.height0, cl_height * 8);
+	width = align(tex->surface.u.legacy.level[0].nblk_x, cl_width * 8);
+	height = align(tex->surface.u.legacy.level[0].nblk_y, cl_height * 8);
 
 	slice_elements = (width * height) / (8 * 8);
 	slice_bytes = slice_elements * 4;
diff --git a/src/gallium/drivers/svga/meson.build b/src/gallium/drivers/svga/meson.build
index 2976212fdfb..7981e2991f3 100644
--- a/src/gallium/drivers/svga/meson.build
+++ b/src/gallium/drivers/svga/meson.build
@@ -79,7 +79,7 @@ files_svga = files(
 
 libsvga = static_library(
   'svga',
-  files_svga,
+  [files_svga, sha1_h],
   c_args : [c_vis_args, c_msvc_compat_args],
   include_directories : [
     inc_src, inc_include, inc_gallium, inc_gallium_aux,
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index 2e7f1a88a0a..485403ae1ec 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -57,7 +57,6 @@
     ['VHSUBPS',     ['a', 'b'], 'a'],
     ['VPTESTC',     ['a', 'b'], 'mInt32Ty'],
     ['VPTESTZ',     ['a', 'b'], 'mInt32Ty'],
-    ['VFMADDPS',    ['a', 'b', 'c'], 'a'],
     ['VPHADDD',     ['a', 'b'], 'a'],
     ['PDEP32',      ['a', 'b'], 'a'],
     ['RDTSC',       [], 'mInt64Ty'],
@@ -71,6 +70,7 @@
     ['STACKRESTORE', 'stackrestore', ['a'], []],
     ['VMINPS', 'minnum', ['a', 'b'], ['a']],
     ['VMAXPS', 'maxnum', ['a', 'b'], ['a']],
+    ['VFMADDPS', 'fmuladd', ['a', 'b', 'c'], ['a']],
     ['DEBUGTRAP', 'debugtrap', [], []],
     ['POPCNT', 'ctpop', ['a'], ['a']],
     ['LOG2', 'log2', ['a'], ['a']],
@@ -161,7 +161,8 @@ def parse_ir_builder(input_file):
                         func_name == 'CreateAlignmentAssumptionHelper' or
                         func_name == 'CreateGEP' or
                         func_name == 'CreateLoad' or
-                        func_name == 'CreateMaskedLoad'):
+                        func_name == 'CreateMaskedLoad' or
+                        func_name == 'CreateElementUnorderedAtomicMemCpy'):
                         ignore = True
 
                     # Convert CamelCase to CAMEL_CASE
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
index f89c502db7d..d5328c8e4e6 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
@@ -870,7 +870,6 @@ struct BlendJit : public Builder
         passes.add(createCFGSimplificationPass());
         passes.add(createEarlyCSEPass());
         passes.add(createInstructionCombiningPass());
-        passes.add(createInstructionSimplifierPass());
         passes.add(createConstantPropagationPass());
         passes.add(createSCCPPass());
         passes.add(createAggressiveDCEPass());
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
index 4116dad4430..26d8688f5e9 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
@@ -755,15 +755,8 @@ namespace SwrJit
     Value* Builder::FMADDPS(Value* a, Value* b, Value* c)
     {
         Value* vOut;
-        // use FMADs if available
-        if (JM()->mArch.AVX2())
-        {
-            vOut = VFMADDPS(a, b, c);
-        }
-        else
-        {
-            vOut = FADD(FMUL(a, b), c);
-        }
+        // This maps to LLVM fmuladd intrinsic
+        vOut = VFMADDPS(a, b, c);
         return vOut;
     }
 
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index b4d326ebdcc..3ad0fabe81f 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -294,7 +294,6 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
     optPasses.add(createCFGSimplificationPass());
     optPasses.add(createEarlyCSEPass());
     optPasses.add(createInstructionCombiningPass());
-    optPasses.add(createInstructionSimplifierPass());
     optPasses.add(createConstantPropagationPass());
     optPasses.add(createSCCPPass());
     optPasses.add(createAggressiveDCEPass());
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
index 7605823c04d..c34959d35ee 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
@@ -76,7 +76,6 @@ namespace SwrJit
         {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256},
         {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
         {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
-        {"meta.intrinsic.VFMADDPS", Intrinsic::x86_fma_vfmadd_ps_256},
         {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d},
         {"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32},
         {"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc},
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
index 8f86af2a4b4..11ad36521b3 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
@@ -306,7 +306,6 @@ struct StreamOutJit : public Builder
         passes.add(createCFGSimplificationPass());
         passes.add(createEarlyCSEPass());
         passes.add(createInstructionCombiningPass());
-        passes.add(createInstructionSimplifierPass());
         passes.add(createConstantPropagationPass());
         passes.add(createSCCPPass());
         passes.add(createAggressiveDCEPass());
diff --git a/src/gallium/drivers/swr/swr_public.h b/src/gallium/drivers/swr/swr_public.h
index 07ea6280cd6..b32f41fdf7b 100644
--- a/src/gallium/drivers/swr/swr_public.h
+++ b/src/gallium/drivers/swr/swr_public.h
@@ -37,7 +37,7 @@ extern "C" {
 struct pipe_screen *swr_create_screen(struct sw_winsys *winsys);
 
 // arch-specific dll entry point
-PUBLIC struct pipe_screen *swr_create_screen_internal(struct sw_winsys *winsys);
+struct pipe_screen *swr_create_screen_internal(struct sw_winsys *winsys);
 
 // cleanup for failed screen creation
 void swr_destroy_screen_internal(struct swr_screen **screen);
diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
index fa232b6838b..084f55dab99 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -1143,12 +1143,10 @@ swr_validate_env_options(struct swr_screen *screen)
 }
 
 
-PUBLIC
 struct pipe_screen *
 swr_create_screen_internal(struct sw_winsys *winsys)
 {
    struct swr_screen *screen = CALLOC_STRUCT(swr_screen);
-   memset(screen, 0, sizeof(struct swr_screen));
 
    if (!screen)
       return NULL;
diff --git a/src/gallium/drivers/tegra/tegra_screen.c b/src/gallium/drivers/tegra/tegra_screen.c
index 034ea271eec..73fdc632933 100644
--- a/src/gallium/drivers/tegra/tegra_screen.c
+++ b/src/gallium/drivers/tegra/tegra_screen.c
@@ -203,6 +203,7 @@ static int tegra_open_render_node(void)
          }
 
          if (strcmp(version->name, "nouveau") != 0) {
+            drmFreeVersion(version);
             close(fd);
             continue;
          }
diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c
index 2e743851bea..2f084792e61 100644
--- a/src/gallium/drivers/v3d/v3d_screen.c
+++ b/src/gallium/drivers/v3d/v3d_screen.c
@@ -585,6 +585,8 @@ v3d_get_device_info(struct v3d_screen *screen)
         uint32_t minor = (ident1.value >> 0) & 0xf;
         screen->devinfo.ver = major * 10 + minor;
 
+        screen->devinfo.vpm_size = (ident1.value >> 28 & 0xf) * 8192;
+
         switch (screen->devinfo.ver) {
         case 33:
         case 41:
diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c
index 479adb70fdb..bfb4af13ceb 100644
--- a/src/gallium/drivers/v3d/v3dx_draw.c
+++ b/src/gallium/drivers/v3d/v3dx_draw.c
@@ -306,6 +306,13 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
                 }
         }
 
+        cl_emit(&job->bcl, VCM_CACHE_SIZE, vcm) {
+                vcm.number_of_16_vertex_batches_for_binning =
+                        v3d->prog.cs->prog_data.vs->vcm_cache_size;
+                vcm.number_of_16_vertex_batches_for_rendering =
+                        v3d->prog.vs->prog_data.vs->vcm_cache_size;
+        }
+
         cl_emit(&job->bcl, GL_SHADER_STATE, state) {
                 state.address = cl_address(job->indirect.bo, shader_rec_offset);
                 state.number_of_attribute_arrays = num_elements_to_emit;
diff --git a/src/gallium/drivers/v3d/v3dx_emit.c b/src/gallium/drivers/v3d/v3dx_emit.c
index c58ac4b44a2..537dd1ff9ae 100644
--- a/src/gallium/drivers/v3d/v3dx_emit.c
+++ b/src/gallium/drivers/v3d/v3dx_emit.c
@@ -69,7 +69,9 @@ v3d_factor(enum pipe_blendfactor factor, bool dst_alpha_one)
         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                 return V3D_BLEND_FACTOR_INV_CONST_ALPHA;
         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-                return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE;
+                return (dst_alpha_one ?
+                        V3D_BLEND_FACTOR_ZERO :
+                        V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE);
         default:
                 unreachable("Bad blend factor");
         }
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index 900c0abaf20..06785516cae 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -222,6 +222,8 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4,
                         attr.coordinate_shader_vpm_offset = 0;
                         attr.vertex_shader_vpm_offset = 0;
                 }
+
+                vc4_bo_unreference(&bo);
         }
 
         cl_emit(&job->bcl, GL_SHADER_STATE, shader_state) {
diff --git a/src/gallium/drivers/vc4/vc4_fence.c b/src/gallium/drivers/vc4/vc4_fence.c
index 7071425595c..0dbfbe966b8 100644
--- a/src/gallium/drivers/vc4/vc4_fence.c
+++ b/src/gallium/drivers/vc4/vc4_fence.c
@@ -121,7 +121,8 @@ vc4_fence_server_sync(struct pipe_context *pctx,
         struct vc4_context *vc4 = vc4_context(pctx);
         struct vc4_fence *fence = vc4_fence(pfence);
 
-        sync_accumulate("vc4", &vc4->in_fence_fd, fence->fd);
+        if (fence->fd >= 0)
+                sync_accumulate("vc4", &vc4->in_fence_fd, fence->fd);
 }
 
 static int
@@ -142,8 +143,12 @@ vc4_fence_context_init(struct vc4_context *vc4)
         /* Since we initialize the in_fence_fd to -1 (no wait necessary),
          * we also need to initialize our in_syncobj as signaled.
          */
-        return drmSyncobjCreate(vc4->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
-                                &vc4->in_syncobj);
+        if (vc4->screen->has_syncobj) {
+                return drmSyncobjCreate(vc4->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
+                                        &vc4->in_syncobj);
+        } else {
+                return 0;
+        }
 }
 
 void
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 13c3b7678b2..1f46b64005b 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -38,6 +38,7 @@
 #include "vc4_context.h"
 #include "vc4_qpu.h"
 #include "vc4_qir.h"
+#include "mesa/state_tracker/st_glsl_types.h"
 
 static struct qreg
 ntq_get_src(struct vc4_compile *c, nir_src src, int i);
@@ -50,6 +51,12 @@ type_size(const struct glsl_type *type)
    return glsl_count_attribute_slots(type, false);
 }
 
+static int
+uniforms_type_size(const struct glsl_type *type)
+{
+        return st_glsl_storage_type_size(type, false);
+}
+
 static void
 resize_qreg_array(struct vc4_compile *c,
                   struct qreg **regs,
@@ -679,25 +686,45 @@ ntq_fceil(struct vc4_compile *c, struct qreg src)
         return qir_MOV(c, result);
 }
 
+static struct qreg
+ntq_shrink_sincos_input_range(struct vc4_compile *c, struct qreg x)
+{
+        /* Since we're using a Taylor approximation, we want to have a small
+         * number of coefficients and take advantage of sin/cos repeating
+         * every 2pi.  We keep our x as close to 0 as we can, since the series
+         * will be less accurate as |x| increases.  (Also, be careful of
+         * shifting the input x value to be tricky with sin/cos relations,
+         * because getting accurate values for x==0 is very important for SDL
+         * rendering)
+         */
+        struct qreg scaled_x =
+                qir_FMUL(c, x,
+                         qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
+        /* Note: FTOI truncates toward 0. */
+        struct qreg x_frac = qir_FSUB(c, scaled_x,
+                                      qir_ITOF(c, qir_FTOI(c, scaled_x)));
+        /* Map [0.5, 1] to [-0.5, 0] */
+        qir_SF(c, qir_FSUB(c, x_frac, qir_uniform_f(c, 0.5)));
+        qir_FSUB_dest(c, x_frac, x_frac, qir_uniform_f(c, 1.0))->cond = QPU_COND_NC;
+        /* Map [-1, -0.5] to [0, 0.5] */
+        qir_SF(c, qir_FADD(c, x_frac, qir_uniform_f(c, 0.5)));
+        qir_FADD_dest(c, x_frac, x_frac, qir_uniform_f(c, 1.0))->cond = QPU_COND_NS;
+
+        return x_frac;
+}
+
 static struct qreg
 ntq_fsin(struct vc4_compile *c, struct qreg src)
 {
         float coeff[] = {
-                -2.0 * M_PI,
-                pow(2.0 * M_PI, 3) / (3 * 2 * 1),
-                -pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
-                pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
-                -pow(2.0 * M_PI, 9) / (9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
+                2.0 * M_PI,
+                -pow(2.0 * M_PI, 3) / (3 * 2 * 1),
+                pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
+                -pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
+                pow(2.0 * M_PI, 9) / (9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
         };
 
-        struct qreg scaled_x =
-                qir_FMUL(c,
-                         src,
-                         qir_uniform_f(c, 1.0 / (M_PI * 2.0)));
-
-        struct qreg x = qir_FADD(c,
-                                 ntq_ffract(c, scaled_x),
-                                 qir_uniform_f(c, -0.5));
+        struct qreg x = ntq_shrink_sincos_input_range(c, src);
         struct qreg x2 = qir_FMUL(c, x, x);
         struct qreg sum = qir_FMUL(c, x, qir_uniform_f(c, coeff[0]));
         for (int i = 1; i < ARRAY_SIZE(coeff); i++) {
@@ -715,21 +742,15 @@ static struct qreg
 ntq_fcos(struct vc4_compile *c, struct qreg src)
 {
         float coeff[] = {
-                -1.0f,
-                pow(2.0 * M_PI, 2) / (2 * 1),
-                -pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
-                pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
-                -pow(2.0 * M_PI, 8) / (8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
-                pow(2.0 * M_PI, 10) / (10 * 9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
+                1.0f,
+                -pow(2.0 * M_PI, 2) / (2 * 1),
+                pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
+                -pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
+                pow(2.0 * M_PI, 8) / (8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
+                -pow(2.0 * M_PI, 10) / (10 * 9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
         };
 
-        struct qreg scaled_x =
-                qir_FMUL(c, src,
-                         qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
-        struct qreg x_frac = qir_FADD(c,
-                                      ntq_ffract(c, scaled_x),
-                                      qir_uniform_f(c, -0.5));
-
+        struct qreg x_frac = ntq_shrink_sincos_input_range(c, src);
         struct qreg sum = qir_uniform_f(c, coeff[0]);
         struct qreg x2 = qir_FMUL(c, x_frac, x_frac);
         struct qreg x = x2; /* Current x^2, x^4, or x^6 */
@@ -1685,7 +1706,7 @@ static void
 ntq_setup_uniforms(struct vc4_compile *c)
 {
         nir_foreach_variable(var, &c->s->uniforms) {
-                uint32_t vec4_count = type_size(var->type);
+                uint32_t vec4_count = uniforms_type_size(var->type);
                 unsigned vec4_size = 4 * sizeof(float);
 
                 declare_uniform_range(c, var->data.driver_location * vec4_size,
@@ -2469,9 +2490,13 @@ vc4_shader_state_create(struct pipe_context *pctx,
                  */
                 s = cso->ir.nir;
 
-                NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size,
+                NIR_PASS_V(s, nir_lower_io, nir_var_all & ~nir_var_uniform,
+                           type_size,
                            (nir_lower_io_options)0);
-        } else {
+                NIR_PASS_V(s, nir_lower_io, nir_var_uniform,
+                           uniforms_type_size,
+                           (nir_lower_io_options)0);
+       } else {
                 assert(cso->type == PIPE_SHADER_IR_TGSI);
 
                 if (vc4_debug & VC4_DEBUG_TGSI) {
diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c
index 408a9e0af2a..1e4657a7922 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -614,7 +614,9 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
         }
 
         so->texture_p0 =
-                (VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
+                (VC4_SET_FIELD((rsc->slices[0].offset +
+                                cso->u.tex.first_layer *
+                                rsc->cube_map_stride) >> 12, VC4_TEX_P0_OFFSET) |
                  VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE) |
                  VC4_SET_FIELD(so->force_first_level ?
                                cso->u.tex.last_level :
diff --git a/src/gallium/drivers/virgl/virgl_encode.c b/src/gallium/drivers/virgl/virgl_encode.c
index 670c5fe6c3d..9aece384288 100644
--- a/src/gallium/drivers/virgl/virgl_encode.c
+++ b/src/gallium/drivers/virgl/virgl_encode.c
@@ -284,7 +284,7 @@ int virgl_encode_shader_state(struct virgl_context *ctx,
    while (left_bytes) {
       uint32_t length, offlen;
       int hdr_len = base_hdr_size + (first_pass ? strm_hdr_size : 0);
-      if (ctx->cbuf->cdw + hdr_len + 1 > VIRGL_MAX_CMDBUF_DWORDS)
+      if (ctx->cbuf->cdw + hdr_len + 1 >= VIRGL_MAX_CMDBUF_DWORDS)
          ctx->base.flush(&ctx->base, NULL, 0);
 
       thispass = (VIRGL_MAX_CMDBUF_DWORDS - ctx->cbuf->cdw - hdr_len - 1) * 4;
diff --git a/src/gallium/drivers/virgl/virgl_texture.c b/src/gallium/drivers/virgl/virgl_texture.c
index 150a5ebd8c7..7ac794623da 100644
--- a/src/gallium/drivers/virgl/virgl_texture.c
+++ b/src/gallium/drivers/virgl/virgl_texture.c
@@ -177,6 +177,8 @@ static void *virgl_texture_transfer_map(struct pipe_context *ctx,
       /* we want to do a resolve blit into the temporary */
       hw_res = trans->resolve_tmp->hw_res;
       offset = 0;
+      trans->base.stride = ((struct virgl_texture*)trans->resolve_tmp)->stride[level];
+      trans->base.layer_stride = trans->base.stride * nblocksy;
    } else {
       offset = vrend_get_tex_image_offset(vtex, level, box->z);
 
diff --git a/src/gallium/include/state_tracker/st_api.h b/src/gallium/include/state_tracker/st_api.h
index 5b72c0afc99..03377a3025a 100644
--- a/src/gallium/include/state_tracker/st_api.h
+++ b/src/gallium/include/state_tracker/st_api.h
@@ -190,6 +190,8 @@ struct st_egl_image
  */
 struct st_visual
 {
+   bool no_config;
+
    /**
     * Available buffers.  Bitfield of ST_ATTACHMENT_*_MASK bits.
     */
diff --git a/src/gallium/state_trackers/clover/meson.build b/src/gallium/state_trackers/clover/meson.build
index d1497e657ea..1a09d8f2ca9 100644
--- a/src/gallium/state_trackers/clover/meson.build
+++ b/src/gallium/state_trackers/clover/meson.build
@@ -115,7 +115,7 @@ clover_files = files(
 
 libclover = static_library(
   'clover',
-  clover_files,
+  [clover_files, sha1_h],
   include_directories : clover_incs,
   cpp_args : [clover_cpp_args, cpp_vis_args],
   link_with : [libcltgsi, libclllvm],
diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c
index 2ac32205d9a..b89726ceac2 100644
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -160,6 +160,12 @@ static int convert_fourcc(int format, int *dri_components_p)
       format = __DRI_IMAGE_FORMAT_R8;
       dri_components = __DRI_IMAGE_COMPONENTS_Y_UV;
       break;
+   case __DRI_IMAGE_FOURCC_P010:
+   case __DRI_IMAGE_FOURCC_P012:
+   case __DRI_IMAGE_FOURCC_P016:
+      format = __DRI_IMAGE_FORMAT_R16;
+      dri_components = __DRI_IMAGE_COMPONENTS_Y_UV;
+      break;
    default:
       return -1;
    }
@@ -1485,6 +1491,12 @@ dri2_query_dma_buf_formats(__DRIscreen *_screen, int max, int *formats,
 
    for (i = 0, j = 0; (i < ARRAY_SIZE(fourcc_formats)) &&
          (j < max || max == 0); i++) {
+      /* The sRGB format is not a real FourCC as defined by drm_fourcc.h, so we
+       * must not leak it out to clients.
+       */
+      if (fourcc_formats[i] == __DRI_IMAGE_FOURCC_SARGB8888)
+         continue;
+
       if (pscreen->is_format_supported(pscreen,
                                        fourcc_to_pipe_format(
                                           fourcc_formats[i]),
@@ -2213,8 +2225,10 @@ dri_kms_init_screen(__DRIscreen * sPriv)
       dri2ImageExtension.createImageFromFds = dri2_from_fds;
       dri2ImageExtension.createImageFromDmaBufs = dri2_from_dma_bufs;
       dri2ImageExtension.createImageFromDmaBufs2 = dri2_from_dma_bufs2;
-      dri2ImageExtension.queryDmaBufFormats = dri2_query_dma_buf_formats;
-      dri2ImageExtension.queryDmaBufModifiers = dri2_query_dma_buf_modifiers;
+      if (pscreen->query_dmabuf_modifiers) {
+         dri2ImageExtension.queryDmaBufFormats = dri2_query_dma_buf_formats;
+         dri2ImageExtension.queryDmaBufModifiers = dri2_query_dma_buf_modifiers;
+      }
    }
 
    sPriv->extensions = dri_screen_extensions;
diff --git a/src/gallium/state_trackers/dri/dri_context.c b/src/gallium/state_trackers/dri/dri_context.c
index fb307337a90..af9e3325f98 100644
--- a/src/gallium/state_trackers/dri/dri_context.c
+++ b/src/gallium/state_trackers/dri/dri_context.c
@@ -62,6 +62,7 @@ dri_create_context(gl_api api, const struct gl_config * visual,
       __DRIVER_CONTEXT_ATTRIB_RELEASE_BEHAVIOR;
    const __DRIbackgroundCallableExtension *backgroundCallable =
       screen->sPriv->dri2.backgroundCallable;
+   const struct driOptionCache *optionCache = &screen->dev->option_cache;
 
    if (screen->has_reset_status_query) {
       allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
@@ -88,8 +89,13 @@ dri_create_context(gl_api api, const struct gl_config * visual,
       break;
    case API_OPENGL_COMPAT:
    case API_OPENGL_CORE:
-      attribs.profile = api == API_OPENGL_COMPAT ? ST_PROFILE_DEFAULT
-                                                 : ST_PROFILE_OPENGL_CORE;
+      if (driQueryOptionb(optionCache, "force_compat_profile")) {
+         attribs.profile = ST_PROFILE_DEFAULT;
+      } else {
+         attribs.profile = api == API_OPENGL_COMPAT ? ST_PROFILE_DEFAULT
+                                                    : ST_PROFILE_OPENGL_CORE;
+      }
+
       attribs.major = ctx_config->major_version;
       attribs.minor = ctx_config->minor_version;
 
diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c
index fe4e39b3f93..a0dcdb53dd2 100644
--- a/src/gallium/state_trackers/dri/dri_screen.c
+++ b/src/gallium/state_trackers/dri/dri_screen.c
@@ -308,8 +308,10 @@ dri_fill_st_visual(struct st_visual *stvis,
 {
    memset(stvis, 0, sizeof(*stvis));
 
-   if (!mode)
+   if (!mode) {
+      stvis->no_config = true;
       return;
+   }
 
    /* Deduce the color format. */
    switch (mode->redMask) {
diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c
index 150f5e3e05e..61eb5d9a45f 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -784,6 +784,10 @@ NineDevice9_SetCursorPosition( struct NineDevice9 *This,
 
     DBG("This=%p X=%d Y=%d Flags=%d\n", This, X, Y, Flags);
 
+    if (This->cursor.pos.x == X &&
+        This->cursor.pos.y == Y)
+        return;
+
     This->cursor.pos.x = X;
     This->cursor.pos.y = Y;
 
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index 7db07d8f693..f9e6b962a75 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -483,7 +483,7 @@ struct shader_translator
         struct ureg_dst a0;
         struct ureg_dst tS[8]; /* texture stage registers */
         struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
-        struct ureg_dst t[5]; /* scratch TEMPs */
+        struct ureg_dst t[8]; /* scratch TEMPs */
         struct ureg_src vC[2]; /* PS color in */
         struct ureg_src vT[8]; /* PS texcoord in */
         struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
@@ -2273,6 +2273,18 @@ DECL_SPECIAL(POW)
     return D3D_OK;
 }
 
+DECL_SPECIAL(RCP)
+{
+    struct ureg_program *ureg = tx->ureg;
+    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+    struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
+    struct ureg_dst tmp = tx_scratch(tx);
+    ureg_RCP(ureg, tmp, src);
+    ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
+    ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), ureg_src(tmp));
+    return D3D_OK;
+}
+
 DECL_SPECIAL(RSQ)
 {
     struct ureg_program *ureg = tx->ureg;
@@ -2909,7 +2921,7 @@ static const struct sm1_op_info inst_table[] =
     _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
     _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
     _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
-    _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
+    _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */
     _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
     _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
     _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
diff --git a/src/gallium/state_trackers/nine/threadpool.c b/src/gallium/state_trackers/nine/threadpool.c
index cc62fd25799..19721aab2dd 100644
--- a/src/gallium/state_trackers/nine/threadpool.c
+++ b/src/gallium/state_trackers/nine/threadpool.c
@@ -37,6 +37,7 @@
 #include "os/os_thread.h"
 #include "threadpool.h"
 
+/* POSIX thread function */
 static void *
 threadpool_worker(void *data)
 {
@@ -76,6 +77,15 @@ threadpool_worker(void *data)
     return NULL;
 }
 
+/* Windows thread function */
+static DWORD NINE_WINAPI
+wthreadpool_worker(void *data)
+{
+    threadpool_worker(data);
+
+    return 0;
+}
+
 struct threadpool *
 _mesa_threadpool_create(struct NineSwapChain9 *swapchain)
 {
@@ -87,7 +97,9 @@ _mesa_threadpool_create(struct NineSwapChain9 *swapchain)
     pthread_mutex_init(&pool->m, NULL);
     pthread_cond_init(&pool->new_work, NULL);
 
-    pool->wthread = NineSwapChain9_CreateThread(swapchain, threadpool_worker, pool);
+    /* This uses WINE's CreateThread, so the thread function needs to use
+     * the Windows ABI */
+    pool->wthread = NineSwapChain9_CreateThread(swapchain, wthreadpool_worker, pool);
     if (!pool->wthread) {
         /* using pthread as fallback */
         pthread_create(&pool->pthread, NULL, threadpool_worker, pool);
diff --git a/src/gallium/state_trackers/va/image.c b/src/gallium/state_trackers/va/image.c
index 3f892c9842c..807fc832c7f 100644
--- a/src/gallium/state_trackers/va/image.c
+++ b/src/gallium/state_trackers/va/image.c
@@ -353,6 +353,23 @@ vlVaGetImage(VADriverContextP ctx, VASurfaceID surface, int x, int y,
       return VA_STATUS_ERROR_INVALID_IMAGE;
    }
 
+   if (x < 0 || y < 0) {
+      mtx_unlock(&drv->mutex);
+      return VA_STATUS_ERROR_INVALID_PARAMETER;
+   }
+
+   if (x + width > surf->templat.width ||
+       y + height > surf->templat.height) {
+      mtx_unlock(&drv->mutex);
+      return VA_STATUS_ERROR_INVALID_PARAMETER;
+   }
+
+   if (width > vaimage->width ||
+       height > vaimage->height) {
+      mtx_unlock(&drv->mutex);
+      return VA_STATUS_ERROR_INVALID_PARAMETER;
+   }
+
    img_buf = handle_table_get(drv->htab, vaimage->buf);
    if (!img_buf) {
       mtx_unlock(&drv->mutex);
@@ -400,11 +417,19 @@ vlVaGetImage(VADriverContextP ctx, VASurfaceID surface, int x, int y,
    }
 
    for (i = 0; i < vaimage->num_planes; i++) {
-      unsigned width, height;
+      unsigned box_w = align(width, 2);
+      unsigned box_h = align(height, 2);
+      unsigned box_x = x & ~1;
+      unsigned box_y = y & ~1;
       if (!views[i]) continue;
-      vlVaVideoSurfaceSize(surf, i, &width, &height);
+      vl_video_buffer_adjust_size(&box_w, &box_h, i,
+                                  surf->templat.chroma_format,
+                                  surf->templat.interlaced);
+      vl_video_buffer_adjust_size(&box_x, &box_y, i,
+                                  surf->templat.chroma_format,
+                                  surf->templat.interlaced);
       for (j = 0; j < views[i]->texture->array_size; ++j) {
-         struct pipe_box box = {0, 0, j, width, height, 1};
+         struct pipe_box box = {box_x, box_y, j, box_w, box_h, 1};
          struct pipe_transfer *transfer;
          uint8_t *map;
          map = drv->pipe->transfer_map(drv->pipe, views[i]->texture, 0,
diff --git a/src/gallium/state_trackers/va/surface.c b/src/gallium/state_trackers/va/surface.c
index cc26efe1c1a..d69313932c8 100644
--- a/src/gallium/state_trackers/va/surface.c
+++ b/src/gallium/state_trackers/va/surface.c
@@ -598,10 +598,8 @@ surface_from_external_memory(VADriverContextP ctx, vlVaSurface *surface,
    return VA_STATUS_SUCCESS;
 
 fail:
-   for (i = 0; i < VL_NUM_COMPONENTS; i++) {
-      if (resources[i])
-         pscreen->resource_destroy(pscreen, resources[i]);
-   }
+   for (i = 0; i < VL_NUM_COMPONENTS; i++)
+      pipe_resource_reference(&resources[i], NULL);
    return result;
 }
 
diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c
index 9c5bd8a15b2..507a267b5fe 100644
--- a/src/gallium/targets/d3dadapter9/drm.c
+++ b/src/gallium/targets/d3dadapter9/drm.c
@@ -107,7 +107,7 @@ drm_destroy( struct d3dadapter9_context *ctx )
     if (drm->dev)
         pipe_loader_release(&drm->dev, 1);
 
-    close(drm->fd);
+    /* The pipe loader takes ownership of the fd */
     FREE(ctx);
 }
 
diff --git a/src/gallium/targets/d3dadapter9/meson.build b/src/gallium/targets/d3dadapter9/meson.build
index bd05b4f9692..bc72b1110a0 100644
--- a/src/gallium/targets/d3dadapter9/meson.build
+++ b/src/gallium/targets/d3dadapter9/meson.build
@@ -53,7 +53,7 @@ libgallium_nine = shared_library(
     libswkmsdri,
   ],
   dependencies : [
-    dep_selinux, dep_expat, dep_libdrm, dep_llvm,
+    dep_selinux, dep_expat, dep_libdrm, dep_llvm, dep_thread,
     driver_swrast, driver_r300, driver_r600, driver_radeonsi, driver_nouveau,
     driver_i915, driver_svga,
   ],
diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk
index 9c43fa1e8fd..83f439071f8 100644
--- a/src/gallium/targets/dri/Android.mk
+++ b/src/gallium/targets/dri/Android.mk
@@ -43,9 +43,17 @@ LOCAL_SHARED_LIBRARIES := \
 	libbacktrace \
 	libdl \
 	libglapi \
-	libexpat \
 	libz
 
+# If Android version >=8 MESA should static link libexpat else should dynamic link
+ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0)
+LOCAL_STATIC_LIBRARIES := \
+	libexpat
+else
+LOCAL_SHARED_LIBRARIES += \
+	libexpat
+endif
+
 $(foreach d, $(MESA_BUILD_GALLIUM), $(eval LOCAL_CFLAGS += $(patsubst HAVE_%,-D%,$(d))))
 
 # sort GALLIUM_LIBS to remove any duplicates
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_surface.c b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c
index fda1ba7870e..20cfc86ebe0 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_surface.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c
@@ -257,8 +257,8 @@ static void si_compute_cmask(const struct radeon_info *info,
 
 	unsigned base_align = num_pipes * pipe_interleave_bytes;
 
-	unsigned width = align(config->info.width, cl_width*8);
-	unsigned height = align(config->info.height, cl_height*8);
+	unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width*8);
+	unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height*8);
 	unsigned slice_elements = (width * height) / (8*8);
 
 	/* Each element of CMASK is a nibble. */
diff --git a/src/gallium/winsys/sw/dri/dri_sw_winsys.c b/src/gallium/winsys/sw/dri/dri_sw_winsys.c
index 40007200a5d..d519bcfedd3 100644
--- a/src/gallium/winsys/sw/dri/dri_sw_winsys.c
+++ b/src/gallium/winsys/sw/dri/dri_sw_winsys.c
@@ -26,8 +26,12 @@
  *
  **************************************************************************/
 
+#if !defined(ANDROID) || ANDROID_API_LEVEL >= 26
+/* Android's libc began supporting shm in Oreo */
+#define HAVE_SHM
 #include <sys/ipc.h>
 #include <sys/shm.h>
+#endif
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_format.h"
@@ -83,6 +87,7 @@ dri_sw_is_displaytarget_format_supported( struct sw_winsys *ws,
    return TRUE;
 }
 
+#ifdef HAVE_SHM
 static char *
 alloc_shm(struct dri_sw_displaytarget *dri_sw_dt, unsigned size)
 {
@@ -101,6 +106,7 @@ alloc_shm(struct dri_sw_displaytarget *dri_sw_dt, unsigned size)
 
    return addr;
 }
+#endif
 
 static struct sw_displaytarget *
 dri_sw_displaytarget_create(struct sw_winsys *winsys,
@@ -131,8 +137,11 @@ dri_sw_displaytarget_create(struct sw_winsys *winsys,
    size = dri_sw_dt->stride * nblocksy;
 
    dri_sw_dt->shmid = -1;
+
+#ifdef HAVE_SHM
    if (ws->lf->put_image_shm)
       dri_sw_dt->data = alloc_shm(dri_sw_dt, size);
+#endif
 
    if(!dri_sw_dt->data)
       dri_sw_dt->data = align_malloc(size, alignment);
@@ -156,8 +165,10 @@ dri_sw_displaytarget_destroy(struct sw_winsys *ws,
    struct dri_sw_displaytarget *dri_sw_dt = dri_sw_displaytarget(dt);
 
    if (dri_sw_dt->shmid >= 0) {
+#ifdef HAVE_SHM
       shmdt(dri_sw_dt->data);
       shmctl(dri_sw_dt->shmid, IPC_RMID, 0);
+#endif
    } else {
       align_free(dri_sw_dt->data);
    }
diff --git a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c
index 3fe1b1a7313..9564d9424b1 100644
--- a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c
+++ b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c
@@ -176,6 +176,8 @@ kms_sw_displaytarget_create(struct sw_winsys *ws,
 
    list_inithead(&kms_sw_dt->planes);
    kms_sw_dt->ref_count = 1;
+   kms_sw_dt->mapped = MAP_FAILED;
+   kms_sw_dt->ro_mapped = MAP_FAILED;
 
    kms_sw_dt->format = format;
 
@@ -262,7 +264,7 @@ kms_sw_displaytarget_map(struct sw_winsys *ws,
 
    prot = (flags == PIPE_TRANSFER_READ) ? PROT_READ : (PROT_READ | PROT_WRITE);
    void **ptr = (flags == PIPE_TRANSFER_READ) ? &kms_sw_dt->ro_mapped : &kms_sw_dt->mapped;
-   if (!*ptr) {
+   if (*ptr == MAP_FAILED) {
       void *tmp = mmap(0, kms_sw_dt->size, prot, MAP_SHARED,
                        kms_sw->fd, map_req.offset);
       if (tmp == MAP_FAILED)
@@ -332,6 +334,8 @@ kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd,
       FREE(kms_sw_dt);
       return NULL;
    }
+   kms_sw_dt->mapped = MAP_FAILED;
+   kms_sw_dt->ro_mapped = MAP_FAILED;
    kms_sw_dt->size = lseek_ret;
    kms_sw_dt->ref_count = 1;
    kms_sw_dt->handle = handle;
@@ -368,10 +372,14 @@ kms_sw_displaytarget_unmap(struct sw_winsys *ws,
    DEBUG_PRINT("KMS-DEBUG: unmapped buffer %u (was %p)\n", kms_sw_dt->handle, kms_sw_dt->mapped);
    DEBUG_PRINT("KMS-DEBUG: unmapped buffer %u (was %p)\n", kms_sw_dt->handle, kms_sw_dt->ro_mapped);
 
-   munmap(kms_sw_dt->mapped, kms_sw_dt->size);
-   kms_sw_dt->mapped = NULL;
-   munmap(kms_sw_dt->ro_mapped, kms_sw_dt->size);
-   kms_sw_dt->ro_mapped = NULL;
+   if (kms_sw_dt->mapped != MAP_FAILED) {
+      munmap(kms_sw_dt->mapped, kms_sw_dt->size);
+      kms_sw_dt->mapped = MAP_FAILED;
+   }
+   if (kms_sw_dt->ro_mapped != MAP_FAILED) {
+      munmap(kms_sw_dt->ro_mapped, kms_sw_dt->size);
+      kms_sw_dt->ro_mapped = MAP_FAILED;
+   }
 }
 
 static struct sw_displaytarget *
diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
index aad6430c417..d55e4c7126a 100644
--- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
+++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
@@ -313,7 +313,7 @@ virgl_drm_winsys_resource_cache_create(struct virgl_winsys *qws,
    struct virgl_hw_res *res, *curr_res;
    struct list_head *curr, *next;
    int64_t now;
-   int ret;
+   int ret = 0;
 
    /* only store binds for vertex/index/const buffers */
    if (bind != VIRGL_BIND_CONSTANT_BUFFER && bind != VIRGL_BIND_INDEX_BUFFER &&
@@ -617,13 +617,26 @@ static void virgl_drm_add_res(struct virgl_drm_winsys *qdws,
 {
    unsigned hash = res->res_handle & (sizeof(cbuf->is_handle_added)-1);
 
-   if (cbuf->cres > cbuf->nres) {
-      cbuf->nres += 256;
-      cbuf->res_bo = realloc(cbuf->res_bo, cbuf->nres * sizeof(struct virgl_hw_buf*));
-      if (!cbuf->res_bo) {
-          fprintf(stderr,"failure to add relocation %d, %d\n", cbuf->cres, cbuf->nres);
+   if (cbuf->cres >= cbuf->nres) {
+      unsigned new_nres = cbuf->nres + 256;
+      void *new_ptr = REALLOC(cbuf->res_bo,
+                              cbuf->nres * sizeof(struct virgl_hw_buf*),
+                              new_nres * sizeof(struct virgl_hw_buf*));
+      if (!new_ptr) {
+          fprintf(stderr,"failure to add relocation %d, %d\n", cbuf->cres, new_nres);
+          return;
+      }
+      cbuf->res_bo = new_ptr;
+
+      new_ptr = REALLOC(cbuf->res_hlist,
+                        cbuf->nres * sizeof(uint32_t),
+                        new_nres * sizeof(uint32_t));
+      if (!new_ptr) {
+          fprintf(stderr,"failure to add hlist relocation %d, %d\n", cbuf->cres, cbuf->nres);
           return;
       }
+      cbuf->res_hlist = new_ptr;
+      cbuf->nres = new_nres;
    }
 
    cbuf->res_bo[cbuf->cres] = NULL;
diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c
index d25f9a3bd9e..21349205143 100644
--- a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c
+++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c
@@ -243,8 +243,10 @@ int virgl_vtest_send_transfer_cmd(struct virgl_vtest_winsys *vws,
    vtest_hdr[VTEST_CMD_LEN] = VCMD_TRANSFER_HDR_SIZE;
    vtest_hdr[VTEST_CMD_ID] = vcmd;
 
+   /* The host expects the size in dwords so calculate the rounded up
+    * value here. */
    if (vcmd == VCMD_TRANSFER_PUT)
-      vtest_hdr[VTEST_CMD_LEN] += data_size + 3 / 4;
+      vtest_hdr[VTEST_CMD_LEN] += (data_size + 3) / 4;
 
    cmd[0] = handle;
    cmd[1] = level;
diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
index 9a96c6eb83f..d1fd6050a71 100644
--- a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
+++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
@@ -536,7 +536,7 @@ virgl_cs_create_fence(struct virgl_winsys *vws)
    res = virgl_vtest_winsys_resource_cache_create(vws,
                                                 PIPE_BUFFER,
                                                 PIPE_FORMAT_R8_UNORM,
-                                                PIPE_BIND_CUSTOM,
+                                                VIRGL_BIND_CUSTOM,
                                                 8, 1, 1, 0, 0, 0, 8);
 
    return (struct pipe_fence_handle *)res;
diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am
index 5233257fb40..8f9d80c9f41 100644
--- a/src/glx/Makefile.am
+++ b/src/glx/Makefile.am
@@ -19,9 +19,6 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
-if HAVE_SHARED_GLAPI
-SHARED_GLAPI_LIB = $(top_builddir)/src/mapi/shared-glapi/libglapi.la
-endif
 
 SUBDIRS =
 
@@ -181,7 +178,7 @@ GL_LIBS = \
 	$(LIBDRM_LIBS) \
 	libglx.la \
 	$(top_builddir)/src/mapi/glapi/libglapi.la \
-	$(SHARED_GLAPI_LIB) \
+	$(top_builddir)/src/mapi/shared-glapi/libglapi.la \
 	$(GL_LIB_DEPS)
 
 GL_LDFLAGS = \
diff --git a/src/glx/apple/apple_glx_log.c b/src/glx/apple/apple_glx_log.c
index a3f446c26f2..ea39d30954e 100644
--- a/src/glx/apple/apple_glx_log.c
+++ b/src/glx/apple/apple_glx_log.c
@@ -97,6 +97,7 @@ void _apple_glx_vlog(int level, const char *file, const char *function,
         fprintf(stderr, "%-9s %24s:%-4d %s(%"PRIu64"): ",
                 _asl_level_string(level), file, line, function, thread);
         vfprintf(stderr, fmt, args2);
+        va_end(args2);
     }
 
     msg = asl_new(ASL_TYPE_MSG);
diff --git a/src/glx/glxextensions.c b/src/glx/glxextensions.c
index a575862670f..e85a8c92846 100644
--- a/src/glx/glxextensions.c
+++ b/src/glx/glxextensions.c
@@ -152,7 +152,7 @@ static const struct extension_info known_glx_extensions[] = {
    { GLX(ATI_pixel_format_float),      VER(0,0), N, N, N, N },
    { GLX(INTEL_swap_event),            VER(0,0), Y, N, N, N },
    { GLX(MESA_copy_sub_buffer),        VER(0,0), Y, N, N, N },
-   { GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, Y, N },
+   { GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, N, Y },
    { GLX(MESA_query_renderer),         VER(0,0), Y, N, N, Y },
    { GLX(MESA_swap_control),           VER(0,0), Y, N, N, Y },
    { GLX(NV_float_buffer),             VER(0,0), N, N, N, N },
diff --git a/src/intel/Android.common.mk b/src/intel/Android.common.mk
index 12cea6e5472..12bd8947e2e 100644
--- a/src/intel/Android.common.mk
+++ b/src/intel/Android.common.mk
@@ -38,7 +38,17 @@ LOCAL_C_INCLUDES := \
 	$(MESA_TOP)/src/mapi \
 	$(MESA_TOP)/src/mesa
 
-LOCAL_SHARED_LIBRARIES := libexpat libz
+LOCAL_SHARED_LIBRARIES := libz
+
+# If Android version >=8 MESA should static link libexpat else should dynamic link
+ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0)
+LOCAL_STATIC_LIBRARIES := \
+	libexpat
+LOCAL_HEADER_LIBRARIES += liblog_headers
+else
+LOCAL_SHARED_LIBRARIES += \
+	libexpat
+endif
 
 LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_genxml
 
diff --git a/src/intel/Android.compiler.mk b/src/intel/Android.compiler.mk
index c2b01221dfc..41af7b20b9c 100644
--- a/src/intel/Android.compiler.mk
+++ b/src/intel/Android.compiler.mk
@@ -28,7 +28,7 @@
 # ---------------------------------------
 
 include $(CLEAR_VARS)
-
+LOCAL_CFLAGS += -Wno-error
 LOCAL_MODULE := libmesa_intel_compiler
 LOCAL_MODULE_CLASS := STATIC_LIBRARIES
 
diff --git a/src/intel/Android.dev.mk b/src/intel/Android.dev.mk
index cd2ed66a176..3011ee232ed 100644
--- a/src/intel/Android.dev.mk
+++ b/src/intel/Android.dev.mk
@@ -33,5 +33,8 @@ LOCAL_C_INCLUDES := $(MESA_TOP)/include/drm-uapi
 
 LOCAL_SRC_FILES := $(DEV_FILES)
 
+LOCAL_CFLAGS := \
+           -Wno-gnu-variable-sized-type-not-at-end
+
 include $(MESA_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
diff --git a/src/intel/Android.vulkan.mk b/src/intel/Android.vulkan.mk
index 09dc22875a1..8dc20149784 100644
--- a/src/intel/Android.vulkan.mk
+++ b/src/intel/Android.vulkan.mk
@@ -38,7 +38,10 @@ VULKAN_COMMON_INCLUDES := \
 	$(MESA_TOP)/src/intel \
 	$(MESA_TOP)/include/drm-uapi \
 	$(MESA_TOP)/src/intel/vulkan \
-	frameworks/native/vulkan/include
+	frameworks/native/vulkan/include \
+	frameworks/native/libs/nativebase/include \
+	frameworks/native/libs/nativewindow/include \
+	frameworks/native/libs/arect/include
 
 # libmesa_anv_entrypoints with header and dummy.c
 #
@@ -74,6 +77,8 @@ LOCAL_EXPORT_C_INCLUDE_DIRS := \
 
 LOCAL_SHARED_LIBRARIES := libdrm
 
+LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers
+
 include $(MESA_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
 
@@ -107,6 +112,8 @@ LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml
 
 LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES)
 
+LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers
+
 include $(MESA_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
 
@@ -120,13 +127,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES
 
 LOCAL_SRC_FILES := $(VULKAN_GEN75_FILES)
 LOCAL_CFLAGS := -DGEN_VERSIONx10=75
-
+LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers
 LOCAL_C_INCLUDES := $(ANV_INCLUDES)
 
 LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml
 
 LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES)
 
+LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers
+
 include $(MESA_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
 
@@ -140,13 +149,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES
 
 LOCAL_SRC_FILES := $(VULKAN_GEN8_FILES)
 LOCAL_CFLAGS := -DGEN_VERSIONx10=80
-
+LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers
 LOCAL_C_INCLUDES := $(ANV_INCLUDES)
 
 LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml
 
 LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES)
 
+LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers
+
 include $(MESA_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
 
@@ -160,13 +171,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES
 
 LOCAL_SRC_FILES := $(VULKAN_GEN9_FILES)
 LOCAL_CFLAGS := -DGEN_VERSIONx10=90
-
+LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers
 LOCAL_C_INCLUDES := $(ANV_INCLUDES)
 
 LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml
 
 LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES)
 
+LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers
+
 include $(MESA_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
 
@@ -180,13 +193,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES
 
 LOCAL_SRC_FILES := $(VULKAN_GEN10_FILES)
 LOCAL_CFLAGS := -DGEN_VERSIONx10=100
-
+LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers
 LOCAL_C_INCLUDES := $(ANV_INCLUDES)
 
 LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml
 
 LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES)
 
+LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers
+
 include $(MESA_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
 
@@ -207,6 +222,8 @@ LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml
 
 LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES)
 
+LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers
+
 include $(MESA_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
 
@@ -217,7 +234,7 @@ include $(BUILD_STATIC_LIBRARY)
 include $(CLEAR_VARS)
 LOCAL_MODULE := libmesa_vulkan_common
 LOCAL_MODULE_CLASS := STATIC_LIBRARIES
-
+LOCAL_CFLAGS += -Wno-error
 intermediates := $(call local-generated-sources-dir)
 
 LOCAL_SRC_FILES := $(VULKAN_FILES)
@@ -260,6 +277,8 @@ $(intermediates)/vulkan/anv_extensions.h:
 
 LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES)
 
+LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers
+
 include $(MESA_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
 
@@ -309,5 +328,7 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \
 
 LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) libz libsync liblog
 
+LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers
+
 include $(MESA_COMMON_MK)
 include $(BUILD_SHARED_LIBRARY)
diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
index b00cc8cc2cb..00624084e6f 100644
--- a/src/intel/Makefile.tools.am
+++ b/src/intel/Makefile.tools.am
@@ -21,7 +21,9 @@
 
 noinst_PROGRAMS += \
 	tools/aubinator \
-	tools/aubinator_error_decode
+	tools/aubinator_error_decode \
+	tools/error2aub
+
 
 tools_aubinator_SOURCES = \
 	tools/aubinator.c \
@@ -59,3 +61,23 @@ tools_aubinator_error_decode_LDADD = \
 tools_aubinator_error_decode_CFLAGS = \
 	$(AM_CFLAGS) \
 	$(ZLIB_CFLAGS)
+
+
+tools_error2aub_SOURCES = \
+	tools/gen_context.h \
+	tools/gen8_context.h \
+	tools/gen10_context.h \
+	tools/aub_write.h \
+	tools/aub_write.c \
+	tools/error2aub.c
+
+tools_error2aub_CFLAGS = \
+	$(AM_CFLAGS) \
+	$(ZLIB_CFLAGS)
+
+tools_error2aub_LDADD = \
+	dev/libintel_dev.la \
+	$(PTHREAD_LIBS) \
+	$(DLOPEN_LIBS) \
+	$(ZLIB_LIBS) \
+	-lm
diff --git a/src/intel/Makefile.vulkan.am b/src/intel/Makefile.vulkan.am
index 9555d98095b..d5112633b5c 100644
--- a/src/intel/Makefile.vulkan.am
+++ b/src/intel/Makefile.vulkan.am
@@ -104,7 +104,7 @@ noinst_LTLIBRARIES += $(VULKAN_PER_GEN_LIBS)
 
 VULKAN_CFLAGS = \
 	$(AM_CFLAGS) \
-	-msse2
+	-msse2 -mstackrealign
 
 VULKAN_CPPFLAGS = \
 	-I$(top_srcdir)/src/compiler \
diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c
index c36ec703b18..ffe26076e16 100644
--- a/src/intel/blorp/blorp.c
+++ b/src/intel/blorp/blorp.c
@@ -75,18 +75,6 @@ brw_blorp_surface_info_init(struct blorp_context *blorp,
    if (format == ISL_FORMAT_UNSUPPORTED)
       format = surf->surf->format;
 
-   if (format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) {
-      /* Unfortunately, ISL_FORMAT_R24_UNORM_X8_TYPELESS it isn't supported as
-       * a render target, which would prevent us from blitting to 24-bit
-       * depth.  The miptree consists of 32 bits per pixel, arranged as 24-bit
-       * depth values interleaved with 8 "don't care" bits.  Since depth
-       * values don't require any blending, it doesn't matter how we interpret
-       * the bit pattern as long as we copy the right amount of data, so just
-       * map it as 8-bit BGRA.
-       */
-      format = ISL_FORMAT_B8G8R8A8_UNORM;
-   }
-
    info->surf = *surf->surf;
    info->addr = surf->addr;
 
diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index c85ec8543a9..7cc580abd06 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -776,6 +776,14 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos,
        * grid of samples with in a pixel. Sample number layout shows the
        * rectangular grid of samples roughly corresponding to the real sample
        * locations with in a pixel.
+       *
+       * In the case of 2x MSAA, the layout of sample indices is reversed from
+       * the layout of sample numbers:
+       *
+       * sample index layout :  ---------    sample number layout :  ---------
+       *                        | 0 | 1 |                            | 1 | 0 |
+       *                        ---------                            ---------
+       *
        * In case of 4x MSAA, layout of sample indices matches the layout of
        * sample numbers:
        *           ---------
@@ -819,7 +827,9 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos,
                                             key->x_scale * key->y_scale));
       sample = nir_f2i32(b, sample);
 
-      if (tex_samples == 8) {
+      if (tex_samples == 2) {
+         sample = nir_isub(b, nir_imm_int(b, 1), sample);
+      } else if (tex_samples == 8) {
          sample = nir_iand(b, nir_ishr(b, nir_imm_int(b, 0x64210573),
                                        nir_ishl(b, sample, nir_imm_int(b, 2))),
                            nir_imm_int(b, 0xf));
@@ -984,14 +994,14 @@ convert_color(struct nir_builder *b, nir_ssa_def *color,
    nir_ssa_def *value;
 
    if (key->dst_format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) {
-      /* The destination image is bound as R32_UNORM but the data needs to be
+      /* The destination image is bound as R32_UINT but the data needs to be
        * in R24_UNORM_X8_TYPELESS.  The bottom 24 are the actual data and the
        * top 8 need to be zero.  We can accomplish this by simply multiplying
        * by a factor to scale things down.
        */
-      float factor = (float)((1 << 24) - 1) / (float)UINT32_MAX;
-      value = nir_fmul(b, nir_fsat(b, nir_channel(b, color, 0)),
-                          nir_imm_float(b, factor));
+      unsigned factor = (1 << 24) - 1;
+      value = nir_fsat(b, nir_channel(b, color, 0));
+      value = nir_f2i32(b, nir_fmul(b, value, nir_imm_float(b, factor)));
    } else if (key->dst_format == ISL_FORMAT_L8_UNORM_SRGB) {
       value = nir_format_linear_to_srgb(b, nir_channel(b, color, 0));
    } else if (key->dst_format == ISL_FORMAT_R8G8B8_UNORM_SRGB) {
@@ -1976,7 +1986,7 @@ try_blorp_blit(struct blorp_batch *batch,
          isl_format_rgbx_to_rgba(params->dst.view.format);
    } else if (params->dst.view.format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) {
       wm_prog_key->dst_format = params->dst.view.format;
-      params->dst.view.format = ISL_FORMAT_R32_UNORM;
+      params->dst.view.format = ISL_FORMAT_R32_UINT;
    } else if (params->dst.view.format == ISL_FORMAT_A4B4G4R4_UNORM) {
       params->dst.view.swizzle =
          isl_swizzle_compose(params->dst.view.swizzle,
@@ -2240,6 +2250,17 @@ blorp_blit(struct blorp_batch *batch,
       }
    }
 
+   /* ISL_FORMAT_R24_UNORM_X8_TYPELESS it isn't supported as a render target,
+    * which requires shader math to render to it.  Blitting Z24X8 to Z24X8
+    * is fairly common though, so we'd like to avoid it.  Since we don't need
+    * to blend depth values, we can simply pick a renderable format with the
+    * right number of bits-per-pixel, like 8-bit BGRA.
+    */
+   if (dst_surf->surf->format == ISL_FORMAT_R24_UNORM_X8_TYPELESS &&
+       src_surf->surf->format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) {
+      src_format = dst_format = ISL_FORMAT_B8G8R8A8_UNORM;
+   }
+
    brw_blorp_surface_info_init(batch->blorp, &params.src, src_surf, src_level,
                                src_layer, src_format, false);
    brw_blorp_surface_info_init(batch->blorp, &params.dst, dst_surf, dst_level,
diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h
index 50341ab0ecf..6da2485ad07 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -1628,6 +1628,29 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch,
     */
    blorp_emit_3dstate_multisample(batch, params);
 
+   /* According to the SKL PRM formula for WM_INT::ThreadDispatchEnable, the
+    * 3DSTATE_WM::ForceThreadDispatchEnable field can force WM thread dispatch
+    * even when WM_HZ_OP is active.  However, WM thread dispatch is normally
+    * disabled for HiZ ops and it appears that force-enabling it can lead to
+    * GPU hangs on at least Skylake.  Since we don't know the current state of
+    * the 3DSTATE_WM packet, just emit a dummy one prior to 3DSTATE_WM_HZ_OP.
+    */
+   blorp_emit(batch, GENX(3DSTATE_WM), wm);
+
+   /* From the BDW PRM Volume 7, Depth Buffer Clear:
+    *
+    *    The clear value must be between the min and max depth values
+    *    (inclusive) defined in the CC_VIEWPORT. If the depth buffer format is
+    *    D32_FLOAT, then +/-DENORM values are also allowed.
+    *
+    * Set the bounds to match our hardware limits, [0.0, 1.0].
+    */
+   if (params->depth.enabled && params->hiz_op == ISL_AUX_OP_FAST_CLEAR) {
+      assert(params->depth.clear_color.f32[0] >= 0.0f);
+      assert(params->depth.clear_color.f32[0] <= 1.0f);
+      blorp_emit_cc_viewport(batch);
+   }
+
    /* If we can't alter the depth stencil config and multiple layers are
     * involved, the HiZ op will fail. This is because the op requires that a
     * new config is emitted for each additional layer.
diff --git a/src/intel/common/gen_batch_decoder.c b/src/intel/common/gen_batch_decoder.c
index f5be0018afc..f2510e21b4f 100644
--- a/src/intel/common/gen_batch_decoder.c
+++ b/src/intel/common/gen_batch_decoder.c
@@ -642,7 +642,6 @@ decode_dynamic_state_pointers(struct gen_batch_decode_ctx *ctx,
                               int count)
 {
    struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
-   struct gen_group *state = gen_spec_find_struct(ctx->spec, struct_type);
 
    uint32_t state_offset = 0;
 
@@ -664,12 +663,28 @@ decode_dynamic_state_pointers(struct gen_batch_decode_ctx *ctx,
       return;
    }
 
+   struct gen_group *state = gen_spec_find_struct(ctx->spec, struct_type);
+   if (strcmp(struct_type, "BLEND_STATE") == 0) {
+      /* Blend states are different from the others because they have a header
+       * struct called BLEND_STATE which is followed by a variable number of
+       * BLEND_STATE_ENTRY structs.
+       */
+      fprintf(ctx->fp, "%s\n", struct_type);
+      ctx_print_group(ctx, state, state_addr, state_map);
+
+      state_addr += state->dw_length * 4;
+      state_map += state->dw_length * 4;
+
+      struct_type = "BLEND_STATE_ENTRY";
+      state = gen_spec_find_struct(ctx->spec, struct_type);
+   }
+
    for (int i = 0; i < count; i++) {
       fprintf(ctx->fp, "%s %d\n", struct_type, i);
-      ctx_print_group(ctx, state, state_offset, state_map);
+      ctx_print_group(ctx, state, state_addr, state_map);
 
       state_addr += state->dw_length * 4;
-      state_map += state->dw_length;
+      state_map += state->dw_length * 4;
    }
 }
 
diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c
index a978f2f5818..8990d208207 100644
--- a/src/intel/common/gen_debug.c
+++ b/src/intel/common/gen_debug.c
@@ -85,6 +85,7 @@ static const struct debug_control debug_control[] = {
    { "nohiz",       DEBUG_NO_HIZ },
    { "color",       DEBUG_COLOR },
    { "reemit",      DEBUG_REEMIT },
+   { "heur32",      DEBUG_HEUR32 },
    { NULL,    0 }
 };
 
diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h
index 72d7ca20a39..c2ca2e2ebd6 100644
--- a/src/intel/common/gen_debug.h
+++ b/src/intel/common/gen_debug.h
@@ -83,6 +83,7 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_NO_HIZ              (1ull << 39)
 #define DEBUG_COLOR               (1ull << 40)
 #define DEBUG_REEMIT              (1ull << 41)
+#define DEBUG_HEUR32              (1ull << 42)
 
 /* These flags are not compatible with the disk shader cache */
 #define DEBUG_DISK_CACHE_DISABLE_MASK DEBUG_SHADER_TIME
@@ -90,7 +91,7 @@ extern uint64_t INTEL_DEBUG;
 /* These flags may affect program generation */
 #define DEBUG_DISK_CACHE_MASK \
    (DEBUG_NO16 | DEBUG_NO_DUAL_OBJECT_GS | DEBUG_NO8 |  DEBUG_SPILL_FS | \
-   DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32)
+   DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_HEUR32)
 
 #ifdef HAVE_ANDROID_PLATFORM
 #define LOG_TAG "INTEL-MESA"
diff --git a/src/intel/common/gen_decoder.c b/src/intel/common/gen_decoder.c
index ec0a486b101..04de7b7bb81 100644
--- a/src/intel/common/gen_decoder.c
+++ b/src/intel/common/gen_decoder.c
@@ -811,6 +811,18 @@ iter_more_groups(const struct gen_field_iterator *iter)
    }
 }
 
+static void
+iter_start_field(struct gen_field_iterator *iter, struct gen_field *field)
+{
+   iter->field = field;
+
+   int group_member_offset = iter_group_offset_bits(iter, iter->group_iter);
+
+   iter->start_bit = group_member_offset + iter->field->start;
+   iter->end_bit = group_member_offset + iter->field->end;
+   iter->struct_desc = NULL;
+}
+
 static void
 iter_advance_group(struct gen_field_iterator *iter)
 {
@@ -825,32 +837,20 @@ iter_advance_group(struct gen_field_iterator *iter)
       }
    }
 
-   iter->field = iter->group->fields;
+   iter_start_field(iter, iter->group->fields);
 }
 
 static bool
 iter_advance_field(struct gen_field_iterator *iter)
 {
    if (iter_more_fields(iter)) {
-      iter->field = iter->field->next;
+      iter_start_field(iter, iter->field->next);
    } else {
       if (!iter_more_groups(iter))
          return false;
 
       iter_advance_group(iter);
    }
-
-   if (iter->field->name)
-      snprintf(iter->name, sizeof(iter->name), "%s", iter->field->name);
-   else
-      memset(iter->name, 0, sizeof(iter->name));
-
-   int group_member_offset = iter_group_offset_bits(iter, iter->group_iter);
-
-   iter->start_bit = group_member_offset + iter->field->start;
-   iter->end_bit = group_member_offset + iter->field->end;
-   iter->struct_desc = NULL;
-
    return true;
 }
 
@@ -991,7 +991,7 @@ gen_field_iterator_init(struct gen_field_iterator *iter,
    iter->p_bit = p_bit;
 
    int length = gen_group_get_length(iter->group, iter->p);
-   iter->p_end = length > 0 ? &p[length] : NULL;
+   iter->p_end = length >= 0 ? &p[length] : NULL;
    iter->print_colors = print_colors;
 }
 
@@ -1001,15 +1001,19 @@ gen_field_iterator_next(struct gen_field_iterator *iter)
    /* Initial condition */
    if (!iter->field) {
       if (iter->group->fields)
-         iter->field = iter->group->fields;
+         iter_start_field(iter, iter->group->fields);
       else
-         iter->field = iter->group->next->fields;
+         iter_start_field(iter, iter->group->next->fields);
 
       bool result = iter_decode_field(iter);
-      if (iter->p_end)
-         assert(result);
+      if (!result && iter->p_end) {
+         /* We're dealing with a non empty struct of length=0 (BLEND_STATE on
+          * Gen 7.5)
+          */
+         assert(iter->group->dw_length == 0);
+      }
 
-      return true;
+      return result;
    }
 
    if (!iter_advance_field(iter))
diff --git a/src/intel/common/gen_sample_positions.h b/src/intel/common/gen_sample_positions.h
index f0ce95dd1fb..da48dcb5ed0 100644
--- a/src/intel/common/gen_sample_positions.h
+++ b/src/intel/common/gen_sample_positions.h
@@ -42,10 +42,10 @@ prefix##0YOffset   = 0.5;
  * c   1
  */
 #define GEN_SAMPLE_POS_2X(prefix) \
-prefix##0XOffset   = 0.25; \
-prefix##0YOffset   = 0.25; \
-prefix##1XOffset   = 0.75; \
-prefix##1YOffset   = 0.75;
+prefix##0XOffset   = 0.75; \
+prefix##0YOffset   = 0.75; \
+prefix##1XOffset   = 0.25; \
+prefix##1YOffset   = 0.25;
 
 /**
  * Sample positions:
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index c510d34ce2e..824aa637f9a 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -38,6 +38,15 @@ struct ra_regs;
 struct nir_shader;
 struct brw_program;
 
+struct brw_simd32_heuristics_control {
+   bool grouped_sends_check;
+   int max_grouped_sends;
+   bool inst_count_check;
+   float inst_count_ratio;
+   bool mrt_check;
+   int max_mrts;
+};
+
 struct brw_compiler {
    const struct gen_device_info *devinfo;
 
@@ -118,6 +127,8 @@ struct brw_compiler {
     * whether nir_opt_large_constants will be run.
     */
    bool supports_shader_constants;
+
+   struct brw_simd32_heuristics_control simd32_heuristics_control;
 };
 
 /**
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 20b89035e1f..329fb00ec8f 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -393,6 +393,25 @@ fs_inst::can_do_source_mods(const struct gen_device_info *devinfo)
    return true;
 }
 
+bool
+fs_inst::can_do_cmod()
+{
+   if (!backend_instruction::can_do_cmod())
+      return false;
+
+   /* The accumulator result appears to get used for the conditional modifier
+    * generation.  When negating a UD value, there is a 33rd bit generated for
+    * the sign in the accumulator value, so now you can't check, for example,
+    * equality with a 32-bit value.  See piglit fs-op-neg-uvec4.
+    */
+   for (unsigned i = 0; i < sources; i++) {
+      if (type_is_unsigned_int(src[i].type) && src[i].negate)
+         return false;
+   }
+
+   return true;
+}
+
 bool
 fs_inst::can_change_types() const
 {
@@ -5115,6 +5134,25 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
       }
    }
 
+   if (devinfo->gen < 6) {
+      /* From the G45 PRM, Volume 4 Page 361:
+       *
+       *    "Operand Alignment Rule: With the exceptions listed below, a
+       *     source/destination operand in general should be aligned to even
+       *     256-bit physical register with a region size equal to two 256-bit
+       *     physical registers."
+       *
+       * Normally we enforce this by allocating virtual registers to the
+       * even-aligned class.  But we need to handle payload registers.
+       */
+      for (unsigned i = 0; i < inst->sources; i++) {
+         if (inst->src[i].file == FIXED_GRF && (inst->src[i].nr & 1) &&
+             inst->size_read(i) > REG_SIZE) {
+            max_width = MIN2(max_width, 8);
+         }
+      }
+   }
+
    /* From the IVB PRMs:
     *  "When an instruction is SIMD32, the low 16 bits of the execution mask
     *   are applied for both halves of the SIMD32 instruction. If different
@@ -6321,6 +6359,7 @@ fs_visitor::optimize()
    if (OPT(lower_load_payload)) {
       split_virtual_grfs();
       OPT(register_coalesce);
+      OPT(lower_simd_width);
       OPT(compute_to_mrf);
       OPT(dead_code_eliminate);
    }
@@ -7059,6 +7098,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
                char **error_str)
 {
    const struct gen_device_info *devinfo = compiler->devinfo;
+   bool simd16_failed = false;
+   bool simd16_spilled = false;
 
    nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
    shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true);
@@ -7126,10 +7167,12 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
                      shader_time_index16);
       v16.import_uniforms(&v8);
       if (!v16.run_fs(allow_spilling, use_rep_send)) {
+         simd16_failed = true;
          compiler->shader_perf_log(log_data,
                                    "SIMD16 shader failed to compile: %s",
                                    v16.fail_msg);
       } else {
+         simd16_spilled = v16.spilled_any_registers;
          simd16_cfg = v16.cfg;
          prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs;
          prog_data->reg_blocks_16 = brw_register_blocks(v16.grf_used);
@@ -7137,9 +7180,17 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
    }
 
    /* Currently, the compiler only supports SIMD32 on SNB+ */
+   const brw_simd32_heuristics_control *ctrl = &compiler->simd32_heuristics_control;
+   uint64_t mrts = shader->info.outputs_written << FRAG_RESULT_DATA0;
+
    if (v8.max_dispatch_width >= 32 && !use_rep_send &&
        compiler->devinfo->gen >= 6 &&
-       unlikely(INTEL_DEBUG & DEBUG_DO32)) {
+       (unlikely(INTEL_DEBUG & DEBUG_DO32) ||
+        (unlikely(INTEL_DEBUG & DEBUG_HEUR32) &&
+         !simd16_failed && !simd16_spilled &&
+         (!ctrl->mrt_check ||
+          (ctrl->mrt_check &&
+          u_count_bits64(&mrts) <= ctrl->max_mrts))))) {
       /* Try a SIMD32 compile */
       fs_visitor v32(compiler, log_data, mem_ctx, key,
                      &prog_data->base, prog, shader, 32,
@@ -7150,9 +7201,12 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
                                    "SIMD32 shader failed to compile: %s",
                                    v32.fail_msg);
       } else {
-         simd32_cfg = v32.cfg;
-         prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs;
-         prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used);
+         if (likely(!(INTEL_DEBUG & DEBUG_HEUR32)) ||
+              v32.run_heuristic(ctrl)) {
+            simd32_cfg = v32.cfg;
+            prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs;
+            prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used);
+         }
       }
    }
 
@@ -7231,13 +7285,49 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
    }
 
    if (simd32_cfg) {
-      prog_data->dispatch_32 = true;
-      prog_data->prog_offset_32 = g.generate_code(simd32_cfg, 32);
+      uint32_t offset = g.generate_code(simd32_cfg, 32);
+
+      if (unlikely(INTEL_DEBUG & DEBUG_DO32) ||
+          (unlikely(INTEL_DEBUG & DEBUG_HEUR32) &&
+           (!simd16_cfg ||
+            (simd16_cfg &&
+             (!ctrl->inst_count_check ||
+             (ctrl->inst_count_check &&
+             (float)g.get_inst_count(32) / (float)g.get_inst_count(16) <= ctrl->inst_count_ratio)))))) {
+         prog_data->dispatch_32 = true;
+         prog_data->prog_offset_32 = offset;
+      }
    }
 
    return g.get_assembly();
 }
 
+bool
+fs_visitor::run_heuristic(const struct brw_simd32_heuristics_control *ctrl) {
+   int grouped_sends = 0;
+   int max_grouped_sends = 0;
+   bool pass = true;
+
+   foreach_block_and_inst(block, fs_inst, inst, cfg) {
+      if (inst->opcode >= SHADER_OPCODE_TEX && inst->opcode <= SHADER_OPCODE_SAMPLEINFO_LOGICAL) {
+         ++grouped_sends;
+      } else if (grouped_sends > 0) {
+         if (grouped_sends > max_grouped_sends) {
+            max_grouped_sends = grouped_sends;
+         }
+         grouped_sends = 0;
+      }
+   }
+
+   if (ctrl->grouped_sends_check) {
+      if (max_grouped_sends > ctrl->max_grouped_sends) {
+         pass = false;
+      }
+   }
+
+   return pass;
+}
+
 fs_reg *
 fs_visitor::emit_cs_work_group_id_setup()
 {
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index d56e33715ee..615aff25ba9 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -281,6 +281,8 @@ class fs_visitor : public backend_shader
    void dump_instruction(backend_instruction *inst);
    void dump_instruction(backend_instruction *inst, FILE *file);
 
+   bool run_heuristic(const struct brw_simd32_heuristics_control *ctrl);
+
    const void *const key;
    const struct brw_sampler_prog_key_data *key_tex;
 
@@ -392,6 +394,7 @@ class fs_generator
 
    void enable_debug(const char *shader_name);
    int generate_code(const cfg_t *cfg, int dispatch_width);
+   int get_inst_count(int dispatch_width);
    const unsigned *get_assembly();
 
 private:
@@ -484,6 +487,7 @@ class fs_generator
    struct brw_stage_prog_data * const prog_data;
 
    unsigned dispatch_width; /**< 8, 16 or 32 */
+   int inst_count[3]; /* for 8, 16 and 32 */
 
    exec_list discard_halt_patches;
    unsigned promoted_constants;
diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h
index 7bee2aa0b9b..0cafaf50e56 100644
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h
@@ -795,7 +795,7 @@ namespace brw {
              !gen_device_info_is_9lp(shader->devinfo))
             return false;
 
-         if (type_sz(type > 4))
+         if (type_sz(type) > 4)
             return true;
 
          if (opcode == BRW_OPCODE_MUL &&
diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp b/src/intel/compiler/brw_fs_cmod_propagation.cpp
index 5b74f267359..5fb522f810f 100644
--- a/src/intel/compiler/brw_fs_cmod_propagation.cpp
+++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp
@@ -211,9 +211,17 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block)
       /* A CMP with a second source of zero can match with anything.  A CMP
        * with a second source that is not zero can only match with an ADD
        * instruction.
+       *
+       * Only apply this optimization to float-point sources.  It can fail for
+       * integers.  For inputs a = 0x80000000, b = 4, int(0x80000000) < 4, but
+       * int(0x80000000) - 4 overflows and results in 0x7ffffffc.  that's not
+       * less than zero, so the flags get set differently than for (a < b).
        */
       if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) {
-         progress = cmod_propagate_cmp_to_add(devinfo, block, inst) || progress;
+         if (brw_reg_type_is_floating_point(inst->src[0].type) &&
+             cmod_propagate_cmp_to_add(devinfo, block, inst))
+            progress = true;
+
          continue;
       }
 
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index e265d59ccbe..ed97935de91 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -2464,6 +2464,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
                               fill_count, promoted_constants, before_size,
                               after_size);
 
+   inst_count[ffs(dispatch_width) - 4] = before_size / 16;
+
    return start_offset;
 }
 
@@ -2472,3 +2474,13 @@ fs_generator::get_assembly()
 {
    return brw_get_program(p, &prog_data->program_size);
 }
+
+int
+fs_generator::get_inst_count(int dispatch_width)
+{
+   if (dispatch_width == 8 || dispatch_width == 16 || dispatch_width == 32) {
+      return inst_count[ffs(dispatch_width) - 4];
+   } else {
+      return 0;
+   }
+}
\ No newline at end of file
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
index 92dad269a34..07e7224e0f8 100644
--- a/src/intel/compiler/brw_ir_fs.h
+++ b/src/intel/compiler/brw_ir_fs.h
@@ -354,6 +354,7 @@ class fs_inst : public backend_instruction {
    unsigned components_read(unsigned i) const;
    unsigned size_read(int arg) const;
    bool can_do_source_mods(const struct gen_device_info *devinfo);
+   bool can_do_cmod();
    bool can_change_types() const;
    bool has_source_and_destination_hazard() const;
 
diff --git a/src/intel/compiler/brw_ir_vec4.h b/src/intel/compiler/brw_ir_vec4.h
index e401d8b4d16..65b1e4f3b53 100644
--- a/src/intel/compiler/brw_ir_vec4.h
+++ b/src/intel/compiler/brw_ir_vec4.h
@@ -291,6 +291,7 @@ class vec4_instruction : public backend_instruction {
                       int swizzle, int swizzle_mask);
    void reswizzle(int dst_writemask, int swizzle);
    bool can_do_source_mods(const struct gen_device_info *devinfo);
+   bool can_do_cmod();
    bool can_do_writemask(const struct gen_device_info *devinfo);
    bool can_change_types() const;
    bool has_source_and_destination_hazard() const;
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 31ffbe613ec..29ad68fdb2a 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -713,18 +713,6 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
    nir_validate_shader(*producer);
    nir_validate_shader(*consumer);
 
-   const bool p_is_scalar =
-      compiler->scalar_stage[(*producer)->info.stage];
-   const bool c_is_scalar =
-      compiler->scalar_stage[(*consumer)->info.stage];
-
-   if (p_is_scalar && c_is_scalar) {
-      NIR_PASS_V(*producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
-      NIR_PASS_V(*consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
-      *producer = brw_nir_optimize(*producer, compiler, p_is_scalar);
-      *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar);
-   }
-
    NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
    NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in);
 
@@ -741,7 +729,12 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
       NIR_PASS_V(*consumer, nir_lower_indirect_derefs,
                  brw_nir_no_indirect_mask(compiler, (*consumer)->info.stage));
 
+      const bool p_is_scalar =
+         compiler->scalar_stage[(*producer)->info.stage];
       *producer = brw_nir_optimize(*producer, compiler, p_is_scalar);
+
+      const bool c_is_scalar =
+         compiler->scalar_stage[(*consumer)->info.stage];
       *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar);
    }
 }
diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h
index ac12ab3d2dd..46d66198a1d 100644
--- a/src/intel/compiler/brw_reg.h
+++ b/src/intel/compiler/brw_reg.h
@@ -376,6 +376,15 @@ brw_int_type(unsigned sz, bool is_signed)
    }
 }
 
+static inline bool
+type_is_unsigned_int(enum brw_reg_type tp)
+{
+   return tp == BRW_REGISTER_TYPE_UB ||
+          tp == BRW_REGISTER_TYPE_UW ||
+          tp == BRW_REGISTER_TYPE_UD ||
+          tp == BRW_REGISTER_TYPE_UQ;
+}
+
 /**
  * Construct a brw_reg.
  * \param file      one of the BRW_x_REGISTER_FILE values
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 4e242e03032..e2fa58502f0 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -256,6 +256,26 @@ vec4_instruction::can_do_source_mods(const struct gen_device_info *devinfo)
    return true;
 }
 
+bool
+vec4_instruction::can_do_cmod()
+{
+   if (!backend_instruction::can_do_cmod())
+      return false;
+
+   /* The accumulator result appears to get used for the conditional modifier
+    * generation.  When negating a UD value, there is a 33rd bit generated for
+    * the sign in the accumulator value, so now you can't check, for example,
+    * equality with a 32-bit value.  See piglit fs-op-neg-uvec4.
+    */
+   for (unsigned i = 0; i < 3; i++) {
+      if (src[i].file != BAD_FILE &&
+          type_is_unsigned_int(src[i].type) && src[i].negate)
+         return false;
+   }
+
+   return true;
+}
+
 bool
 vec4_instruction::can_do_writemask(const struct gen_device_info *devinfo)
 {
diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp
index d506b675776..888cb358fd1 100644
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -929,8 +929,21 @@ generate_tes_add_indirect_urb_offset(struct brw_codegen *p,
    brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 
    brw_MOV(p, dst, header);
+
+   /* Uniforms will have a stride <0;4,1>, and we need to convert to <0;1,0>.
+    * Other values get <4;1,0>.
+    */
+   struct brw_reg restrided_offset;
+   if (offset.vstride == BRW_VERTICAL_STRIDE_0 &&
+       offset.width == BRW_WIDTH_4 &&
+       offset.hstride == BRW_HORIZONTAL_STRIDE_1) {
+      restrided_offset = stride(offset, 0, 1, 0);
+   } else {
+      restrided_offset = stride(offset, 4, 1, 0);
+   }
+
    /* m0.3-0.4: 128-bit-granular offsets into the URB from the handles */
-   brw_MOV(p, vec2(get_element_ud(dst, 3)), stride(offset, 4, 1, 0));
+   brw_MOV(p, vec2(get_element_ud(dst, 3)), restrided_offset);
 
    brw_pop_insn_state(p);
 }
diff --git a/src/intel/compiler/brw_vec4_tes.cpp b/src/intel/compiler/brw_vec4_tes.cpp
index 35aff0f4b78..cf1bff42aa9 100644
--- a/src/intel/compiler/brw_vec4_tes.cpp
+++ b/src/intel/compiler/brw_vec4_tes.cpp
@@ -185,9 +185,19 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
          first_component /= 2;
 
       if (indirect_offset.file != BAD_FILE) {
+         src_reg clamped_indirect_offset = src_reg(this, glsl_type::uvec4_type);
+
+         /* Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the
+          * valid range of the offset is [0, 0FFFFFFFh].
+          */
+         emit_minmax(BRW_CONDITIONAL_L,
+                     dst_reg(clamped_indirect_offset),
+                     retype(indirect_offset, BRW_REGISTER_TYPE_UD),
+                     brw_imm_ud(0x0fffffffu));
+
          header = src_reg(this, glsl_type::uvec4_type);
          emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
-              input_read_header, indirect_offset);
+              input_read_header, clamped_indirect_offset);
       } else {
          /* Arbitrarily only push up to 24 vec4 slots worth of data,
           * which is 12 registers (since each holds 2 vec4 slots).
diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml
index 541e4405716..be18708df00 100644
--- a/src/intel/genxml/gen10.xml
+++ b/src/intel/genxml/gen10.xml
@@ -3553,6 +3553,14 @@
     <field name="All Allocation" start="25" end="31" type="uint"/>
   </register>
 
+  <register name="CS_CHICKEN1" length="1" num="0x2580">
+    <field name="Replay Mode" start="0" end="0" type="uint">
+      <value name="Mid-cmdbuffer Preemption" value="0"/>
+      <value name="Object Level Preemption" value="1"/>
+    </field>
+    <field name="Replay Mode Mask" start="16" end="16" type="bool"/>
+  </register>
+
   <register name="SO_WRITE_OFFSET0" length="1" num="0x5280">
     <field name="Write Offset" start="2" end="31" type="offset"/>
   </register>
diff --git a/src/intel/genxml/gen11.xml b/src/intel/genxml/gen11.xml
index bd3800e4b79..8ed46860cd0 100644
--- a/src/intel/genxml/gen11.xml
+++ b/src/intel/genxml/gen11.xml
@@ -3551,6 +3551,14 @@
     <field name="All Allocation" start="25" end="31" type="uint"/>
   </register>
 
+  <register name="CS_CHICKEN1" length="1" num="0x2580">
+    <field name="Replay Mode" start="0" end="0" type="uint">
+      <value name="Mid-cmdbuffer Preemption" value="0"/>
+      <value name="Object Level Preemption" value="1"/>
+    </field>
+    <field name="Replay Mode Mask" start="16" end="16" type="bool"/>
+  </register>
+
   <register name="SO_WRITE_OFFSET0" length="1" num="0x5280">
     <field name="Write Offset" start="2" end="31" type="offset"/>
   </register>
diff --git a/src/intel/genxml/gen75.xml b/src/intel/genxml/gen75.xml
index 5b01fd45400..dfc3d891498 100644
--- a/src/intel/genxml/gen75.xml
+++ b/src/intel/genxml/gen75.xml
@@ -2314,9 +2314,9 @@
   <instruction name="MI_BATCH_BUFFER_START" bias="2" length="2">
     <field name="Command Type" start="29" end="31" type="uint" default="0"/>
     <field name="MI Command Opcode" start="23" end="28" type="uint" default="49"/>
-    <field name="2nd Level Batch Buffer" start="22" end="22" type="uint">
-      <value name="1st level batch" value="0"/>
-      <value name="2nd level batch" value="1"/>
+    <field name="Second Level Batch Buffer" start="22" end="22" type="uint">
+      <value name="First level batch" value="0"/>
+      <value name="Second level batch" value="1"/>
     </field>
     <field name="Add Offset Enable" start="16" end="16" type="bool"/>
     <field name="Predication Enable" start="15" end="15" type="bool"/>
diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml
index 4ed41d15612..330366b7ed0 100644
--- a/src/intel/genxml/gen8.xml
+++ b/src/intel/genxml/gen8.xml
@@ -2553,9 +2553,9 @@
   <instruction name="MI_BATCH_BUFFER_START" bias="2" length="3">
     <field name="Command Type" start="29" end="31" type="uint" default="0"/>
     <field name="MI Command Opcode" start="23" end="28" type="uint" default="49"/>
-    <field name="2nd Level Batch Buffer" start="22" end="22" type="uint">
-      <value name="1st level batch" value="0"/>
-      <value name="2nd level batch" value="1"/>
+    <field name="Second Level Batch Buffer" start="22" end="22" type="uint">
+      <value name="First level batch" value="0"/>
+      <value name="Second level batch" value="1"/>
     </field>
     <field name="Add Offset Enable" start="16" end="16" type="bool"/>
     <field name="Predication Enable" start="15" end="15" type="bool"/>
diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml
index 318ae89d5e7..0c7c606c6b6 100644
--- a/src/intel/genxml/gen9.xml
+++ b/src/intel/genxml/gen9.xml
@@ -3491,6 +3491,14 @@
     <field name="All Allocation" start="25" end="31" type="uint"/>
   </register>
 
+  <register name="CS_CHICKEN1" length="1" num="0x2580">
+    <field name="Replay Mode" start="0" end="0" type="uint">
+      <value name="Mid-cmdbuffer Preemption" value="0"/>
+      <value name="Object Level Preemption" value="1"/>
+    </field>
+    <field name="Replay Mode Mask" start="16" end="16" type="bool"/>
+  </register>
+
   <register name="SO_WRITE_OFFSET0" length="1" num="0x5280">
     <field name="Write Offset" start="2" end="31" type="offset"/>
   </register>
diff --git a/src/intel/isl/isl_gen7.c b/src/intel/isl/isl_gen7.c
index 4fa9851233f..a9db21fba52 100644
--- a/src/intel/isl/isl_gen7.c
+++ b/src/intel/isl/isl_gen7.c
@@ -294,6 +294,29 @@ isl_gen6_filter_tiling(const struct isl_device *dev,
     */
    if (ISL_DEV_GEN(dev) < 7 && isl_format_get_layout(info->format)->bpb >= 128)
       *flags &= ~ISL_TILING_Y0_BIT;
+
+   /* From the BDW and SKL PRMs, Volume 2d,
+    * RENDER_SURFACE_STATE::Width - Programming Notes:
+    *
+    *   A known issue exists if a primitive is rendered to the first 2 rows and
+    *   last 2 columns of a 16K width surface. If any geometry is drawn inside
+    *   this square it will be copied to column X=2 and X=3 (arrangement on Y
+    *   position will stay the same). If any geometry exceeds the boundaries of
+    *   this 2x2 region it will be drawn normally. The issue also only occurs
+    *   if the surface has TileMode != Linear.
+    *
+    * [Internal documentation notes that this issue isn't present on SKL GT4.]
+    * To prevent this rendering corruption, only allow linear tiling for
+    * surfaces with widths greater than 16K-2 pixels.
+    *
+    * TODO: Is this an issue for multisampled surfaces as well?
+    */
+   if (info->width > 16382 && info->samples == 1 &&
+       info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT &&
+       (ISL_DEV_GEN(dev) == 8 ||
+        (dev->info->is_skylake && dev->info->gt != 4))) {
+          *flags &= ISL_TILING_LINEAR_BIT;
+   }
 }
 
 void
diff --git a/src/intel/meson.build b/src/intel/meson.build
index ccaf16a76f9..b53728c026c 100644
--- a/src/intel/meson.build
+++ b/src/intel/meson.build
@@ -18,6 +18,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+c_sse2_args = ['-msse2', '-mstackrealign']
 inc_intel = include_directories('.')
 
 subdir('blorp')
diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
index 8989d558b66..3fec04c24c4 100644
--- a/src/intel/tools/aubinator.c
+++ b/src/intel/tools/aubinator.c
@@ -590,7 +590,7 @@ handle_memtrace_reg_write(uint32_t *p)
    uint32_t pphwsp_addr = context_descriptor & 0xfffff000;
    struct gen_batch_decode_bo pphwsp_bo = get_ggtt_batch_bo(NULL, pphwsp_addr);
    uint32_t *context = (uint32_t *)((uint8_t *)pphwsp_bo.map +
-                                    (pphwsp_bo.addr - pphwsp_addr) +
+                                    (pphwsp_addr - pphwsp_bo.addr) +
                                     pphwsp_size);
 
    uint32_t ring_buffer_head = context[5];
@@ -601,7 +601,7 @@ handle_memtrace_reg_write(uint32_t *p)
    struct gen_batch_decode_bo ring_bo = get_ggtt_batch_bo(NULL,
                                                           ring_buffer_start);
    assert(ring_bo.size > 0);
-   void *commands = (uint8_t *)ring_bo.map + (ring_bo.addr - ring_buffer_start);
+   void *commands = (uint8_t *)ring_bo.map + (ring_buffer_start - ring_bo.addr);
 
    if (context_descriptor & 0x100 /* ppgtt */) {
       batch_ctx.get_bo = get_ppgtt_batch_bo;
diff --git a/src/intel/tools/error2aub.c b/src/intel/tools/error2aub.c
index 3407dcec0b7..68a5b96e109 100644
--- a/src/intel/tools/error2aub.c
+++ b/src/intel/tools/error2aub.c
@@ -29,6 +29,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <stdarg.h>
 #include <zlib.h>
 
 #include "aub_write.h"
@@ -205,7 +206,7 @@ main(int argc, char *argv[])
       BO_TYPE_UNKNOWN = 0,
       BO_TYPE_BATCH,
       BO_TYPE_USER,
-   } bo_type;
+   } bo_type = BO_TYPE_UNKNOWN;
    uint64_t bo_addr;
 
    char *line = NULL;
diff --git a/src/intel/tools/meson.build b/src/intel/tools/meson.build
index 37bc04d8301..bef0af02f90 100644
--- a/src/intel/tools/meson.build
+++ b/src/intel/tools/meson.build
@@ -70,7 +70,7 @@ if with_tools.contains('intel')
     dependencies : [dep_dl, dep_thread],
     include_directories : [inc_common, inc_intel, inc_drm_uapi],
     link_with : [libintel_common, libmesa_util],
-    c_args : [c_vis_args, no_override_init_args],
+    c_args : [c_vis_args, no_override_init_args, c_sse2_args],
     install_dir: get_option('libexecdir'),
     install: true,
   )
diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index ab01d46cbeb..67f2f73aa11 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -1253,7 +1253,8 @@ anv_bo_cache_lookup(struct anv_bo_cache *cache, uint32_t gem_handle)
    (EXEC_OBJECT_WRITE | \
     EXEC_OBJECT_ASYNC | \
     EXEC_OBJECT_SUPPORTS_48B_ADDRESS | \
-    EXEC_OBJECT_PINNED)
+    EXEC_OBJECT_PINNED | \
+    ANV_BO_EXTERNAL)
 
 VkResult
 anv_bo_cache_alloc(struct anv_device *device,
@@ -1311,6 +1312,7 @@ anv_bo_cache_import(struct anv_device *device,
                     struct anv_bo **bo_out)
 {
    assert(bo_flags == (bo_flags & ANV_BO_CACHE_SUPPORTED_FLAGS));
+   assert(bo_flags & ANV_BO_EXTERNAL);
 
    pthread_mutex_lock(&cache->mutex);
 
@@ -1327,7 +1329,7 @@ anv_bo_cache_import(struct anv_device *device,
        * client has imported a BO twice in different ways and they get what
        * they have coming.
        */
-      uint64_t new_flags = 0;
+      uint64_t new_flags = ANV_BO_EXTERNAL;
       new_flags |= (bo->bo.flags | bo_flags) & EXEC_OBJECT_WRITE;
       new_flags |= (bo->bo.flags & bo_flags) & EXEC_OBJECT_ASYNC;
       new_flags |= (bo->bo.flags & bo_flags) & EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
@@ -1356,6 +1358,7 @@ anv_bo_cache_import(struct anv_device *device,
       if ((new_flags & EXEC_OBJECT_PINNED) &&
           (bo->bo.flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) !=
           (bo_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)) {
+         pthread_mutex_unlock(&cache->mutex);
          return vk_errorf(device->instance, NULL,
                           VK_ERROR_INVALID_EXTERNAL_HANDLE,
                           "The same BO was imported on two different heaps");
@@ -1411,6 +1414,12 @@ anv_bo_cache_export(struct anv_device *device,
    assert(anv_bo_cache_lookup(cache, bo_in->gem_handle) == bo_in);
    struct anv_cached_bo *bo = (struct anv_cached_bo *)bo_in;
 
+   /* This BO must have been flagged external in order for us to be able
+    * to export it.  This is done based on external options passed into
+    * anv_AllocateMemory.
+    */
+   assert(bo->bo.flags & ANV_BO_EXTERNAL);
+
    int fd = anv_gem_handle_to_fd(device, bo->bo.gem_handle);
    if (fd < 0)
       return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
diff --git a/src/intel/vulkan/anv_android.c b/src/intel/vulkan/anv_android.c
index a3bab8087b4..dd8df331349 100644
--- a/src/intel/vulkan/anv_android.c
+++ b/src/intel/vulkan/anv_android.c
@@ -128,7 +128,7 @@ anv_image_from_gralloc(VkDevice device_h,
     */
    int dma_buf = gralloc_info->handle->data[0];
 
-   uint64_t bo_flags = 0;
+   uint64_t bo_flags = ANV_BO_EXTERNAL;
    if (device->instance->physicalDevice.supports_48bit_addresses)
       bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
    if (device->instance->physicalDevice.use_softpin)
@@ -234,7 +234,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
    *grallocUsage = 0;
    intel_logd("%s: format=%d, usage=0x%x", __func__, format, imageUsage);
 
-   /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags
+   /* WARNING: Android's libvulkan.so hardcodes the VkImageUsageFlags
     * returned to applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags.
     * The relevant code in libvulkan/swapchain.cpp contains this fun comment:
     *
@@ -247,7 +247,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
     * dEQP-VK.wsi.android.swapchain.*.image_usage to fail.
     */
 
-   const VkPhysicalDeviceImageFormatInfo2KHR image_format_info = {
+   VkPhysicalDeviceImageFormatInfo2KHR image_format_info = {
       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR,
       .format = format,
       .type = VK_IMAGE_TYPE_2D,
@@ -255,6 +255,17 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
       .usage = imageUsage,
    };
 
+   /* Android P and earlier doesn't check if the physical device supports a
+    * given format and usage combination before calling this function. Omit the
+    * storage requirement to make the tests pass.
+    */
+#if ANDROID_API_LEVEL <= 28
+   if (format == VK_FORMAT_R8G8B8A8_SRGB ||
+       format == VK_FORMAT_R5G6B5_UNORM_PACK16) {
+      image_format_info.usage &= ~VK_IMAGE_USAGE_STORAGE_BIT;
+   }
+#endif
+
    VkImageFormatProperties2KHR image_format_props = {
       .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR,
    };
@@ -268,19 +279,13 @@ VkResult anv_GetSwapchainGrallocUsageANDROID(
                        "inside %s", __func__);
    }
 
-   /* Reject STORAGE here to avoid complexity elsewhere. */
-   if (imageUsage & VK_IMAGE_USAGE_STORAGE_BIT) {
-      return vk_errorf(device->instance, device, VK_ERROR_FORMAT_NOT_SUPPORTED,
-                       "VK_IMAGE_USAGE_STORAGE_BIT unsupported for gralloc "
-                       "swapchain");
-   }
-
    if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT |
                              VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
       *grallocUsage |= GRALLOC_USAGE_HW_RENDER;
 
    if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
                              VK_IMAGE_USAGE_SAMPLED_BIT |
+                             VK_IMAGE_USAGE_STORAGE_BIT |
                              VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
       *grallocUsage |= GRALLOC_USAGE_HW_TEXTURE;
 
diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
index c47a81c8a4d..e08e07ad7bd 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -531,7 +531,7 @@ emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer,
    anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START, bbs) {
       bbs.DWordLength               = cmd_buffer->device->info.gen < 8 ?
                                       gen7_length : gen8_length;
-      bbs._2ndLevelBatchBuffer      = _1stlevelbatch;
+      bbs.SecondLevelBatchBuffer    = Firstlevelbatch;
       bbs.AddressSpaceIndicator     = ASI_PPGTT;
       bbs.BatchBufferStartAddress   = (struct anv_address) { bo, offset };
    }
@@ -894,8 +894,17 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
           * It doesn't matter where it points now so long as has a valid
           * relocation.  We'll adjust it later as part of the chaining
           * process.
+          *
+          * We set the end of the batch a little short so we would be sure we
+          * have room for the chaining command.  Since we're about to emit the
+          * chaining command, let's set it back where it should go.
           */
+         cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4;
+         assert(cmd_buffer->batch.start == batch_bo->bo.map);
+         assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size);
+
          emit_batch_buffer_start(cmd_buffer, &batch_bo->bo, 0);
+         assert(cmd_buffer->batch.start == batch_bo->bo.map);
       } else {
          cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN;
       }
@@ -1088,7 +1097,7 @@ anv_execbuf_add_bo(struct anv_execbuf *exec,
       obj->relocs_ptr = 0;
       obj->alignment = 0;
       obj->offset = bo->offset;
-      obj->flags = bo->flags | extra_flags;
+      obj->flags = (bo->flags & ~ANV_BO_FLAG_MASK) | extra_flags;
       obj->rsvd1 = 0;
       obj->rsvd2 = 0;
    }
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index cd67cc636b2..1cc632bdb0d 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -155,7 +155,7 @@ get_blorp_surf_for_anv_buffer(struct anv_device *device,
       .addr = {
          .buffer = buffer->address.bo,
          .offset = buffer->address.offset + offset,
-         .mocs = device->default_mocs,
+         .mocs = anv_mocs_for_bo(device, buffer->address.bo),
       },
    };
 
@@ -208,7 +208,7 @@ get_blorp_surf_for_anv_image(const struct anv_device *device,
       .addr = {
          .buffer = image->planes[plane].address.bo,
          .offset = image->planes[plane].address.offset + surface->offset,
-         .mocs = device->default_mocs,
+         .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
       },
    };
 
@@ -218,7 +218,7 @@ get_blorp_surf_for_anv_image(const struct anv_device *device,
       blorp_surf->aux_addr = (struct blorp_address) {
          .buffer = image->planes[plane].address.bo,
          .offset = image->planes[plane].address.offset + aux_surface->offset,
-         .mocs = device->default_mocs,
+         .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo),
       };
       blorp_surf->aux_usage = aux_usage;
 
@@ -532,81 +532,86 @@ void anv_CmdBlitImage(
       const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource;
       const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource;
 
-      get_blorp_surf_for_anv_image(cmd_buffer->device,
-                                   src_image, src_res->aspectMask,
-                                   srcImageLayout, ISL_AUX_USAGE_NONE, &src);
-      get_blorp_surf_for_anv_image(cmd_buffer->device,
-                                   dst_image, dst_res->aspectMask,
-                                   dstImageLayout, ISL_AUX_USAGE_NONE, &dst);
-
-      struct anv_format_plane src_format =
-         anv_get_format_plane(&cmd_buffer->device->info, src_image->vk_format,
-                              src_res->aspectMask, src_image->tiling);
-      struct anv_format_plane dst_format =
-         anv_get_format_plane(&cmd_buffer->device->info, dst_image->vk_format,
-                              dst_res->aspectMask, dst_image->tiling);
-
-      unsigned dst_start, dst_end;
-      if (dst_image->type == VK_IMAGE_TYPE_3D) {
-         assert(dst_res->baseArrayLayer == 0);
-         dst_start = pRegions[r].dstOffsets[0].z;
-         dst_end = pRegions[r].dstOffsets[1].z;
-      } else {
-         dst_start = dst_res->baseArrayLayer;
-         dst_end = dst_start + anv_get_layerCount(dst_image, dst_res);
-      }
-
-      unsigned src_start, src_end;
-      if (src_image->type == VK_IMAGE_TYPE_3D) {
-         assert(src_res->baseArrayLayer == 0);
-         src_start = pRegions[r].srcOffsets[0].z;
-         src_end = pRegions[r].srcOffsets[1].z;
-      } else {
-         src_start = src_res->baseArrayLayer;
-         src_end = src_start + anv_get_layerCount(src_image, src_res);
-      }
-
-      bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
-      float src_z_step = (float)(src_end + 1 - src_start) /
-                         (float)(dst_end + 1 - dst_start);
+      assert(anv_image_aspects_compatible(src_res->aspectMask,
+                                          dst_res->aspectMask));
+
+      uint32_t aspect_bit;
+      anv_foreach_image_aspect_bit(aspect_bit, src_image, src_res->aspectMask) {
+         get_blorp_surf_for_anv_image(cmd_buffer->device,
+                                      src_image, 1U << aspect_bit,
+                                      srcImageLayout, ISL_AUX_USAGE_NONE, &src);
+         get_blorp_surf_for_anv_image(cmd_buffer->device,
+                                      dst_image, 1U << aspect_bit,
+                                      dstImageLayout, ISL_AUX_USAGE_NONE, &dst);
+
+         struct anv_format_plane src_format =
+            anv_get_format_plane(&cmd_buffer->device->info, src_image->vk_format,
+                                 1U << aspect_bit, src_image->tiling);
+         struct anv_format_plane dst_format =
+            anv_get_format_plane(&cmd_buffer->device->info, dst_image->vk_format,
+                                 1U << aspect_bit, dst_image->tiling);
+
+         unsigned dst_start, dst_end;
+         if (dst_image->type == VK_IMAGE_TYPE_3D) {
+            assert(dst_res->baseArrayLayer == 0);
+            dst_start = pRegions[r].dstOffsets[0].z;
+            dst_end = pRegions[r].dstOffsets[1].z;
+         } else {
+            dst_start = dst_res->baseArrayLayer;
+            dst_end = dst_start + anv_get_layerCount(dst_image, dst_res);
+         }
 
-      if (flip_z) {
-         src_start = src_end;
-         src_z_step *= -1;
-      }
+         unsigned src_start, src_end;
+         if (src_image->type == VK_IMAGE_TYPE_3D) {
+            assert(src_res->baseArrayLayer == 0);
+            src_start = pRegions[r].srcOffsets[0].z;
+            src_end = pRegions[r].srcOffsets[1].z;
+         } else {
+            src_start = src_res->baseArrayLayer;
+            src_end = src_start + anv_get_layerCount(src_image, src_res);
+         }
 
-      unsigned src_x0 = pRegions[r].srcOffsets[0].x;
-      unsigned src_x1 = pRegions[r].srcOffsets[1].x;
-      unsigned dst_x0 = pRegions[r].dstOffsets[0].x;
-      unsigned dst_x1 = pRegions[r].dstOffsets[1].x;
-      bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);
+         bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
+         float src_z_step = (float)(src_end + 1 - src_start) /
+            (float)(dst_end + 1 - dst_start);
 
-      unsigned src_y0 = pRegions[r].srcOffsets[0].y;
-      unsigned src_y1 = pRegions[r].srcOffsets[1].y;
-      unsigned dst_y0 = pRegions[r].dstOffsets[0].y;
-      unsigned dst_y1 = pRegions[r].dstOffsets[1].y;
-      bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);
+         if (flip_z) {
+            src_start = src_end;
+            src_z_step *= -1;
+         }
 
-      const unsigned num_layers = dst_end - dst_start;
-      anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
-                                        dst_res->aspectMask,
-                                        dst.aux_usage,
-                                        dst_res->mipLevel,
-                                        dst_start, num_layers);
-
-      for (unsigned i = 0; i < num_layers; i++) {
-         unsigned dst_z = dst_start + i;
-         unsigned src_z = src_start + i * src_z_step;
-
-         blorp_blit(&batch, &src, src_res->mipLevel, src_z,
-                    src_format.isl_format, src_format.swizzle,
-                    &dst, dst_res->mipLevel, dst_z,
-                    dst_format.isl_format, dst_format.swizzle,
-                    src_x0, src_y0, src_x1, src_y1,
-                    dst_x0, dst_y0, dst_x1, dst_y1,
-                    blorp_filter, flip_x, flip_y);
+         unsigned src_x0 = pRegions[r].srcOffsets[0].x;
+         unsigned src_x1 = pRegions[r].srcOffsets[1].x;
+         unsigned dst_x0 = pRegions[r].dstOffsets[0].x;
+         unsigned dst_x1 = pRegions[r].dstOffsets[1].x;
+         bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);
+
+         unsigned src_y0 = pRegions[r].srcOffsets[0].y;
+         unsigned src_y1 = pRegions[r].srcOffsets[1].y;
+         unsigned dst_y0 = pRegions[r].dstOffsets[0].y;
+         unsigned dst_y1 = pRegions[r].dstOffsets[1].y;
+         bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);
+
+         const unsigned num_layers = dst_end - dst_start;
+         anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
+                                           1U << aspect_bit,
+                                           dst.aux_usage,
+                                           dst_res->mipLevel,
+                                           dst_start, num_layers);
+
+         for (unsigned i = 0; i < num_layers; i++) {
+            unsigned dst_z = dst_start + i;
+            unsigned src_z = src_start + i * src_z_step;
+
+            blorp_blit(&batch, &src, src_res->mipLevel, src_z,
+                       src_format.isl_format, src_format.swizzle,
+                       &dst, dst_res->mipLevel, dst_z,
+                       dst_format.isl_format, dst_format.swizzle,
+                       src_x0, src_y0, src_x1, src_y1,
+                       dst_x0, dst_y0, dst_x1, dst_y1,
+                       blorp_filter, flip_x, flip_y);
+         }
       }
-
    }
 
    blorp_batch_finish(&batch);
@@ -663,12 +668,12 @@ void anv_CmdCopyBuffer(
       struct blorp_address src = {
          .buffer = src_buffer->address.bo,
          .offset = src_buffer->address.offset + pRegions[r].srcOffset,
-         .mocs = cmd_buffer->device->default_mocs,
+         .mocs = anv_mocs_for_bo(cmd_buffer->device, src_buffer->address.bo),
       };
       struct blorp_address dst = {
          .buffer = dst_buffer->address.bo,
          .offset = dst_buffer->address.offset + pRegions[r].dstOffset,
-         .mocs = cmd_buffer->device->default_mocs,
+         .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo),
       };
 
       blorp_buffer_copy(&batch, src, dst, pRegions[r].size);
@@ -721,7 +726,7 @@ void anv_CmdUpdateBuffer(
       struct blorp_address dst = {
          .buffer = dst_buffer->address.bo,
          .offset = dst_buffer->address.offset + dstOffset,
-         .mocs = cmd_buffer->device->default_mocs,
+         .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo),
       };
 
       blorp_buffer_copy(&batch, src, dst, copy_size);
@@ -1431,7 +1436,8 @@ anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
          .buffer = image->planes[0].address.bo,
          .offset = image->planes[0].address.offset +
                    image->planes[0].shadow_surface.offset,
-         .mocs = cmd_buffer->device->default_mocs,
+         .mocs = anv_mocs_for_bo(cmd_buffer->device,
+                                 image->planes[0].address.bo),
       },
    };
 
@@ -1599,6 +1605,24 @@ anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
                                    ISL_AUX_USAGE_NONE, &stencil);
    }
 
+   /* From the Sky Lake PRM Volume 7, "Depth Buffer Clear":
+    *
+    *    "The following is required when performing a depth buffer clear with
+    *    using the WM_STATE or 3DSTATE_WM:
+    *
+    *       * If other rendering operations have preceded this clear, a
+    *         PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
+    *         enabled must be issued before the rectangle primitive used for
+    *         the depth buffer clear operation.
+    *       * [...]"
+    *
+    * Even though the PRM only says that this is required if using 3DSTATE_WM
+    * and a 3DPRIMITIVE, it appears to also sometimes hang when doing a clear
+    * with WM_HZ_OP.
+    */
+   cmd_buffer->state.pending_pipe_bits |=
+      ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT;
+
    blorp_hiz_clear_depth_stencil(&batch, &depth, &stencil,
                                  level, base_layer, layer_count,
                                  area.offset.x, area.offset.y,
@@ -1613,18 +1637,22 @@ anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
 
    /* From the SKL PRM, Depth Buffer Clear:
     *
-    * Depth Buffer Clear Workaround
-    * Depth buffer clear pass using any of the methods (WM_STATE, 3DSTATE_WM
-    * or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL command with
-    * DEPTH_STALL bit and Depth FLUSH bits “set” before starting to render.
-    * DepthStall and DepthFlush are not needed between consecutive depth clear
-    * passes nor is it required if the depth-clear pass was done with
-    * “full_surf_clear” bit set in the 3DSTATE_WM_HZ_OP.
+    *    "Depth Buffer Clear Workaround
+    *
+    *    Depth buffer clear pass using any of the methods (WM_STATE,
+    *    3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL
+    *    command with DEPTH_STALL bit and Depth FLUSH bits “set” before
+    *    starting to render.  DepthStall and DepthFlush are not needed between
+    *    consecutive depth clear passes nor is it required if the depth-clear
+    *    pass was done with “full_surf_clear” bit set in the
+    *    3DSTATE_WM_HZ_OP."
+    *
+    * Even though the PRM provides a bunch of conditions under which this is
+    * supposedly unnecessary, we choose to perform the flush unconditionally
+    * just to be safe.
     */
-   if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
-      cmd_buffer->state.pending_pipe_bits |=
-         ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT;
-   }
+   cmd_buffer->state.pending_pipe_bits |=
+      ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT;
 }
 
 void
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index c40b94d69f3..50d16b8204b 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -610,20 +610,33 @@ VkResult anv_CreateInstance(
    else
       instance->alloc = default_alloc;
 
-   if (pCreateInfo->pApplicationInfo &&
-       pCreateInfo->pApplicationInfo->apiVersion != 0) {
-      instance->apiVersion = pCreateInfo->pApplicationInfo->apiVersion;
-   } else {
-      anv_EnumerateInstanceVersion(&instance->apiVersion);
+   instance->app_info = (struct anv_app_info) { .api_version = 0 };
+   if (pCreateInfo->pApplicationInfo) {
+      const VkApplicationInfo *app = pCreateInfo->pApplicationInfo;
+
+      instance->app_info.app_name =
+         vk_strdup(&instance->alloc, app->pApplicationName,
+                   VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+      instance->app_info.app_version = app->applicationVersion;
+
+      instance->app_info.engine_name =
+         vk_strdup(&instance->alloc, app->pEngineName,
+                   VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+      instance->app_info.engine_version = app->engineVersion;
+
+      instance->app_info.api_version = app->apiVersion;
    }
 
+   if (instance->app_info.api_version == 0)
+      anv_EnumerateInstanceVersion(&instance->app_info.api_version);
+
    instance->enabled_extensions = enabled_extensions;
 
    for (unsigned i = 0; i < ARRAY_SIZE(instance->dispatch.entrypoints); i++) {
       /* Vulkan requires that entrypoints for extensions which have not been
        * enabled must not be advertised.
        */
-      if (!anv_entrypoint_is_enabled(i, instance->apiVersion,
+      if (!anv_entrypoint_is_enabled(i, instance->app_info.api_version,
                                      &instance->enabled_extensions, NULL)) {
          instance->dispatch.entrypoints[i] = NULL;
       } else if (anv_dispatch_table.entrypoints[i] != NULL) {
@@ -669,6 +682,9 @@ void anv_DestroyInstance(
       anv_physical_device_finish(&instance->physicalDevice);
    }
 
+   vk_free(&instance->alloc, instance->app_info.app_name);
+   vk_free(&instance->alloc, instance->app_info.engine_name);
+
    VG(VALGRIND_DESTROY_MEMPOOL(instance));
 
    vk_debug_report_instance_destroy(&instance->debug_report_callbacks);
@@ -841,6 +857,15 @@ void anv_GetPhysicalDeviceFeatures(
    pFeatures->vertexPipelineStoresAndAtomics =
       pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
       pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];
+
+   struct anv_app_info *app_info = &pdevice->instance->app_info;
+
+   /* The new DOOM and Wolfenstein games require depthBounds without
+    * checking for it.  They seem to run fine without it so just claim it's
+    * there and accept the consequences.
+    */
+   if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
+      pFeatures->depthBounds = true;
 }
 
 void anv_GetPhysicalDeviceFeatures2(
@@ -909,6 +934,14 @@ void anv_GetPhysicalDeviceFeatures2(
          break;
       }
 
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
+         VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
+            (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
+         features->vertexAttributeInstanceRateDivisor = VK_TRUE;
+         features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE;
+         break;
+      }
+
       default:
          anv_debug_ignored_stype(ext->sType);
          break;
@@ -1150,6 +1183,13 @@ void anv_GetPhysicalDeviceProperties2(
          break;
       }
 
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
+         VkPhysicalDeviceProtectedMemoryProperties *props =
+            (VkPhysicalDeviceProtectedMemoryProperties *)ext;
+         props->protectedNoFault = false;
+         break;
+      }
+
       default:
          anv_debug_ignored_stype(ext->sType);
          break;
@@ -1489,7 +1529,7 @@ anv_device_init_dispatch(struct anv_device *device)
       /* Vulkan requires that entrypoints for extensions which have not been
        * enabled must not be advertised.
        */
-      if (!anv_entrypoint_is_enabled(i, device->instance->apiVersion,
+      if (!anv_entrypoint_is_enabled(i, device->instance->app_info.api_version,
                                      &device->instance->enabled_extensions,
                                      &device->enabled_extensions)) {
          device->dispatch.entrypoints[i] = NULL;
@@ -2176,8 +2216,8 @@ VkResult anv_AllocateMemory(
              fd_info->handleType ==
                VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
 
-      result = anv_bo_cache_import(device, &device->bo_cache,
-                                   fd_info->fd, bo_flags, &mem->bo);
+      result = anv_bo_cache_import(device, &device->bo_cache, fd_info->fd,
+                                   bo_flags | ANV_BO_EXTERNAL, &mem->bo);
       if (result != VK_SUCCESS)
          goto fail;
 
@@ -2214,6 +2254,11 @@ VkResult anv_AllocateMemory(
        */
       close(fd_info->fd);
    } else {
+      const VkExportMemoryAllocateInfoKHR *fd_info =
+         vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR);
+      if (fd_info && fd_info->handleTypes)
+         bo_flags |= ANV_BO_EXTERNAL;
+
       result = anv_bo_cache_alloc(device, &device->bo_cache,
                                   pAllocateInfo->allocationSize, bo_flags,
                                   &mem->bo);
diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py
index cffc3e700cb..c2b79b51637 100644
--- a/src/intel/vulkan/anv_extensions.py
+++ b/src/intel/vulkan/anv_extensions.py
@@ -72,9 +72,9 @@ def __init__(self, version, enable):
 EXTENSIONS = [
     Extension('VK_ANDROID_native_buffer',                 5, 'ANDROID'),
     Extension('VK_KHR_16bit_storage',                     1, 'device->info.gen >= 8'),
-    Extension('VK_KHR_8bit_storage',                      1, 'device->info.gen >= 8'),
+    Extension('VK_KHR_8bit_storage',                      1, 'device->info.gen >= 8 && !ANDROID'),
     Extension('VK_KHR_bind_memory2',                      1, True),
-    Extension('VK_KHR_create_renderpass2',                1, True),
+    Extension('VK_KHR_create_renderpass2',                1, '!ANDROID'),
     Extension('VK_KHR_dedicated_allocation',              1, True),
     Extension('VK_KHR_descriptor_update_template',        1, True),
     Extension('VK_KHR_device_group',                      1, True),
@@ -123,7 +123,7 @@ def __init__(self, version, enable):
               'device->has_context_priority'),
     Extension('VK_EXT_shader_viewport_index_layer',       1, True),
     Extension('VK_EXT_shader_stencil_export',             1, 'device->info.gen >= 9'),
-    Extension('VK_EXT_vertex_attribute_divisor',          2, True),
+    Extension('VK_EXT_vertex_attribute_divisor',          3, True),
     Extension('VK_EXT_post_depth_coverage',               1, 'device->info.gen >= 9'),
 ]
 
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index 36d4ac13c75..28d70967b05 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -1095,7 +1095,7 @@ anv_image_fill_surface_state(struct anv_device *device,
                             .size = surface->isl.size,
                             .format = ISL_FORMAT_RAW,
                             .stride = 1,
-                            .mocs = device->default_mocs);
+                            .mocs = anv_mocs_for_bo(device, address.bo));
       state_inout->address = address,
       state_inout->aux_address = ANV_NULL_ADDRESS;
       state_inout->clear_address = ANV_NULL_ADDRESS;
@@ -1196,7 +1196,8 @@ anv_image_fill_surface_state(struct anv_device *device,
                           .aux_address = anv_address_physical(aux_address),
                           .clear_address = anv_address_physical(clear_address),
                           .use_clear_address = !anv_address_is_null(clear_address),
-                          .mocs = device->default_mocs,
+                          .mocs = anv_mocs_for_bo(device,
+                                                  state_inout->address.bo),
                           .x_offset_sa = tile_x_sa,
                           .y_offset_sa = tile_y_sa);
 
diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c
index 06db5787a9c..f6b8ded20a9 100644
--- a/src/intel/vulkan/anv_intel.c
+++ b/src/intel/vulkan/anv_intel.c
@@ -64,7 +64,8 @@ VkResult anv_CreateDmaBufImageINTEL(
          .samples = 1,
          /* FIXME: Need a way to use X tiling to allow scanout */
          .tiling = VK_IMAGE_TILING_OPTIMAL,
-         .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+         .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
+                  VK_IMAGE_USAGE_SAMPLED_BIT,
          .flags = 0,
       }},
       pAllocator, &image_h);
@@ -73,7 +74,7 @@ VkResult anv_CreateDmaBufImageINTEL(
 
    image = anv_image_from_handle(image_h);
 
-   uint64_t bo_flags = 0;
+   uint64_t bo_flags = ANV_BO_EXTERNAL;
    if (device->instance->physicalDevice.supports_48bit_addresses)
       bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
    if (device->instance->physicalDevice.use_softpin)
diff --git a/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c b/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
index 5a971d9be39..71e511f34b7 100644
--- a/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
+++ b/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c
@@ -340,18 +340,16 @@ try_lower_tex_ycbcr(struct anv_pipeline_layout *layout,
    if (binding->immutable_samplers == NULL)
       return false;
 
-   unsigned texture_index = tex->texture_index;
+   assert(tex->texture_index == 0);
+   unsigned array_index = 0;
    if (deref->deref_type != nir_deref_type_var) {
       assert(deref->deref_type == nir_deref_type_array);
       nir_const_value *const_index = nir_src_as_const_value(deref->arr.index);
       if (!const_index)
          return false;
-      size_t hw_binding_size =
-         anv_descriptor_set_binding_layout_get_hw_size(binding);
-      texture_index += MIN2(const_index->u32[0], hw_binding_size - 1);
+      array_index = MIN2(const_index->u32[0], binding->array_size - 1);
    }
-   const struct anv_sampler *sampler =
-      binding->immutable_samplers[texture_index];
+   const struct anv_sampler *sampler = binding->immutable_samplers[array_index];
 
    if (sampler->conversion == NULL)
       return false;
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 1660fcbbc87..1e7c87c70d0 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -555,6 +555,10 @@ anv_multialloc_alloc2(struct anv_multialloc *ma,
    return anv_multialloc_alloc(ma, alloc ? alloc : parent_alloc, scope);
 }
 
+/* Extra ANV-defined BO flags which won't be passed to the kernel */
+#define ANV_BO_EXTERNAL    (1ull << 31)
+#define ANV_BO_FLAG_MASK   (1ull << 31)
+
 struct anv_bo {
    uint32_t gem_handle;
 
@@ -882,12 +886,21 @@ struct anv_physical_device {
     int                                         master_fd;
 };
 
+struct anv_app_info {
+   const char*        app_name;
+   uint32_t           app_version;
+   const char*        engine_name;
+   uint32_t           engine_version;
+   uint32_t           api_version;
+};
+
 struct anv_instance {
     VK_LOADER_DATA                              _loader_data;
 
     VkAllocationCallbacks                       alloc;
 
-    uint32_t                                    apiVersion;
+    struct anv_app_info                         app_info;
+
     struct anv_instance_extension_table         enabled_extensions;
     struct anv_dispatch_table                   dispatch;
 
@@ -1003,6 +1016,7 @@ struct anv_device {
     struct anv_scratch_pool                     scratch_pool;
 
     uint32_t                                    default_mocs;
+    uint32_t                                    external_mocs;
 
     pthread_mutex_t                             mutex;
     pthread_cond_t                              queue_submit;
@@ -1032,6 +1046,15 @@ anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) {
    anv_state_pool_free(anv_binding_table_pool(device), state);
 }
 
+static inline uint32_t
+anv_mocs_for_bo(const struct anv_device *device, const struct anv_bo *bo)
+{
+   if (bo->flags & ANV_BO_EXTERNAL)
+      return device->external_mocs;
+   else
+      return device->default_mocs;
+}
+
 static void inline
 anv_state_flush(struct anv_device *device, struct anv_state state)
 {
@@ -1313,6 +1336,12 @@ _anv_combine_address(struct anv_batch *batch, void *location,
       .AgeforQUADLRU = 0                                       \
    }
 
+#define GEN8_EXTERNAL_MOCS (struct GEN8_MEMORY_OBJECT_CONTROL_STATE) {     \
+      .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle,  \
+      .TargetCache = L3DefertoPATforLLCeLLCselection,                      \
+      .AgeforQUADLRU = 0                                                   \
+   }
+
 /* Skylake: MOCS is now an index into an array of 62 different caching
  * configurations programmed by the kernel.
  */
@@ -1322,9 +1351,9 @@ _anv_combine_address(struct anv_batch *batch, void *location,
       .IndextoMOCSTables                           = 2         \
    }
 
-#define GEN9_MOCS_PTE {                                 \
-      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */       \
-      .IndextoMOCSTables                           = 1  \
+#define GEN9_EXTERNAL_MOCS (struct GEN9_MEMORY_OBJECT_CONTROL_STATE) {  \
+      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */                       \
+      .IndextoMOCSTables                           = 1                  \
    }
 
 /* Cannonlake MOCS defines are duplicates of Skylake MOCS defines. */
@@ -1333,9 +1362,9 @@ _anv_combine_address(struct anv_batch *batch, void *location,
       .IndextoMOCSTables                           = 2         \
    }
 
-#define GEN10_MOCS_PTE {                                 \
-      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */       \
-      .IndextoMOCSTables                           = 1  \
+#define GEN10_EXTERNAL_MOCS (struct GEN10_MEMORY_OBJECT_CONTROL_STATE) {   \
+      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */                          \
+      .IndextoMOCSTables                           = 1                     \
    }
 
 /* Ice Lake MOCS defines are duplicates of Skylake MOCS defines. */
@@ -1344,9 +1373,9 @@ _anv_combine_address(struct anv_batch *batch, void *location,
       .IndextoMOCSTables                           = 2         \
    }
 
-#define GEN11_MOCS_PTE {                                 \
-      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */       \
-      .IndextoMOCSTables                           = 1  \
+#define GEN11_EXTERNAL_MOCS (struct GEN11_MEMORY_OBJECT_CONTROL_STATE) {   \
+      /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */                          \
+      .IndextoMOCSTables                           = 1                     \
    }
 
 struct anv_device_memory {
diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
index e0c0a42069f..f7fb137fdf5 100644
--- a/src/intel/vulkan/anv_queue.c
+++ b/src/intel/vulkan/anv_queue.c
@@ -971,7 +971,7 @@ VkResult anv_CreateSemaphore(
       } else {
          semaphore->permanent.type = ANV_SEMAPHORE_TYPE_BO;
          VkResult result = anv_bo_cache_alloc(device, &device->bo_cache,
-                                              4096, 0,
+                                              4096, ANV_BO_EXTERNAL,
                                               &semaphore->permanent.bo);
          if (result != VK_SUCCESS) {
             vk_free2(&device->alloc, pAllocator, semaphore);
@@ -1120,7 +1120,8 @@ VkResult anv_ImportSemaphoreFdKHR(
          new_impl.type = ANV_SEMAPHORE_TYPE_BO;
 
          VkResult result = anv_bo_cache_import(device, &device->bo_cache,
-                                               fd, 0, &new_impl.bo);
+                                               fd, ANV_BO_EXTERNAL,
+                                               &new_impl.bo);
          if (result != VK_SUCCESS)
             return result;
 
diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c
index 1403601e9c0..5ed1d711689 100644
--- a/src/intel/vulkan/anv_wsi.c
+++ b/src/intel/vulkan/anv_wsi.c
@@ -216,28 +216,45 @@ VkResult anv_GetSwapchainImagesKHR(
 }
 
 VkResult anv_AcquireNextImageKHR(
-    VkDevice                                     _device,
+    VkDevice                                     device,
     VkSwapchainKHR                               swapchain,
     uint64_t                                     timeout,
     VkSemaphore                                  semaphore,
     VkFence                                      fence,
     uint32_t*                                    pImageIndex)
+{
+   VkAcquireNextImageInfoKHR acquire_info = {
+      .sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR,
+      .swapchain = swapchain,
+      .timeout = timeout,
+      .semaphore = semaphore,
+      .fence = fence,
+      .deviceMask = 0,
+   };
+
+   return anv_AcquireNextImage2KHR(device, &acquire_info, pImageIndex);
+}
+
+VkResult anv_AcquireNextImage2KHR(
+    VkDevice                                     _device,
+    const VkAcquireNextImageInfoKHR*             pAcquireInfo,
+    uint32_t*                                    pImageIndex)
 {
    ANV_FROM_HANDLE(anv_device, device, _device);
    struct anv_physical_device *pdevice = &device->instance->physicalDevice;
 
-   VkResult result = wsi_common_acquire_next_image(&pdevice->wsi_device,
-                                                   _device,
-                                                   swapchain,
-                                                   timeout,
-                                                   semaphore,
-                                                   pImageIndex);
+   VkResult result = wsi_common_acquire_next_image2(&pdevice->wsi_device,
+                                                    _device,
+                                                    pAcquireInfo,
+                                                    pImageIndex);
 
    /* Thanks to implicit sync, the image is ready immediately.  However, we
     * should wait for the current GPU state to finish.
     */
-   if (fence != VK_NULL_HANDLE)
-      anv_QueueSubmit(anv_queue_to_handle(&device->queue), 0, NULL, fence);
+   if (pAcquireInfo->fence != VK_NULL_HANDLE) {
+      anv_QueueSubmit(anv_queue_to_handle(&device->queue), 0, NULL,
+                      pAcquireInfo->fence);
+   }
 
    return result;
 }
diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c
index 3acfbb710c0..da51cb9781c 100644
--- a/src/intel/vulkan/gen7_cmd_buffer.c
+++ b/src/intel/vulkan/gen7_cmd_buffer.c
@@ -48,6 +48,7 @@ clamp_int64(int64_t x, int64_t min, int64_t max)
 void
 gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer)
 {
+   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
    uint32_t count = cmd_buffer->state.gfx.dynamic.scissor.count;
    const VkRect2D *scissors = cmd_buffer->state.gfx.dynamic.scissor.scissors;
    struct anv_state scissor_state =
@@ -73,8 +74,8 @@ gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer)
          /* Do this math using int64_t so overflow gets clamped correctly. */
          .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max),
          .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max),
-         .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max),
-         .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max)
+         .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, fb->height - 1),
+         .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, fb->width - 1)
       };
 
       if (s->extent.width <= 0 || s->extent.height <= 0) {
@@ -245,7 +246,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
          ib.CutIndexEnable             = pipeline->primitive_restart;
 #endif
          ib.IndexFormat                = cmd_buffer->state.gfx.gen7.index_type;
-         ib.MemoryObjectControlState   = GENX(MOCS);
+         ib.IndexBufferMOCS            = anv_mocs_for_bo(cmd_buffer->device,
+                                                         buffer->address.bo);
 
          ib.BufferStartingAddress      = anv_address_add(buffer->address,
                                                          offset);
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
index ca2baf84a19..752d04f3013 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -565,7 +565,8 @@ void genX(CmdBindIndexBuffer)(
 
    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
       ib.IndexFormat                = vk_to_gen_index_type[indexType];
-      ib.MemoryObjectControlState   = GENX(MOCS);
+      ib.IndexBufferMOCS            = anv_mocs_for_bo(cmd_buffer->device,
+                                                      buffer->address.bo);
       ib.BufferStartingAddress      = anv_address_add(buffer->address, offset);
       ib.BufferSize                 = buffer->size - offset;
    }
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index b7ed817d3a0..7cef4c55cf1 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2493,6 +2493,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
    uint32_t *p;
 
    uint32_t vb_emit = cmd_buffer->state.gfx.vb_dirty & pipeline->vb_used;
+   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE)
+      vb_emit |= pipeline->vb_used;
 
    assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
 
@@ -2514,12 +2516,11 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
          struct GENX(VERTEX_BUFFER_STATE) state = {
             .VertexBufferIndex = vb,
 
-#if GEN_GEN >= 8
-            .MemoryObjectControlState = GENX(MOCS),
-#else
+            .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device,
+                                                buffer->address.bo),
+#if GEN_GEN <= 7
             .BufferAccessType = pipeline->vb[vb].instanced ? INSTANCEDATA : VERTEXDATA,
             .InstanceDataStepRate = pipeline->vb[vb].instance_divisor,
-            .VertexBufferMemoryObjectControlState = GENX(MOCS),
 #endif
 
             .AddressModifyEnable = true,
@@ -2612,7 +2613,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
                                           pipeline->depth_clamp_enable);
    }
 
-   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR)
+   if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_DYNAMIC_SCISSOR |
+                                      ANV_CMD_DIRTY_RENDER_TARGETS))
       gen7_cmd_buffer_emit_scissor(cmd_buffer);
 
    genX(cmd_buffer_flush_dynamic_state)(cmd_buffer);
@@ -2633,12 +2635,11 @@ emit_vertex_bo(struct anv_cmd_buffer *cmd_buffer,
          .VertexBufferIndex = index,
          .AddressModifyEnable = true,
          .BufferPitch = 0,
+         .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, addr.bo),
 #if (GEN_GEN >= 8)
-         .MemoryObjectControlState = GENX(MOCS),
          .BufferStartingAddress = addr,
          .BufferSize = size
 #else
-         .VertexBufferMemoryObjectControlState = GENX(MOCS),
          .BufferStartingAddress = addr,
          .EndAddress = anv_address_add(addr, size),
 #endif
@@ -3390,9 +3391,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
    if (dw == NULL)
       return;
 
-   struct isl_depth_stencil_hiz_emit_info info = {
-      .mocs = device->default_mocs,
-   };
+   struct isl_depth_stencil_hiz_emit_info info = { };
 
    if (iview)
       info.view = &iview->planes[0].isl;
@@ -3410,6 +3409,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
                               image->planes[depth_plane].address.bo,
                               image->planes[depth_plane].address.offset +
                               surface->offset);
+      info.mocs =
+         anv_mocs_for_bo(device, image->planes[depth_plane].address.bo);
 
       const uint32_t ds =
          cmd_buffer->state.subpass->depth_stencil_attachment->attachment;
@@ -3441,6 +3442,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
                               image->planes[stencil_plane].address.bo,
                               image->planes[stencil_plane].address.offset +
                               surface->offset);
+      info.mocs =
+         anv_mocs_for_bo(device, image->planes[stencil_plane].address.bo);
    }
 
    isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info);
diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c
index eaafcfa3b22..b51c1804659 100644
--- a/src/intel/vulkan/genX_gpu_memcpy.c
+++ b/src/intel/vulkan/genX_gpu_memcpy.c
@@ -158,11 +158,10 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
          .AddressModifyEnable = true,
          .BufferStartingAddress = { src, src_offset },
          .BufferPitch = bs,
+         .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, src),
 #if (GEN_GEN >= 8)
-         .MemoryObjectControlState = GENX(MOCS),
          .BufferSize = size,
 #else
-         .VertexBufferMemoryObjectControlState = GENX(MOCS),
          .EndAddress = { src, src_offset + size - 1 },
 #endif
       });
@@ -219,7 +218,7 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
 
    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) {
       sob.SOBufferIndex = 0;
-      sob.SOBufferObjectControlState = GENX(MOCS);
+      sob.SOBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, dst),
       sob.SurfaceBaseAddress = (struct anv_address) { dst, dst_offset };
 
 #if GEN_GEN >= 8
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index ec47360764c..cb560dcec70 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -91,7 +91,8 @@ emit_vertex_input(struct anv_pipeline *pipeline,
 
    /* Pull inputs_read out of the VS prog data */
    const uint64_t inputs_read = vs_prog_data->inputs_read;
-   const uint64_t double_inputs_read = vs_prog_data->double_inputs_read;
+   const uint64_t double_inputs_read =
+      vs_prog_data->double_inputs_read & inputs_read;
    assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
    const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0;
    const uint32_t elements_double = double_inputs_read >> VERT_ATTRIB_GENERIC0;
@@ -115,7 +116,34 @@ emit_vertex_input(struct anv_pipeline *pipeline,
                        GENX(3DSTATE_VERTEX_ELEMENTS));
    if (!p)
       return;
-   memset(p + 1, 0, (num_dwords - 1) * 4);
+
+   for (uint32_t i = 0; i < total_elems; i++) {
+      /* The SKL docs for VERTEX_ELEMENT_STATE say:
+       *
+       *    "All elements must be valid from Element[0] to the last valid
+       *    element. (I.e. if Element[2] is valid then Element[1] and
+       *    Element[0] must also be valid)."
+       *
+       * The SKL docs for 3D_Vertex_Component_Control say:
+       *
+       *    "Don't store this component. (Not valid for Component 0, but can
+       *    be used for Component 1-3)."
+       *
+       * So we can't just leave a vertex element blank and hope for the best.
+       * We have to tell the VF hardware to put something in it; so we just
+       * store a bunch of zero.
+       *
+       * TODO: Compact vertex elements so we never end up with holes.
+       */
+      struct GENX(VERTEX_ELEMENT_STATE) element = {
+         .Valid = true,
+         .Component0Control = VFCOMP_STORE_0,
+         .Component1Control = VFCOMP_STORE_0,
+         .Component2Control = VFCOMP_STORE_0,
+         .Component3Control = VFCOMP_STORE_0,
+      };
+      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + i * 2], &element);
+   }
 
    for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) {
       const VkVertexInputAttributeDescription *desc =
@@ -727,14 +755,14 @@ sanitize_ds_state(VkPipelineDepthStencilStateCreateInfo *state,
 {
    *stencilWriteEnable = state->stencilTestEnable;
 
-   /* If the depth test is disabled, we won't be writing anything. */
-   if (!state->depthTestEnable)
-      state->depthWriteEnable = false;
-
-   /* The Vulkan spec requires that if either depth or stencil is not present,
-    * the pipeline is to act as if the test silently passes.
+   /* If the depth test is disabled, we won't be writing anything. Make sure we
+    * treat the test as always passing later on as well.
+    *
+    * Also, the Vulkan spec requires that if either depth or stencil is not
+    * present, the pipeline is to act as if the test silently passes. In that
+    * case we won't write either.
     */
-   if (!(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
+   if (!state->depthTestEnable || !(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
       state->depthWriteEnable = false;
       state->depthCompareOp = VK_COMPARE_OP_ALWAYS;
    }
@@ -1141,7 +1169,28 @@ emit_3dstate_vs(struct anv_pipeline *pipeline)
       vs.IllegalOpcodeExceptionEnable = false;
       vs.SoftwareExceptionEnable    = false;
       vs.MaximumNumberofThreads     = devinfo->max_vs_threads - 1;
-      vs.VertexCacheDisable         = false;
+
+      if (GEN_GEN == 9 && devinfo->gt == 4 &&
+          anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
+         /* On Sky Lake GT4, we have experienced some hangs related to the VS
+          * cache and tessellation.  It is unknown exactly what is happening
+          * but the Haswell docs for the "VS Reference Count Full Force Miss
+          * Enable" field of the "Thread Mode" register refer to a HSW bug in
+          * which the VUE handle reference count would overflow resulting in
+          * internal reference counting bugs.  My (Jason's) best guess is that
+          * this bug cropped back up on SKL GT4 when we suddenly had more
+          * threads in play than any previous gen9 hardware.
+          *
+          * What we do know for sure is that setting this bit when
+          * tessellation shaders are in use fixes a GPU hang in Batman: Arkham
+          * City when playing with DXVK (https://bugs.freedesktop.org/107280).
+          * Disabling the vertex cache with tessellation shaders should only
+          * have a minor performance impact as the tessellation shaders are
+          * likely generating and processing far more geometry than the vertex
+          * stage.
+          */
+         vs.VertexCacheDisable = true;
+      }
 
       vs.VertexURBEntryReadLength      = vs_prog_data->base.urb_read_length;
       vs.VertexURBEntryReadOffset      = 0;
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index e35e9b85844..9b1d73b7259 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -347,6 +347,11 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
             sdi.Address.offset = slot_offset + j * sizeof(uint64_t);
             sdi.ImmediateData = 0ull;
          }
+         anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
+            sdi.Address.bo = &pool->bo;
+            sdi.Address.offset = slot_offset + j * sizeof(uint64_t) + 4;
+            sdi.ImmediateData = 0ull;
+         }
       }
       emit_query_availability(cmd_buffer, &pool->bo, slot_offset);
    }
diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
index b1014d9e797..847fff83949 100644
--- a/src/intel/vulkan/genX_state.c
+++ b/src/intel/vulkan/genX_state.c
@@ -93,6 +93,12 @@ genX(init_device_state)(struct anv_device *device)
 {
    GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs,
                                           &GENX(MOCS));
+#if GEN_GEN >= 8
+   GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->external_mocs,
+                                          &GENX(EXTERNAL_MOCS));
+#else
+   device->external_mocs = device->default_mocs;
+#endif
 
    struct anv_batch batch;
 
@@ -184,6 +190,24 @@ genX(init_device_state)(struct anv_device *device)
 #endif
    }
 
+#if GEN_GEN >= 10
+   /* A fixed function pipe flush is required before modifying this field */
+   anv_batch_emit(&batch, GENX(PIPE_CONTROL), pipe) {
+      pipe.PipeControlFlushEnable = true;
+   }
+
+  /* enable object level preemption */
+   uint32_t csc1;
+  
+   anv_pack_struct(&csc1, GENX(CS_CHICKEN1),
+                   .ReplayMode = ObjectLevelPreemption,
+                   .ReplayModeMask = 1);
+   anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
+      lri.RegisterOffset   = GENX(CS_CHICKEN1_num);
+      lri.DataDWord        = csc1;
+   }
+#endif
+
    anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
 
    assert(batch.next <= batch.end);
diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build
index e427c7471f4..49ff641f0ba 100644
--- a/src/intel/vulkan/meson.build
+++ b/src/intel/vulkan/meson.build
@@ -102,7 +102,7 @@ foreach g : [['70', ['gen7_cmd_buffer.c']], ['75', ['gen7_cmd_buffer.c']],
       inc_vulkan_wsi,
     ],
     c_args : [
-      c_vis_args, no_override_init_args, '-msse2',
+      c_vis_args, no_override_init_args, c_sse2_args,
       '-DGEN_VERSIONx10=@0@'.format(_gen),
     ],
     dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers],
@@ -146,7 +146,7 @@ anv_deps = [
 anv_flags = [
   c_vis_args,
   no_override_init_args,
-  '-msse2',
+  c_sse2_args,
 ]
 
 if with_platform_x11
diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c
index 473fe6c9089..fc42b8ea9c4 100644
--- a/src/loader/loader_dri3_helper.c
+++ b/src/loader/loader_dri3_helper.c
@@ -1737,6 +1737,7 @@ dri3_get_buffer(__DRIdrawable *driDrawable,
                 struct loader_dri3_drawable *draw)
 {
    struct loader_dri3_buffer *buffer;
+   bool fence_await = buffer_type == loader_dri3_buffer_back;
    int buf_id;
 
    if (buffer_type == loader_dri3_buffer_back) {
@@ -1793,6 +1794,7 @@ dri3_get_buffer(__DRIdrawable *driDrawable,
                            0, 0, 0, 0,
                            draw->width, draw->height);
             dri3_fence_trigger(draw->conn, new_buffer);
+            fence_await = true;
          }
          dri3_free_render_buffer(draw, buffer);
       } else if (buffer_type == loader_dri3_buffer_front) {
@@ -1814,12 +1816,15 @@ dri3_get_buffer(__DRIdrawable *driDrawable,
                                           new_buffer->linear_buffer,
                                           0, 0, draw->width, draw->height,
                                           0, 0, 0);
-         }
+         } else
+            fence_await = true;
       }
       buffer = new_buffer;
       draw->buffers[buf_id] = buffer;
    }
-   dri3_fence_await(draw->conn, draw, buffer);
+
+   if (fence_await)
+      dri3_fence_await(draw->conn, draw, buffer);
 
    /*
     * Do we need to preserve the content of a previous buffer?
diff --git a/src/mapi/glapi/gen/KHR_robustness_es.xml b/src/mapi/glapi/gen/KHR_robustness_es.xml
index 84f6fd2cdb9..82b7edf31a7 100644
--- a/src/mapi/glapi/gen/KHR_robustness_es.xml
+++ b/src/mapi/glapi/gen/KHR_robustness_es.xml
@@ -60,4 +60,36 @@
 
 </category>
 
+<category name="GL_EXT_robustness" number="107">
+    <function name="GetGraphicsResetStatusEXT"
+              alias="GetGraphicsResetStatusARB" es2="2.0">
+        <return type="GLenum"/>
+    </function>
+
+    <function name="ReadnPixelsEXT" alias="ReadnPixelsARB" es2="2.0">
+        <param name="x" type="GLint"/>
+        <param name="y" type="GLint"/>
+        <param name="width" type="GLsizei"/>
+        <param name="height" type="GLsizei"/>
+        <param name="format" type="GLenum"/>
+        <param name="type" type="GLenum"/>
+        <param name="bufSize" type="GLsizei"/>
+        <param name="data" type="GLvoid *" output="true"/>
+    </function>
+
+    <function name="GetnUniformfvEXT" alias="GetnUniformfvARB" es2="2.0">
+        <param name="program" type="GLuint"/>
+        <param name="location" type="GLint"/>
+        <param name="bufSize" type="GLsizei"/>
+        <param name="params" type="GLfloat *" output="true"/>
+    </function>
+
+    <function name="GetnUniformivEXT" alias="GetnUniformivARB" es2="2.0">
+        <param name="program" type="GLuint"/>
+        <param name="location" type="GLint"/>
+        <param name="bufSize" type="GLsizei"/>
+        <param name="params" type="GLint *" output="true"/>
+    </function>
+</category>
+
 </OpenGLAPI>
diff --git a/src/mesa/Android.libmesa_dricore.mk b/src/mesa/Android.libmesa_dricore.mk
index 34fd8581c2d..792117767b4 100644
--- a/src/mesa/Android.libmesa_dricore.mk
+++ b/src/mesa/Android.libmesa_dricore.mk
@@ -49,7 +49,7 @@ ifeq ($(ARCH_X86_HAVE_SSE4_1),true)
 LOCAL_WHOLE_STATIC_LIBRARIES := \
 	libmesa_sse41
 LOCAL_CFLAGS := \
-	-msse4.1 \
+	-msse4.1 -mstackrealign \
        -DUSE_SSE41
 endif
 
diff --git a/src/mesa/Android.libmesa_sse41.mk b/src/mesa/Android.libmesa_sse41.mk
index da40f43df69..de19a1fb48b 100644
--- a/src/mesa/Android.libmesa_sse41.mk
+++ b/src/mesa/Android.libmesa_sse41.mk
@@ -34,7 +34,7 @@ LOCAL_SRC_FILES += \
 	$(X86_SSE41_FILES)
 
 LOCAL_CFLAGS := \
-	-msse4.1
+	-msse4.1 -mstackrealign
 
 LOCAL_C_INCLUDES := \
 	$(MESA_TOP)/src/mapi \
diff --git a/src/mesa/drivers/dri/Android.mk b/src/mesa/drivers/dri/Android.mk
index 53ff4b4f632..dc1f98364c8 100644
--- a/src/mesa/drivers/dri/Android.mk
+++ b/src/mesa/drivers/dri/Android.mk
@@ -49,11 +49,18 @@ MESA_DRI_WHOLE_STATIC_LIBRARIES := \
 MESA_DRI_SHARED_LIBRARIES := \
 	libcutils \
 	libdl \
-	libexpat \
 	libglapi \
 	liblog \
 	libz
 
+# If Android version >=8 MESA should static link libexpat else should dynamic link
+ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0)
+MESA_DRI_WHOLE_STATIC_LIBRARIES += \
+	libexpat
+else
+MESA_DRI_SHARED_LIBRARIES += \
+	libexpat
+endif
 #-----------------------------------------------
 # Build drivers and libmesa_dri_common
 
diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk
index 324d087220a..3052f6f1495 100644
--- a/src/mesa/drivers/dri/i965/Android.mk
+++ b/src/mesa/drivers/dri/i965/Android.mk
@@ -274,6 +274,7 @@ LOCAL_LDFLAGS += $(MESA_DRI_LDFLAGS)
 LOCAL_CFLAGS := \
 	$(MESA_DRI_CFLAGS)
 
+LOCAL_CFLAGS += -Wno-error
 ifeq ($(ARCH_X86_HAVE_SSE4_1),true)
 LOCAL_CFLAGS += \
 	-DUSE_SSE41
diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am
index 889d4c68a2b..0afa7a2f216 100644
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -44,7 +44,7 @@ AM_CFLAGS = \
 	$(WNO_OVERRIDE_INIT) \
 	$(LIBDRM_CFLAGS) \
 	$(VALGRIND_CFLAGS) \
-	-msse2
+	-msse2 -mstackrealign
 
 AM_CXXFLAGS = $(AM_CFLAGS)
 
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c
index 7476cee43a4..ad3a47ef035 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -187,6 +187,9 @@ blorp_surf_for_miptree(struct brw_context *brw,
    assert((surf->aux_usage == ISL_AUX_USAGE_NONE) ==
           (surf->aux_addr.buffer == NULL));
 
+   if (!is_render_target && brw->screen->devinfo.gen == 9)
+      gen9_apply_single_tex_astc5x5_wa(brw, mt->format, surf->aux_usage);
+
    /* ISL wants real levels, not offset ones. */
    *level -= mt->first_level;
 }
@@ -382,7 +385,8 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
    enum isl_format src_isl_format =
       brw_blorp_to_isl_format(brw, src_format, false);
    enum isl_aux_usage src_aux_usage =
-      intel_miptree_texture_aux_usage(brw, src_mt, src_isl_format);
+      intel_miptree_texture_aux_usage(brw, src_mt, src_isl_format,
+                                      0 /* The astc5x5 WA isn't needed */);
    /* We do format workarounds for some depth formats so we can't reliably
     * sample with HiZ.  One of these days, we should fix that.
     */
@@ -1220,12 +1224,12 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
 
    x0 = fb->_Xmin;
    x1 = fb->_Xmax;
-   if (rb->Name != 0) {
-      y0 = fb->_Ymin;
-      y1 = fb->_Ymax;
-   } else {
+   if (fb->FlipY) {
       y0 = rb->Height - fb->_Ymax;
       y1 = rb->Height - fb->_Ymin;
+   } else {
+      y0 = fb->_Ymin;
+      y1 = fb->_Ymax;
    }
 
    /* If the clear region is empty, just return. */
@@ -1411,9 +1415,8 @@ brw_blorp_clear_depth_stencil(struct brw_context *brw,
    if (!(mask & (BUFFER_BITS_DEPTH_STENCIL)))
       return;
 
-   uint32_t x0, x1, y0, y1, rb_name, rb_height;
+   uint32_t x0, x1, y0, y1, rb_height;
    if (depth_rb) {
-      rb_name = depth_rb->Name;
       rb_height = depth_rb->Height;
       if (stencil_rb) {
          assert(depth_rb->Width == stencil_rb->Width);
@@ -1421,18 +1424,17 @@ brw_blorp_clear_depth_stencil(struct brw_context *brw,
       }
    } else {
       assert(stencil_rb);
-      rb_name = stencil_rb->Name;
       rb_height = stencil_rb->Height;
    }
 
    x0 = fb->_Xmin;
    x1 = fb->_Xmax;
-   if (rb_name != 0) {
-      y0 = fb->_Ymin;
-      y1 = fb->_Ymax;
-   } else {
+   if (fb->FlipY) {
       y0 = rb_height - fb->_Ymax;
       y1 = rb_height - fb->_Ymin;
+   } else {
+      y0 = fb->_Ymin;
+      y1 = fb->_Ymax;
    }
 
    /* If the clear region is empty, just return. */
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 09d45e30ecc..31284864973 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -496,7 +496,6 @@ bo_alloc_internal(struct brw_bufmgr *bufmgr,
                   uint32_t stride)
 {
    struct brw_bo *bo;
-   unsigned int page_size = getpagesize();
    int ret;
    struct bo_cache_bucket *bucket;
    bool alloc_from_cache;
@@ -522,12 +521,12 @@ bo_alloc_internal(struct brw_bufmgr *bufmgr,
     * allocation up.
     */
    if (bucket == NULL) {
-      bo_size = size;
-      if (bo_size < page_size)
-         bo_size = page_size;
+      unsigned int page_size = getpagesize();
+      bo_size = size == 0 ? page_size : ALIGN(size, page_size);
    } else {
       bo_size = bucket->size;
    }
+   assert(bo_size);
 
    mtx_lock(&bufmgr->lock);
    /* Get a buffer out of the cache if available */
@@ -1490,7 +1489,7 @@ brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd)
    brw_bo_make_external(bo);
 
    if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
-                          DRM_CLOEXEC, prime_fd) != 0)
+			  DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0)
       return -errno;
 
    bo->reusable = false;
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 136f1325c6e..7f0c5dd57d8 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -695,7 +695,7 @@ brw_initialize_context_constants(struct brw_context *brw)
    /* ARB_viewport_array, OES_viewport_array */
    if (devinfo->gen >= 6) {
       ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
-      ctx->Const.ViewportSubpixelBits = 0;
+      ctx->Const.ViewportSubpixelBits = 8;
 
       /* Cast to float before negating because MaxViewportWidth is unsigned.
        */
@@ -863,6 +863,19 @@ brw_process_driconf_options(struct brw_context *brw)
    ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);
    driComputeOptionsSha1(&brw->screen->optionCache,
                          ctx->Const.dri_config_options_sha1);
+
+   brw->screen->compiler->simd32_heuristics_control.grouped_sends_check =
+      driQueryOptionb(&brw->optionCache, "simd32_heuristic_grouped_check");
+   brw->screen->compiler->simd32_heuristics_control.max_grouped_sends =
+      driQueryOptioni(&brw->optionCache, "simd32_heuristic_grouped_sends");
+   brw->screen->compiler->simd32_heuristics_control.inst_count_check =
+      driQueryOptionb(&brw->optionCache, "simd32_heuristic_inst_check");
+   brw->screen->compiler->simd32_heuristics_control.inst_count_ratio =
+      driQueryOptionf(&brw->optionCache, "simd32_heuristic_inst_ratio");
+   brw->screen->compiler->simd32_heuristics_control.mrt_check =
+      driQueryOptionb(&brw->optionCache, "simd32_heuristic_mrt_check");
+   brw->screen->compiler->simd32_heuristics_control.max_mrts =
+      driQueryOptioni(&brw->optionCache, "simd32_heuristic_max_mrts");
 }
 
 GLboolean
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 72be8f2a4d0..607f67f70e0 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -168,6 +168,11 @@ enum brw_cache_id {
    BRW_MAX_CACHE
 };
 
+enum gen9_astc5x5_wa_tex_type {
+   GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5 = 1 << 0,
+   GEN9_ASTC5X5_WA_TEX_TYPE_AUX     = 1 << 1,
+};
+
 enum brw_state_id {
    /* brw_cache_ids must come first - see brw_program_cache.c */
    BRW_STATE_URB_FENCE = BRW_MAX_CACHE,
@@ -681,6 +686,7 @@ enum brw_query_kind {
    OA_COUNTERS,
    OA_COUNTERS_RAW,
    PIPELINE_STATS,
+   NULL_RENDERER,
 };
 
 struct brw_perf_query_register_prog {
@@ -826,6 +832,8 @@ struct brw_context
 
    GLuint primitive; /**< Hardware primitive, such as _3DPRIM_TRILIST. */
 
+   bool object_preemption; /**< Object level preemption enabled. */
+
    GLenum reduced_primitive;
 
    /**
@@ -1226,6 +1234,7 @@ struct brw_context
 
       int n_active_oa_queries;
       int n_active_pipeline_stats_queries;
+      int n_active_null_renderers;
 
       /* The number of queries depending on running OA counters which
        * extends beyond brw_end_perf_query() since we need to wait until
@@ -1315,6 +1324,8 @@ struct brw_context
     */
    enum isl_aux_usage draw_aux_usage[MAX_DRAW_BUFFERS];
 
+   enum gen9_astc5x5_wa_tex_type gen9_astc5x5_wa_tex_mask;
+
    __DRIcontext *driContext;
    struct intel_screen *screen;
 };
@@ -1339,6 +1350,10 @@ void intel_update_renderbuffers(__DRIcontext *context,
                                 __DRIdrawable *drawable);
 void intel_prepare_render(struct brw_context *brw);
 
+void gen9_apply_single_tex_astc5x5_wa(struct brw_context *brw,
+                                      mesa_format format,
+                                      enum isl_aux_usage aux_usage);
+
 void brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering,
                                 bool *draw_aux_buffer_disabled);
 
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 855f1c7d744..ff69685ec2c 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1650,11 +1650,17 @@ enum brw_pixel_shader_coverage_mask_mode {
 #define GEN10_CACHE_MODE_SS            0x0e420
 #define GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
 
-#define INSTPM                             0x20c0
+#define INSTPM                             0x20c0 /* Gen6-8 */
 # define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6)
+# define INSTPM_GLOBAL_DEBUG_ENABLE                    (1 << 4)
+# define INSTPM_MEDIA_INSTRUCTION_DISABLE              (1 << 3)
+# define INSTPM_3D_RENDERER_INSTRUCTION_DISABLE        (1 << 2)
+# define INSTPM_3D_STATE_INSTRUCTION_DISABLE           (1 << 1)
 
 #define CS_DEBUG_MODE2                     0x20d8 /* Gen9+ */
 # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
+# define CSDBG2_MEDIA_INSTRUCTION_DISABLE              (1 << 1)
+# define CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE        (1 << 0)
 
 #define GEN7_RPSTAT1                       0xA01C
 #define  GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT   7
@@ -1673,4 +1679,9 @@ enum brw_pixel_shader_coverage_mask_mode {
 # define GLK_SCEC_BARRIER_MODE_3D_HULL     (1 << 7)
 # define GLK_SCEC_BARRIER_MODE_MASK        REG_MASK(1 << 7)
 
+#define CS_CHICKEN1                        0x2580 /* Gen9+ */
+# define GEN9_REPLAY_MODE_MIDBUFFER             (0 << 0)
+# define GEN9_REPLAY_MODE_MIDOBJECT             (1 << 0)
+# define GEN9_REPLAY_MODE_MASK                  REG_MASK(1 << 0)
+
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 71461d7b0a7..8536c040109 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -378,6 +378,68 @@ intel_disable_rb_aux_buffer(struct brw_context *brw,
    return found;
 }
 
+/** Implement the ASTC 5x5 sampler workaround
+ *
+ * Gen9 sampling hardware has a bug where an ASTC 5x5 compressed surface
+ * cannot live in the sampler cache at the same time as an aux compressed
+ * surface.  In order to work around the bug we have to stall rendering with a
+ * CS and pixel scoreboard stall (implicit in the CS stall) and invalidate the
+ * texture cache whenever one of ASTC 5x5 or aux compressed may be in the
+ * sampler cache and we're about to render with something which samples from
+ * the other.
+ *
+ * In the case of a single shader which textures from both ASTC 5x5 and
+ * a texture which is CCS or HiZ compressed, we have to resolve the aux
+ * compressed texture prior to rendering.  This second part is handled in
+ * brw_predraw_resolve_inputs() below.
+ *
+ * We have observed this issue to affect CCS and HiZ sampling but whether or
+ * not it also affects MCS is unknown.  Because MCS has no concept of a
+ * resolve (and doing one would be stupid expensive), we choose to simply
+ * ignore the possibility and hope for the best.
+ */
+static void
+gen9_apply_astc5x5_wa_flush(struct brw_context *brw,
+                            enum gen9_astc5x5_wa_tex_type curr_mask)
+{
+   assert(brw->screen->devinfo.gen == 9);
+
+   if (((brw->gen9_astc5x5_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) &&
+        (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX)) ||
+       ((brw->gen9_astc5x5_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX) &&
+        (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5))) {
+      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);
+      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
+   }
+
+   brw->gen9_astc5x5_wa_tex_mask = curr_mask;
+}
+
+static enum gen9_astc5x5_wa_tex_type
+gen9_astc5x5_wa_bits(mesa_format format, enum isl_aux_usage aux_usage)
+{
+   if (aux_usage != ISL_AUX_USAGE_NONE &&
+       aux_usage != ISL_AUX_USAGE_MCS)
+      return GEN9_ASTC5X5_WA_TEX_TYPE_AUX;
+
+   if (format == MESA_FORMAT_RGBA_ASTC_5x5 ||
+       format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5)
+      return GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5;
+
+   return 0;
+}
+
+/* Helper for the gen9 ASTC 5x5 workaround.  This version exists for BLORP's
+ * use-cases where only a single texture is bound.
+ */
+void
+gen9_apply_single_tex_astc5x5_wa(struct brw_context *brw,
+                                 mesa_format format,
+                                 enum isl_aux_usage aux_usage)
+{
+   gen9_apply_astc5x5_wa_flush(brw, gen9_astc5x5_wa_bits(format, aux_usage));
+}
+
 static void
 mark_textures_used_for_txf(BITSET_WORD *used_for_txf,
                            const struct gl_program *prog)
@@ -417,8 +479,30 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering,
       mark_textures_used_for_txf(used_for_txf, ctx->ComputeProgram._Current);
    }
 
-   /* Resolve depth buffer and render cache of each enabled texture. */
    int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
+
+   enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits = 0;
+   if (brw->screen->devinfo.gen == 9) {
+      /* In order to properly implement the ASTC 5x5 workaround for an
+       * arbitrary draw or dispatch call, we have to walk the entire list of
+       * textures looking for ASTC 5x5.  If there is any ASTC 5x5 in this draw
+       * call, all aux compressed textures must be resolved and have aux
+       * compression disabled while sampling.
+       */
+      for (int i = 0; i <= maxEnabledUnit; i++) {
+         if (!ctx->Texture.Unit[i]._Current)
+            continue;
+         tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
+         if (!tex_obj || !tex_obj->mt)
+            continue;
+
+         astc5x5_wa_bits |= gen9_astc5x5_wa_bits(tex_obj->_Format,
+                                                 tex_obj->mt->aux_usage);
+      }
+      gen9_apply_astc5x5_wa_flush(brw, astc5x5_wa_bits);
+   }
+
+   /* Resolve depth buffer and render cache of each enabled texture. */
    for (int i = 0; i <= maxEnabledUnit; i++) {
       if (!ctx->Texture.Unit[i]._Current)
 	 continue;
@@ -452,7 +536,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering,
 
       intel_miptree_prepare_texture(brw, tex_obj->mt, view_format,
                                     min_level, num_levels,
-                                    min_layer, num_layers);
+                                    min_layer, num_layers,
+                                    astc5x5_wa_bits);
 
       /* If any programs are using it with texelFetch, we may need to also do
        * a prepare with an sRGB format to ensure texelFetch works "properly".
@@ -463,7 +548,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering,
          if (txf_format != view_format) {
             intel_miptree_prepare_texture(brw, tex_obj->mt, txf_format,
                                           min_level, num_levels,
-                                          min_layer, num_layers);
+                                          min_layer, num_layers,
+                                          astc5x5_wa_bits);
          }
       }
 
@@ -535,7 +621,8 @@ brw_predraw_resolve_framebuffer(struct brw_context *brw,
          if (irb) {
             intel_miptree_prepare_texture(brw, irb->mt, irb->mt->surf.format,
                                           irb->mt_level, 1,
-                                          irb->mt_layer, irb->layer_count);
+                                          irb->mt_layer, irb->layer_count,
+                                          brw->gen9_astc5x5_wa_tex_mask);
          }
       }
    }
diff --git a/src/mesa/drivers/dri/i965/brw_generate_mipmap.c b/src/mesa/drivers/dri/i965/brw_generate_mipmap.c
index 32c2933f721..4125ae6e11c 100644
--- a/src/mesa/drivers/dri/i965/brw_generate_mipmap.c
+++ b/src/mesa/drivers/dri/i965/brw_generate_mipmap.c
@@ -105,6 +105,23 @@ brw_generate_mipmap(struct gl_context *ctx, GLenum target,
       last_layer = base_size->array_len - 1;
    }
 
+   /* The GL_EXT_texture_sRGB_decode extension's issues section says:
+    *
+    *    "10) How is mipmap generation of sRGB textures affected by the
+    *     TEXTURE_SRGB_DECODE_EXT parameter?
+    *
+    *     RESOLVED:  When the TEXTURE_SRGB_DECODE parameter is DECODE_EXT
+    *     for an sRGB texture, mipmap generation should decode sRGB texels
+    *     to a linear RGB color space, perform downsampling, then encode
+    *     back to an sRGB color space.  (Issue 24 in the EXT_texture_sRGB
+    *     specification provides a rationale for why.)  When the parameter
+    *     is SKIP_DECODE_EXT instead, mipmap generation skips the encode
+    *     and decode steps during mipmap generation.  By skipping the
+    *     encode and decode steps, sRGB mipmap generation should match
+    *     the mipmap generation for a non-sRGB texture."
+    */
+   bool do_srgb = tex_obj->Sampler.sRGBDecode == GL_DECODE_EXT;
+
    for (unsigned dst_level = base_level + 1;
         dst_level <= last_level;
         dst_level++) {
@@ -121,7 +138,7 @@ brw_generate_mipmap(struct gl_context *ctx, GLenum target,
                                  minify(base_size->width, dst_level),
                                  minify(base_size->height, dst_level),
                                  GL_LINEAR, false, false,
-                                 true, true);
+                                 do_srgb, do_srgb);
       }
    }
 }
diff --git a/src/mesa/drivers/dri/i965/brw_multisample_state.h b/src/mesa/drivers/dri/i965/brw_multisample_state.h
index 6cf324e561c..2142a17a484 100644
--- a/src/mesa/drivers/dri/i965/brw_multisample_state.h
+++ b/src/mesa/drivers/dri/i965/brw_multisample_state.h
@@ -38,13 +38,13 @@
 /**
  * 1x MSAA has a single sample at the center: (0.5, 0.5) -> (0x8, 0x8).
  *
- * 2x MSAA sample positions are (0.25, 0.25) and (0.75, 0.75):
+ * 2x MSAA sample positions are (0.75, 0.75) and (0.25, 0.25):
  *   4 c
- * 4 0
- * c   1
+ * 4 1
+ * c   0
  */
 static const uint32_t
-brw_multisample_positions_1x_2x = 0x0088cc44;
+brw_multisample_positions_1x_2x = 0x008844cc;
 
 /**
  * Sample positions:
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c
index d45529fc0c7..f04fbf32c80 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -329,6 +329,12 @@ dump_perf_query_callback(GLuint id, void *query_void, void *brw_void)
           o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,"),
           obj->pipeline_stats.bo ? "yes" : "no");
       break;
+   case NULL_RENDERER:
+      DBG("%4d: %-6s %-8s NULL_RENDERER\n",
+          id,
+          o->Used ? "Dirty," : "New,",
+          o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,"));
+      break;
    default:
       unreachable("Unknown query type");
       break;
@@ -430,6 +436,10 @@ brw_get_perf_query_info(struct gl_context *ctx,
       *n_active = brw->perfquery.n_active_pipeline_stats_queries;
       break;
 
+   case NULL_RENDERER:
+      *n_active = brw->perfquery.n_active_null_renderers;
+      break;
+
    default:
       unreachable("Unknown query type");
       break;
@@ -1019,6 +1029,7 @@ brw_begin_perf_query(struct gl_context *ctx,
    struct brw_context *brw = brw_context(ctx);
    struct brw_perf_query_object *obj = brw_perf_query(o);
    const struct brw_perf_query_info *query = obj->query;
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
 
    /* We can assume the frontend hides mistaken attempts to Begin a
     * query object multiple times before its End. Similarly if an
@@ -1103,7 +1114,6 @@ brw_begin_perf_query(struct gl_context *ctx,
       /* If the OA counters aren't already on, enable them. */
       if (brw->perfquery.oa_stream_fd == -1) {
          __DRIscreen *screen = brw->screen->driScrnPriv;
-         const struct gen_device_info *devinfo = &brw->screen->devinfo;
 
          /* The period_exponent gives a sampling period as follows:
           *   sample_period = timestamp_period * 2^(period_exponent + 1)
@@ -1249,6 +1259,23 @@ brw_begin_perf_query(struct gl_context *ctx,
       ++brw->perfquery.n_active_pipeline_stats_queries;
       break;
 
+   case NULL_RENDERER:
+      ++brw->perfquery.n_active_null_renderers;
+      if (devinfo->gen >= 9) {
+         brw_load_register_imm32(brw, CS_DEBUG_MODE2,
+                                 REG_MASK(CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE) |
+                                 CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE);
+      } else {
+         brw_load_register_imm32(brw, INSTPM,
+                                 REG_MASK(INSTPM_3D_RENDERER_INSTRUCTION_DISABLE |
+                                          INSTPM_MEDIA_INSTRUCTION_DISABLE) |
+                                 INSTPM_3D_RENDERER_INSTRUCTION_DISABLE |
+                                 INSTPM_MEDIA_INSTRUCTION_DISABLE);
+      }
+      brw_emit_pipe_control_flush(brw,
+                                  PIPE_CONTROL_LRI_WRITE_IMMEDIATE);
+      break;
+
    default:
       unreachable("Unknown query type");
       break;
@@ -1269,6 +1296,7 @@ brw_end_perf_query(struct gl_context *ctx,
 {
    struct brw_context *brw = brw_context(ctx);
    struct brw_perf_query_object *obj = brw_perf_query(o);
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
 
    DBG("End(%d)\n", o->Id);
 
@@ -1311,6 +1339,21 @@ brw_end_perf_query(struct gl_context *ctx,
       --brw->perfquery.n_active_pipeline_stats_queries;
       break;
 
+   case NULL_RENDERER:
+      if (--brw->perfquery.n_active_null_renderers == 0) {
+         if (devinfo->gen >= 9) {
+            brw_load_register_imm32(brw, CS_DEBUG_MODE2,
+                                    REG_MASK(CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE));
+         } else {
+            brw_load_register_imm32(brw, INSTPM,
+                                    REG_MASK(INSTPM_3D_RENDERER_INSTRUCTION_DISABLE |
+                                             INSTPM_MEDIA_INSTRUCTION_DISABLE));
+         }
+         brw_emit_pipe_control_flush(brw,
+                                     PIPE_CONTROL_LRI_WRITE_IMMEDIATE);
+      }
+      break;
+
    default:
       unreachable("Unknown query type");
       break;
@@ -1336,6 +1379,9 @@ brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o)
       bo = obj->pipeline_stats.bo;
       break;
 
+   case NULL_RENDERER:
+      break;
+
    default:
       unreachable("Unknown query type");
       break;
@@ -1386,6 +1432,8 @@ brw_is_perf_query_ready(struct gl_context *ctx,
       return (obj->pipeline_stats.bo &&
               !brw_batch_references(&brw->batch, obj->pipeline_stats.bo) &&
               !brw_bo_busy(obj->pipeline_stats.bo));
+   case NULL_RENDERER:
+      return true;
 
    default:
       unreachable("Unknown query type");
@@ -1601,6 +1649,9 @@ brw_get_perf_query_data(struct gl_context *ctx,
       written = get_pipeline_stats_data(brw, obj, data_size, (uint8_t *)data);
       break;
 
+   case NULL_RENDERER:
+      break;
+
    default:
       unreachable("Unknown query type");
       break;
@@ -1671,6 +1722,9 @@ brw_delete_perf_query(struct gl_context *ctx,
       }
       break;
 
+   case NULL_RENDERER:
+      break;
+
    default:
       unreachable("Unknown query type");
       break;
@@ -2153,6 +2207,15 @@ get_register_queries_function(const struct gen_device_info *devinfo)
    return NULL;
 }
 
+static void
+fill_null_renderer_perf_query_info(struct brw_context *brw,
+                                   struct brw_perf_query_info *query)
+{
+   query->kind = NULL_RENDERER;
+   query->name = "Intel_Null_Hardware_Query";
+   query->n_counters = 0;
+}
+
 static unsigned
 brw_init_perf_query_info(struct gl_context *ctx)
 {
@@ -2211,6 +2274,10 @@ brw_init_perf_query_info(struct gl_context *ctx)
          enumerate_sysfs_metrics(brw);
 
       brw_perf_query_register_mdapi_oa_query(brw);
+
+      struct brw_perf_query_info *null_query =
+         brw_perf_query_append_query_info(brw);
+      fill_null_renderer_perf_query_info(brw, null_query);
    }
 
    brw->perfquery.unaccumulated =
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index f6acf81b899..546d103d1a4 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -128,7 +128,7 @@ void brw_disk_cache_write_compute_program(struct brw_context *brw);
 void brw_disk_cache_write_render_programs(struct brw_context *brw);
 
 /***********************************************************************
- * brw_state.c
+ * brw_state_upload.c
  */
 void brw_upload_render_state(struct brw_context *brw);
 void brw_render_state_finished(struct brw_context *brw);
@@ -138,6 +138,7 @@ void brw_init_state(struct brw_context *brw);
 void brw_destroy_state(struct brw_context *brw);
 void brw_emit_select_pipeline(struct brw_context *brw,
                               enum brw_pipeline pipeline);
+void brw_enable_obj_preemption(struct brw_context *brw, bool enable);
 
 static inline void
 brw_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 757426407c3..af48942927f 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -45,6 +45,28 @@
 #include "brw_cs.h"
 #include "main/framebuffer.h"
 
+void
+brw_enable_obj_preemption(struct brw_context *brw, bool enable)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   assert(devinfo->gen >= 9);
+
+   if (enable == brw->object_preemption)
+      return;
+
+   /* A fixed function pipe flush is required before modifying this field */
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
+
+   bool replay_mode = enable ?
+      GEN9_REPLAY_MODE_MIDOBJECT : GEN9_REPLAY_MODE_MIDBUFFER;
+
+   /* enable object level preemption */
+   brw_load_register_imm32(brw, CS_CHICKEN1,
+                           replay_mode | GEN9_REPLAY_MODE_MASK);
+
+   brw->object_preemption = enable;
+}
+
 static void
 brw_upload_initial_gpu_state(struct brw_context *brw)
 {
@@ -135,6 +157,9 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
          ADVANCE_BATCH();
       }
    }
+
+   if (devinfo->gen >= 10)
+      brw_enable_obj_preemption(brw, true);
 }
 
 static inline const struct brw_tracked_state *
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 9397b637c79..12a3e3d7a6c 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -603,7 +603,8 @@ static void brw_update_texture_surface(struct gl_context *ctx,
          view.usage |= ISL_SURF_USAGE_CUBE_BIT;
 
       enum isl_aux_usage aux_usage =
-         intel_miptree_texture_aux_usage(brw, mt, format);
+         intel_miptree_texture_aux_usage(brw, mt, format,
+                                         brw->gen9_astc5x5_wa_tex_mask);
 
       brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
                              surf_offset, surf_index,
@@ -1107,7 +1108,8 @@ update_renderbuffer_read_surfaces(struct brw_context *brw)
             };
 
             enum isl_aux_usage aux_usage =
-               intel_miptree_texture_aux_usage(brw, irb->mt, format);
+               intel_miptree_texture_aux_usage(brw, irb->mt, format,
+                                               brw->gen9_astc5x5_wa_tex_mask);
             if (brw->draw_aux_usage[i] == ISL_AUX_USAGE_NONE)
                aux_usage = ISL_AUX_USAGE_NONE;
 
diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
index bfa84fb9b77..3ce47f423ba 100644
--- a/src/mesa/drivers/dri/i965/gen6_multisample_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
@@ -68,10 +68,10 @@ gen6_get_sample_position(struct gl_context *ctx,
  * index layout in case of 2X and 4x MSAA, but they are different in
  * case of 8X MSAA.
  *
- * 2X MSAA sample index / number layout
- *           ---------
- *           | 0 | 1 |
- *           ---------
+ * 8X MSAA sample index layout    8x MSAA sample number layout
+ *           ---------                      ---------
+ *           | 0 | 1 |                      | 1 | 0 |
+ *           ---------                      ---------
  *
  * 4X MSAA sample index / number layout
  *           ---------
@@ -107,7 +107,7 @@ gen6_get_sample_position(struct gl_context *ctx,
 void
 gen6_set_sample_maps(struct gl_context *ctx)
 {
-   uint8_t map_2x[2] = {0, 1};
+   uint8_t map_2x[2] = {1, 0};
    uint8_t map_4x[4] = {0, 1, 2, 3};
    uint8_t map_8x[8] = {3, 7, 5, 0, 1, 2, 4, 6};
    uint8_t map_16x[16] = { 15, 10, 9, 7, 4, 1, 3, 13,
diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c
index 2e5f8e60ba9..e7259fc1b8d 100644
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -118,6 +118,33 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
    unsigned offset = 0;
 
+   /* From the SKL PRM, Workarounds section (#878):
+    *
+    *    Push constant buffer corruption possible. WA: Insert 2 zero-length
+    *    PushConst_PS before every intended PushConst_PS update, issue a
+    *    NULLPRIM after each of the zero len PC update to make sure CS commits
+    *    them.
+    *
+    * This workaround is attempting to solve a pixel shader push constant
+    * synchronization issue.
+    *
+    * There's an unpublished WA that involves re-emitting
+    * 3DSTATE_PUSH_CONSTANT_ALLOC_PS for every 500-ish 3DSTATE_CONSTANT_PS
+    * packets. Since our counting methods may not be reliable due to
+    * context-switching and pre-emption, we instead choose to approximate this
+    * behavior by re-emitting the packet at the top of the batch.
+    */
+   if (brw->ctx.NewDriverState == BRW_NEW_BATCH) {
+       /* SKL GT2 and GLK 2x6 have reliably demonstrated this issue thus far.
+        * We've also seen some intermittent failures from SKL GT4 and BXT in
+        * the past.
+        */
+      if (!devinfo->is_skylake &&
+          !devinfo->is_broxton &&
+          !devinfo->is_geminilake)
+         return;
+   }
+
    BEGIN_BATCH(10);
    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2));
    OUT_BATCH(vs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
@@ -154,6 +181,7 @@ const struct brw_tracked_state gen7_push_constant_space = {
    .dirty = {
       .mesa = 0,
       .brw = BRW_NEW_CONTEXT |
+             BRW_NEW_BATCH | /* Push constant workaround */
              BRW_NEW_GEOMETRY_PROGRAM |
              BRW_NEW_TESS_PROGRAMS,
    },
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index ea5ad55be59..a54a46ed6b9 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -2806,7 +2806,7 @@ genX(upload_gs_state)(struct brw_context *brw)
 #if GEN_GEN < 7
          gs.SOStatisticsEnable = true;
          if (gs_prog->info.has_transform_feedback_varyings)
-            gs.SVBIPayloadEnable = true;
+            gs.SVBIPayloadEnable = _mesa_is_xfb_active_and_unpaused(ctx);
 
          /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it
           * was previously done for gen6.
@@ -3042,7 +3042,26 @@ set_blend_entry_bits(struct brw_context *brw, BLEND_ENTRY_GENXML *entry, int i,
          dstA = fix_dual_blend_alpha_to_one(dstA);
       }
 
-      entry->ColorBufferBlendEnable = true;
+      /* BRW_NEW_FS_PROG_DATA */
+      const struct brw_wm_prog_data *wm_prog_data =
+         brw_wm_prog_data(brw->wm.base.prog_data);
+
+      /* The Dual Source Blending documentation says:
+       *
+       * "If SRC1 is included in a src/dst blend factor and
+       * a DualSource RT Write message is not used, results
+       * are UNDEFINED. (This reflects the same restriction in DX APIs,
+       * where undefined results are produced if “o1” is not written
+       * by a PS – there are no default values defined).
+       * If SRC1 is not included in a src/dst blend factor,
+       * dual source blending must be disabled."
+       *
+       * There is no way to gracefully fix this undefined situation
+       * so we just disable the blending to prevent possible issues.
+       */
+      entry->ColorBufferBlendEnable =
+         !ctx->Color.Blend[0]._UsesDualSrc || wm_prog_data->dual_src_blend;
+
       entry->DestinationBlendFactor = blend_factor(dstRGB);
       entry->SourceBlendFactor = blend_factor(srcRGB);
       entry->DestinationAlphaBlendFactor = blend_factor(dstA);
@@ -3188,6 +3207,7 @@ static const struct brw_tracked_state genX(blend_state) = {
               _NEW_MULTISAMPLE,
       .brw = BRW_NEW_BATCH |
              BRW_NEW_BLORP |
+             BRW_NEW_FS_PROG_DATA |
              BRW_NEW_STATE_BASE_ADDRESS,
    },
    .emit = genX(upload_blend_state),
@@ -4814,7 +4834,25 @@ genX(upload_ps_blend)(struct brw_context *brw)
             dstA = fix_dual_blend_alpha_to_one(dstA);
          }
 
-         pb.ColorBufferBlendEnable = true;
+         /* BRW_NEW_FS_PROG_DATA */
+         const struct brw_wm_prog_data *wm_prog_data =
+            brw_wm_prog_data(brw->wm.base.prog_data);
+
+         /* The Dual Source Blending documentation says:
+          *
+          * "If SRC1 is included in a src/dst blend factor and
+          * a DualSource RT Write message is not used, results
+          * are UNDEFINED. (This reflects the same restriction in DX APIs,
+          * where undefined results are produced if “o1” is not written
+          * by a PS – there are no default values defined).
+          * If SRC1 is not included in a src/dst blend factor,
+          * dual source blending must be disabled."
+          *
+          * There is no way to gracefully fix this undefined situation
+          * so we just disable the blending to prevent possible issues.
+          */
+         pb.ColorBufferBlendEnable =
+            !color->Blend[0]._UsesDualSrc || wm_prog_data->dual_src_blend;
          pb.SourceAlphaBlendFactor = brw_translate_blend_factor(srcA);
          pb.DestinationAlphaBlendFactor = brw_translate_blend_factor(dstA);
          pb.SourceBlendFactor = brw_translate_blend_factor(srcRGB);
@@ -4833,7 +4871,8 @@ static const struct brw_tracked_state genX(ps_blend) = {
               _NEW_MULTISAMPLE,
       .brw = BRW_NEW_BLORP |
              BRW_NEW_CONTEXT |
-             BRW_NEW_FRAGMENT_PROGRAM,
+             BRW_NEW_FRAGMENT_PROGRAM |
+             BRW_NEW_FS_PROG_DATA,
    },
    .emit = genX(upload_ps_blend)
 };
@@ -5548,6 +5587,50 @@ static const struct brw_tracked_state genX(blend_constant_color) = {
 
 /* ---------------------------------------------------------------------- */
 
+#if GEN_GEN == 9
+
+/**
+ * Implement workarounds for preemption:
+ *    - WaDisableMidObjectPreemptionForGSLineStripAdj
+ *    - WaDisableMidObjectPreemptionForTrifanOrPolygon
+ */
+static void
+gen9_emit_preempt_wa(struct brw_context *brw)
+{
+   /* WaDisableMidObjectPreemptionForGSLineStripAdj
+    *
+    *    WA: Disable mid-draw preemption when draw-call is a linestrip_adj and
+    *    GS is enabled.
+    */
+   bool object_preemption =
+      !(brw->primitive == _3DPRIM_LINESTRIP_ADJ && brw->gs.enabled);
+
+   /* WaDisableMidObjectPreemptionForTrifanOrPolygon
+    *
+    *    TriFan miscompare in Execlist Preemption test. Cut index that is on a
+    *    previous context. End the previous, the resume another context with a
+    *    tri-fan or polygon, and the vertex count is corrupted. If we prempt
+    *    again we will cause corruption.
+    *
+    *    WA: Disable mid-draw preemption when draw-call has a tri-fan.
+    */
+   object_preemption =
+      object_preemption && !(brw->primitive == _3DPRIM_TRIFAN);
+
+   brw_enable_obj_preemption(brw, object_preemption);
+}
+
+static const struct brw_tracked_state gen9_preempt_wa = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_PRIMITIVE | BRW_NEW_GEOMETRY_PROGRAM,
+   },
+   .emit = gen9_emit_preempt_wa,
+};
+#endif
+
+/* ---------------------------------------------------------------------- */
+
 void
 genX(init_atoms)(struct brw_context *brw)
 {
@@ -5852,6 +5935,9 @@ genX(init_atoms)(struct brw_context *brw)
 
       &genX(cut_index),
       &gen8_pma_fix,
+#if GEN_GEN == 9
+      &gen9_preempt_wa,
+#endif
    };
 #endif
 
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 65d2c64e319..82f0a89a61a 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -55,6 +55,8 @@
 
 static void
 intel_batchbuffer_reset(struct brw_context *brw);
+static void
+brw_new_batch(struct brw_context *brw);
 
 static void
 dump_validation_list(struct intel_batchbuffer *batch)
@@ -311,6 +313,8 @@ intel_batchbuffer_reset_to_saved(struct brw_context *brw)
    brw->batch.exec_count = brw->batch.saved.exec_count;
 
    brw->batch.map_next = brw->batch.saved.map_next;
+   if (USED_BATCH(brw->batch) == 0)
+      brw_new_batch(brw);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index e6825955b0e..4da540dee94 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -327,6 +327,35 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
    return intel_alloc_private_renderbuffer_storage(ctx, rb, internalFormat, width, height);
 }
 
+static mesa_format
+fallback_rgbx_to_rgba(struct intel_screen *screen, struct gl_renderbuffer *rb,
+                      mesa_format original_format)
+{
+   mesa_format format = original_format;
+
+   /* The base format and internal format must be derived from the user-visible
+    * format (that is, the gl_config's format), even if we internally use
+    * choose a different format for the renderbuffer. Otherwise, rendering may
+    * use incorrect channel write masks.
+    */
+   rb->_BaseFormat = _mesa_get_format_base_format(original_format);
+   rb->InternalFormat = rb->_BaseFormat;
+
+   if (!screen->mesa_format_supports_render[original_format]) {
+      /* The glRenderbufferStorage paths in core Mesa detect if the driver
+       * does not support the user-requested format, and then searches for
+       * a fallback format. The DRI code bypasses core Mesa, though. So we do
+       * the fallbacks here.
+       *
+       * We must support MESA_FORMAT_R8G8B8X8 on Android because the Android
+       * framework requires HAL_PIXEL_FORMAT_RGBX8888 winsys surfaces.
+       */
+      format = _mesa_format_fallback_rgbx_to_rgba(original_format);
+      assert(screen->mesa_format_supports_render[format]);
+   }
+   return format;
+}
+
 static void
 intel_image_target_renderbuffer_storage(struct gl_context *ctx,
 					struct gl_renderbuffer *rb,
@@ -349,8 +378,13 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx,
       return;
    }
 
+   rb->Format = fallback_rgbx_to_rgba(brw->screen, rb, image->format);
+
+   mesa_format chosen_format = rb->Format == image->format ?
+      image->format : rb->Format;
+
    /* __DRIimage is opaque to the core so it has to be checked here */
-   if (!brw->mesa_format_supports_render[image->format]) {
+   if (!brw->mesa_format_supports_render[chosen_format]) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
             "glEGLImageTargetRenderbufferStorage(unsupported image format)");
       return;
@@ -365,15 +399,12 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx,
     * content.
     */
    irb->mt = intel_miptree_create_for_dri_image(brw, image, GL_TEXTURE_2D,
-                                                image->format, false);
+                                                rb->Format, false);
    if (!irb->mt)
       return;
 
-   rb->InternalFormat = image->internal_format;
    rb->Width = image->width;
    rb->Height = image->height;
-   rb->Format = image->format;
-   rb->_BaseFormat = _mesa_get_format_base_format(image->format);
    rb->NeedsFinishRenderTexture = true;
    irb->layer_count = 1;
 }
@@ -434,27 +465,7 @@ intel_create_winsys_renderbuffer(struct intel_screen *screen,
    rb->ClassID = INTEL_RB_CLASS;
    rb->NumSamples = num_samples;
 
-   /* The base format and internal format must be derived from the user-visible
-    * format (that is, the gl_config's format), even if we internally use
-    * choose a different format for the renderbuffer. Otherwise, rendering may
-    * use incorrect channel write masks.
-    */
-   rb->_BaseFormat = _mesa_get_format_base_format(format);
-   rb->InternalFormat = rb->_BaseFormat;
-
-   rb->Format = format;
-   if (!screen->mesa_format_supports_render[rb->Format]) {
-      /* The glRenderbufferStorage paths in core Mesa detect if the driver
-       * does not support the user-requested format, and then searches for
-       * a falback format. The DRI code bypasses core Mesa, though. So we do
-       * the fallbacks here.
-       *
-       * We must support MESA_FORMAT_R8G8B8X8 on Android because the Android
-       * framework requires HAL_PIXEL_FORMAT_RGBX8888 winsys surfaces.
-       */
-      rb->Format = _mesa_format_fallback_rgbx_to_rgba(rb->Format);
-      assert(screen->mesa_format_supports_render[rb->Format]);
-   }
+   rb->Format = fallback_rgbx_to_rgba(screen, rb, format);
 
    /* intel-specific methods */
    rb->Delete = intel_delete_renderbuffer;
diff --git a/src/mesa/drivers/dri/i965/intel_image.h b/src/mesa/drivers/dri/i965/intel_image.h
index a8193c6def9..ca604159dc2 100644
--- a/src/mesa/drivers/dri/i965/intel_image.h
+++ b/src/mesa/drivers/dri/i965/intel_image.h
@@ -89,9 +89,6 @@ struct __DRIimageRec {
    GLuint tile_y;
    bool has_depthstencil;
 
-   /** The image was created with EGL_EXT_image_dma_buf_import. */
-   bool dma_buf_imported;
-
    /** Offset of the auxiliary compression surface in the bo. */
    uint32_t aux_offset;
 
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index a18d5ac3624..36681352ba7 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2542,8 +2542,19 @@ can_texture_with_ccs(struct brw_context *brw,
 enum isl_aux_usage
 intel_miptree_texture_aux_usage(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
-                                enum isl_format view_format)
+                                enum isl_format view_format,
+                                enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits)
 {
+   assert(brw->screen->devinfo.gen == 9 || astc5x5_wa_bits == 0);
+
+   /* On gen9, ASTC 5x5 textures cannot live in the sampler cache along side
+    * CCS or HiZ compressed textures.  See gen9_apply_astc5x5_wa_flush() for
+    * details.
+    */
+   if ((astc5x5_wa_bits & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) &&
+       mt->aux_usage != ISL_AUX_USAGE_MCS)
+      return ISL_AUX_USAGE_NONE;
+
    switch (mt->aux_usage) {
    case ISL_AUX_USAGE_HIZ:
       if (intel_miptree_sample_with_hiz(brw, mt))
@@ -2601,10 +2612,12 @@ intel_miptree_prepare_texture(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               enum isl_format view_format,
                               uint32_t start_level, uint32_t num_levels,
-                              uint32_t start_layer, uint32_t num_layers)
+                              uint32_t start_layer, uint32_t num_layers,
+                              enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits)
 {
    enum isl_aux_usage aux_usage =
-      intel_miptree_texture_aux_usage(brw, mt, view_format);
+      intel_miptree_texture_aux_usage(brw, mt, view_format, astc5x5_wa_bits);
+
    bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE;
 
    /* Clear color is specified as ints or floats and the conversion is done by
@@ -3542,21 +3555,18 @@ intel_miptree_release_map(struct intel_mipmap_tree *mt,
 
 static bool
 can_blit_slice(struct intel_mipmap_tree *mt,
-               unsigned int level, unsigned int slice)
+               const struct intel_miptree_map *map)
 {
    /* See intel_miptree_blit() for details on the 32k pitch limit. */
-   if (intel_miptree_blt_pitch(mt) >= 32768)
-      return false;
-
-   return true;
+   const unsigned src_blt_pitch = intel_miptree_blt_pitch(mt);
+   const unsigned dst_blt_pitch = ALIGN(map->w * mt->cpp, 64);
+   return src_blt_pitch < 32768 && dst_blt_pitch < 32768;
 }
 
 static bool
 use_intel_mipree_map_blit(struct brw_context *brw,
                           struct intel_mipmap_tree *mt,
-                          GLbitfield mode,
-                          unsigned int level,
-                          unsigned int slice)
+                          const struct intel_miptree_map *map)
 {
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
 
@@ -3564,19 +3574,19 @@ use_intel_mipree_map_blit(struct brw_context *brw,
       /* It's probably not worth swapping to the blit ring because of
        * all the overhead involved.
        */
-       !(mode & GL_MAP_WRITE_BIT) &&
+       !(map->mode & GL_MAP_WRITE_BIT) &&
        !mt->compressed &&
        (mt->surf.tiling == ISL_TILING_X ||
         /* Prior to Sandybridge, the blitter can't handle Y tiling */
         (devinfo->gen >= 6 && mt->surf.tiling == ISL_TILING_Y0) ||
         /* Fast copy blit on skl+ supports all tiling formats. */
         devinfo->gen >= 9) &&
-       can_blit_slice(mt, level, slice))
+       can_blit_slice(mt, map))
       return true;
 
    if (mt->surf.tiling != ISL_TILING_LINEAR &&
        mt->bo->size >= brw->max_gtt_map_object_size) {
-      assert(can_blit_slice(mt, level, slice));
+      assert(can_blit_slice(mt, map));
       return true;
    }
 
@@ -3625,7 +3635,7 @@ intel_miptree_map(struct brw_context *brw,
       intel_miptree_map_etc(brw, mt, map, level, slice);
    } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
       intel_miptree_map_depthstencil(brw, mt, map, level, slice);
-   } else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) {
+   } else if (use_intel_mipree_map_blit(brw, mt, map)) {
       intel_miptree_map_blit(brw, mt, map, level, slice);
 #if defined(USE_SSE41)
    } else if (!(mode & GL_MAP_WRITE_BIT) &&
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index bb7df7ad235..08c129a4b8b 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -621,13 +621,15 @@ intel_miptree_access_raw(struct brw_context *brw,
 enum isl_aux_usage
 intel_miptree_texture_aux_usage(struct brw_context *brw,
                                 struct intel_mipmap_tree *mt,
-                                enum isl_format view_format);
+                                enum isl_format view_format,
+                                enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits);
 void
 intel_miptree_prepare_texture(struct brw_context *brw,
                               struct intel_mipmap_tree *mt,
                               enum isl_format view_format,
                               uint32_t start_level, uint32_t num_levels,
-                              uint32_t start_layer, uint32_t num_layers);
+                              uint32_t start_layer, uint32_t num_layers,
+                              enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits);
 void
 intel_miptree_prepare_image(struct brw_context *brw,
                             struct intel_mipmap_tree *mt);
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index 6ed7895bc76..8a90b207add 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -181,7 +181,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
     * tiled_to_linear a negative pitch so that it walks through the
     * client's data backwards as it walks through the renderbufer forwards.
     */
-   if (rb->Name == 0) {
+   if (ctx->ReadBuffer->FlipY) {
       yoffset = rb->Height - yoffset - height;
       pixels += (ptrdiff_t) (height - 1) * dst_pitch;
       dst_pitch = -dst_pitch;
@@ -249,7 +249,7 @@ intel_readpixels_blorp(struct gl_context *ctx,
    return brw_blorp_download_miptree(brw, irb->mt, rb->Format, swizzle,
                                      irb->mt_level, x, y, irb->mt_layer,
                                      w, h, 1, GL_TEXTURE_2D, format, type,
-                                     rb->Name == 0, pixels, packing);
+                                     ctx->ReadBuffer->FlipY, pixels, packing);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index cb357419a77..2ceadd005b0 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -61,6 +61,33 @@ DRI_CONF_BEGIN
 	    DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")
 	 DRI_CONF_DESC_END
       DRI_CONF_OPT_END
+
+      DRI_CONF_OPT_BEGIN_B(simd32_heuristic_grouped_check, "true")
+              DRI_CONF_DESC(en, "Enable/disable grouped texture fetch "
+                            "check in the SIMD32 selection heuristic.")
+      DRI_CONF_OPT_END
+      DRI_CONF_OPT_BEGIN_V(simd32_heuristic_grouped_sends, int, 6, "1:999")
+             DRI_CONF_DESC(en, "How many grouped texture fetches should "
+                            "the SIMD32 selection heuristic allow.")
+      DRI_CONF_OPT_END
+      DRI_CONF_OPT_BEGIN_B(simd32_heuristic_inst_check, "true")
+              DRI_CONF_DESC(en, "Enable/disable SIMD32/SIMD16 instruction "
+                            "count ratio check in the SIMD32 selection "
+                            "heuristic.")
+      DRI_CONF_OPT_END
+      DRI_CONF_OPT_BEGIN_V(simd32_heuristic_inst_ratio, float, 2.3, "1:999")
+              DRI_CONF_DESC(en, "SIMD32/SIMD16 instruction count ratio "
+                            "the SIMD32 selection heuristic should allow.")
+      DRI_CONF_OPT_END
+      DRI_CONF_OPT_BEGIN_B(simd32_heuristic_mrt_check, "true")
+              DRI_CONF_DESC(en, "Enable/disable MRT write check in the "
+                            "SIMD32 selection heuristic.")
+      DRI_CONF_OPT_END
+      DRI_CONF_OPT_BEGIN_V(simd32_heuristic_max_mrts, int, 1, "1:8")
+              DRI_CONF_DESC(en, "How many MRT writes should the SIMD32 "
+                            "selection heuristic allow.")
+      DRI_CONF_OPT_END
+
       DRI_CONF_MESA_NO_ERROR("false")
    DRI_CONF_SECTION_END
 
@@ -283,6 +310,18 @@ static const struct intel_image_format intel_image_formats[] = {
      { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
        { 1, 1, 1, __DRI_IMAGE_FORMAT_GR88, 2 } } },
 
+   { __DRI_IMAGE_FOURCC_P010, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
+     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 },
+       { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } },
+
+   { __DRI_IMAGE_FOURCC_P012, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
+     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 },
+       { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } },
+
+   { __DRI_IMAGE_FOURCC_P016, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
+     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 },
+       { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } },
+
    { __DRI_IMAGE_FOURCC_NV16, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
      { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
        { 1, 1, 0, __DRI_IMAGE_FORMAT_GR88, 2 } } },
@@ -958,7 +997,6 @@ intel_dup_image(__DRIimage *orig_image, void *loaderPrivate)
    image->tile_y          = orig_image->tile_y;
    image->has_depthstencil = orig_image->has_depthstencil;
    image->data            = loaderPrivate;
-   image->dma_buf_imported = orig_image->dma_buf_imported;
    image->aux_offset      = orig_image->aux_offset;
    image->aux_pitch       = orig_image->aux_pitch;
 
@@ -1238,7 +1276,6 @@ intel_create_image_from_dma_bufs2(__DRIscreen *dri_screen,
       return NULL;
    }
 
-   image->dma_buf_imported = true;
    image->yuv_color_space = yuv_color_space;
    image->sample_range = sample_range;
    image->horizontal_siting = horizontal_siting;
@@ -1275,9 +1312,9 @@ static bool
 intel_image_format_is_supported(const struct gen_device_info *devinfo,
                                 const struct intel_image_format *fmt)
 {
-   if (fmt->fourcc == __DRI_IMAGE_FOURCC_SARGB8888 ||
-       fmt->fourcc == __DRI_IMAGE_FOURCC_SABGR8888)
-      return false;
+   /* Currently, all formats with an intel_image_format are available on all
+    * platforms so there's really nothing to check there.
+    */
 
 #ifndef NDEBUG
    if (fmt->nplanes == 1) {
@@ -1303,6 +1340,14 @@ intel_query_dma_buf_formats(__DRIscreen *_screen, int max,
    int num_formats = 0, i;
 
    for (i = 0; i < ARRAY_SIZE(intel_image_formats); i++) {
+      /* These two formats are valid DRI formats but do not exist in
+       * drm_fourcc.h in the Linux kernel.  We don't want to accidentally
+       * advertise them through the EGL layer.
+       */
+      if (intel_image_formats[i].fourcc == __DRI_IMAGE_FOURCC_SARGB8888 ||
+          intel_image_formats[i].fourcc == __DRI_IMAGE_FOURCC_SABGR8888)
+         continue;
+
       if (!intel_image_format_is_supported(&screen->devinfo,
                                            &intel_image_formats[i]))
          continue;
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
index 3d948381f4a..98c92ccfba7 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -613,16 +613,6 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target,
    if (image == NULL)
       return;
 
-   /* We support external textures only for EGLImages created with
-    * EGL_EXT_image_dma_buf_import. We may lift that restriction in the future.
-    */
-   if (target == GL_TEXTURE_EXTERNAL_OES && !image->dma_buf_imported) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-            "glEGLImageTargetTexture2DOES(external target is enabled only "
-               "for images created with EGL_EXT_image_dma_buf_import");
-      return;
-   }
-
    /* Disallow depth/stencil textures: we don't have a way to pass the
     * separate stencil miptree of a GL_DEPTH_STENCIL texture through.
     */
diff --git a/src/mesa/drivers/dri/i965/meson.build b/src/mesa/drivers/dri/i965/meson.build
index 761bb51d6fa..fe6a5ad55d1 100644
--- a/src/mesa/drivers/dri/i965/meson.build
+++ b/src/mesa/drivers/dri/i965/meson.build
@@ -142,7 +142,7 @@ foreach v : ['40', '45', '50', '60', '70', '75', '80', '90', '100', '110']
     ['genX_blorp_exec.c', 'genX_state_upload.c', gen_xml_pack],
     include_directories : [inc_common, inc_intel, inc_dri_common],
     c_args : [
-      c_vis_args, no_override_init_args, '-msse2',
+      c_vis_args, no_override_init_args, c_sse2_args,
       '-DGEN_VERSIONx10=@0@'.format(v),
     ],
     dependencies : [dep_libdrm, idep_nir_headers],
@@ -183,8 +183,8 @@ libi965 = static_library(
   include_directories : [
     inc_common, inc_intel, inc_dri_common, inc_util, inc_drm_uapi,
   ],
-  c_args : [c_vis_args, no_override_init_args, '-msse2'],
-  cpp_args : [cpp_vis_args, '-msse2'],
+  c_args : [c_vis_args, no_override_init_args, c_sse2_args],
+  cpp_args : [cpp_vis_args, c_sse2_args],
   link_with : [
     i965_gen_libs, libintel_common, libintel_dev, libisl, libintel_compiler,
     libblorp,
diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c
index d53225d63ab..b4cff8c2592 100644
--- a/src/mesa/drivers/dri/r200/r200_state.c
+++ b/src/mesa/drivers/dri/r200/r200_state.c
@@ -688,10 +688,10 @@ static void r200ColorMask( struct gl_context *ctx,
    if (!rrb)
      return;
    mask = radeonPackColor( rrb->cpp,
-			   GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 0),
-			   GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 1),
-			   GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 2),
-			   GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 3) );
+			   GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 0)*0xFF,
+			   GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 1)*0xFF,
+			   GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 2)*0xFF,
+			   GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 3)*0xFF );
 
 
    if (!(r && g && b && a))
diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c
index 8b72c98a3b2..410a78fc084 100644
--- a/src/mesa/drivers/dri/radeon/radeon_state.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state.c
@@ -503,10 +503,10 @@ static void radeonColorMask( struct gl_context *ctx,
      return;
 
    mask = radeonPackColor( rrb->cpp,
-			   GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 0),
-			   GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 1),
-			   GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 2),
-			   GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 3) );
+			   GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 0)*0xFF,
+			   GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 1)*0xFF,
+			   GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 2)*0xFF,
+			   GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 3)*0xFF );
 
    if ( rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] != mask ) {
       RADEON_STATECHANGE( rmesa, msk );
diff --git a/src/mesa/gl.pc.in b/src/mesa/gl.pc.in
index 181724b97bf..680f7427768 100644
--- a/src/mesa/gl.pc.in
+++ b/src/mesa/gl.pc.in
@@ -7,7 +7,7 @@ Name: gl
 Description: Mesa OpenGL library
 Requires.private: @GL_PC_REQ_PRIV@
 Version: @PACKAGE_VERSION@
-Libs: -L${libdir} -l@GL_LIB@
+Libs: -L${libdir} -l@GL_PKGCONF_LIB@
 Libs.private: @GL_PC_LIB_PRIV@
 Cflags: -I${includedir} @GL_PC_CFLAGS@
 glx_tls: @GLX_TLS@
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 25c3161f7d0..4d95a072793 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -335,6 +335,30 @@ _mesa_extension_supported(const struct gl_context *ctx, extension_index i)
    return (ctx->Version >= ext->version[ctx->API]) && base[ext->offset];
 }
 
+/**
+ * Compare two entries of the extensions table.  Sorts first by year,
+ * then by name.
+ *
+ * Arguments are indices into _mesa_extension_table.
+ */
+static int
+extension_compare(const void *p1, const void *p2)
+{
+   extension_index i1 = * (const extension_index *) p1;
+   extension_index i2 = * (const extension_index *) p2;
+   const struct mesa_extension *e1 = &_mesa_extension_table[i1];
+   const struct mesa_extension *e2 = &_mesa_extension_table[i2];
+   int res;
+
+   res = (int)e1->year - (int)e2->year;
+
+   if (res == 0) {
+      res = strcmp(e1->name, e2->name);
+   }
+
+   return res;
+}
+
 
 /**
  * Construct the GL_EXTENSIONS string.  Called the first time that
@@ -372,8 +396,8 @@ _mesa_make_extension_string(struct gl_context *ctx)
 
       if (i->year <= maxYear &&
           _mesa_extension_supported(ctx, k)) {
-         length += strlen(i->name) + 1; /* +1 for space */
-         extension_indices[count++] = k;
+	 length += strlen(i->name) + 1; /* +1 for space */
+	 ++count;
       }
    }
    for (k = 0; k < MAX_UNRECOGNIZED_EXTENSIONS; k++)
@@ -385,6 +409,24 @@ _mesa_make_extension_string(struct gl_context *ctx)
       return NULL;
    }
 
+   /* Sort extensions in chronological order because idTech 2/3 games
+    * (e.g., Quake3 demo) store the extension list in a fixed size buffer.
+    * Some cases truncate, while others overflow the buffer. Resulting in
+    * misrendering and crashes, respectively.
+    * Address the former here, while the latter will be addressed by setting
+    * the MESA_EXTENSION_MAX_YEAR environment variable.
+    */
+   j = 0;
+   for (k = 0; k < MESA_EXTENSION_COUNT; ++k) {
+      if (_mesa_extension_table[k].year <= maxYear &&
+         _mesa_extension_supported(ctx, k)) {
+         extension_indices[j++] = k;
+      }
+   }
+   assert(j == count);
+   qsort(extension_indices, count,
+         sizeof *extension_indices, extension_compare);
+
    /* Build the extension string.*/
    for (j = 0; j < count; ++j) {
       const struct mesa_extension *i = &_mesa_extension_table[extension_indices[j]];
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index cfe2174ef12..a9400d5d5ad 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -1434,6 +1434,7 @@ framebuffer_parameteri(struct gl_context *ctx, struct gl_framebuffer *fb,
       if (!ctx->Extensions.MESA_framebuffer_flip_y)
          goto invalid_pname_enum;
       cannot_be_winsys_fbo = true;
+      break;
    default:
       goto invalid_pname_enum;
    }
diff --git a/src/mesa/main/multisample.c b/src/mesa/main/multisample.c
index f93a18832da..d30e5c92442 100644
--- a/src/mesa/main/multisample.c
+++ b/src/mesa/main/multisample.c
@@ -94,8 +94,8 @@ _mesa_GetMultisamplefv(GLenum pname, GLuint index, GLfloat * val)
 
       ctx->Driver.GetSamplePosition(ctx, ctx->DrawBuffer, index, val);
 
-      /* winsys FBOs are upside down */
-      if (_mesa_is_winsys_fbo(ctx->DrawBuffer))
+      /* FBOs can be upside down (winsys always are)*/
+      if (ctx->DrawBuffer->FlipY)
          val[1] = 1.0f - val[1];
 
       return;
diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
index e8c28d86162..a2c0d2c3eca 100644
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -922,6 +922,8 @@ read_pixels_es3_error_check(struct gl_context *ctx, GLenum format, GLenum type,
    case GL_RGBA:
       if (type == GL_FLOAT && data_type == GL_FLOAT)
          return GL_NO_ERROR; /* EXT_color_buffer_float */
+      if (type == GL_HALF_FLOAT && data_type == GL_FLOAT)
+         return GL_NO_ERROR;
       if (type == GL_UNSIGNED_BYTE && data_type == GL_UNSIGNED_NORMALIZED)
          return GL_NO_ERROR;
       if (internalFormat == GL_RGB10_A2 &&
diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index b5d86d64d5b..a3ec7241986 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -1426,6 +1426,11 @@ get_tex_level_parameter_image(struct gl_context *ctx,
                               _mesa_get_format_bits(texFormat,
                                                     GL_TEXTURE_GREEN_SIZE));
             }
+            if (*params == 0 && pname == GL_TEXTURE_INTENSITY_SIZE) {
+               /* Gallium may store intensity as LA */
+               *params = _mesa_get_format_bits(texFormat, 
+                                               GL_TEXTURE_ALPHA_SIZE);
+            }
          }
          else {
             *params = 0;
diff --git a/src/mesa/program/Android.mk b/src/mesa/program/Android.mk
index c6470e6289e..13d0da85882 100644
--- a/src/mesa/program/Android.mk
+++ b/src/mesa/program/Android.mk
@@ -41,7 +41,7 @@ endef
 include $(MESA_TOP)/src/mesa/Makefile.sources
 
 include $(CLEAR_VARS)
-
+LOCAL_CFLAGS += -Wno-error
 LOCAL_MODULE := libmesa_program
 LOCAL_MODULE_CLASS := STATIC_LIBRARIES
 LOCAL_STATIC_LIBRARIES := libmesa_nir \
diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c
index 4d7f388cfb0..3bbe451399f 100644
--- a/src/mesa/program/prog_statevars.c
+++ b/src/mesa/program/prog_statevars.c
@@ -571,7 +571,7 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index16 state[],
       case STATE_FB_WPOS_Y_TRANSFORM:
          /* A driver may negate this conditional by using ZW swizzle
           * instead of XY (based on e.g. some other state). */
-         if (_mesa_is_user_fbo(ctx->DrawBuffer)) {
+         if (!ctx->DrawBuffer->FlipY) {
             /* Identity (XY) followed by flipping Y upside down (ZW). */
             value[0] = 1.0F;
             value[1] = 0.0F;
diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c
index 6455e612e4e..fa147b89688 100644
--- a/src/mesa/state_tracker/st_atom_constbuf.c
+++ b/src/mesa/state_tracker/st_atom_constbuf.c
@@ -92,7 +92,7 @@ st_upload_constants(struct st_context *st, struct gl_program *prog)
    /* update constants */
    if (params && params->NumParameters) {
       struct pipe_constant_buffer cb;
-      const uint paramBytes = params->NumParameters * sizeof(GLfloat) * 4;
+      const uint paramBytes = params->NumParameterValues * sizeof(GLfloat);
 
       /* Update the constants which come from fixed-function state, such as
        * transformation matrices, fog factors, etc.  The rest of the values in
diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c
index 289856cd72d..27e4da31581 100644
--- a/src/mesa/state_tracker/st_atom_sampler.c
+++ b/src/mesa/state_tracker/st_atom_sampler.c
@@ -163,6 +163,9 @@ st_convert_sampler(const struct st_context *st,
       const GLboolean is_integer = texobj->_IsIntegerFormat;
       GLenum texBaseFormat = _mesa_base_tex_image(texobj)->_BaseFormat;
 
+      if (texobj->StencilSampling)
+         texBaseFormat = GL_STENCIL_INDEX;
+
       if (st->apply_texture_swizzle_to_border_color) {
          const struct st_texture_object *stobj = st_texture_object_const(texobj);
          /* XXX: clean that up to not use the sampler view at all */
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 5a331f841de..a05e264476a 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -566,7 +566,11 @@ make_texture(struct st_context *st,
       dest = pipe_transfer_map(pipe, pt, 0, 0,
                                PIPE_TRANSFER_WRITE, 0, 0,
                                width, height, &transfer);
-
+      if (!dest) {
+         pipe_resource_reference(&pt, NULL);
+         _mesa_unmap_pbo_source(ctx, unpack);
+         return NULL;
+      }
 
       /* Put image into texture transfer.
        * Note that the image is actually going to be upside down in
@@ -1173,6 +1177,13 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
       return;
    }
 
+   /* Put glDrawPixels image into a texture */
+   pt = make_texture(st, width, height, format, type, unpack, pixels);
+   if (!pt) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
+      return;
+   }
+
    /*
     * Get vertex/fragment shaders
     */
@@ -1199,13 +1210,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
       st_upload_constants(st, &st->fp->Base);
    }
 
-   /* Put glDrawPixels image into a texture */
-   pt = make_texture(st, width, height, format, type, unpack, pixels);
-   if (!pt) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
-      return;
-   }
-
    /* create sampler view for the image */
    sv[0] = st_create_texture_sampler_view(st->pipe, pt);
    if (!sv[0]) {
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index dbaf7f6f8fe..c6d9731bb82 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -1229,7 +1229,7 @@ void st_init_extensions(struct pipe_screen *screen,
        screen->is_format_supported(screen, PIPE_FORMAT_R8G8B8A8_UNORM,
                                    PIPE_TEXTURE_2D, 0, 0,
                                    PIPE_BIND_SAMPLER_VIEW) &&
-       screen->is_format_supported(screen, PIPE_FORMAT_B8G8R8A8_SRGB,
+       screen->is_format_supported(screen, PIPE_FORMAT_R8G8B8A8_SRGB,
                                    PIPE_TEXTURE_2D, 0, 0,
                                    PIPE_BIND_SAMPLER_VIEW) &&
        screen->is_format_supported(screen, PIPE_FORMAT_R16_UNORM,
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index ae2c49960c9..83620fb3f83 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -388,7 +388,7 @@ st_glsl_to_nir(struct st_context *st, struct gl_program *prog,
          ~prev_stages & shader_program->data->linked_stages;
 
       nir->info.next_stage = stages_mask ?
-         (gl_shader_stage) ffs(stages_mask) : MESA_SHADER_FRAGMENT;
+         (gl_shader_stage) u_bit_scan(&stages_mask) : MESA_SHADER_FRAGMENT;
    } else {
       nir->info.next_stage = MESA_SHADER_FRAGMENT;
    }
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index aec53309172..44a08901b81 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1227,6 +1227,10 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
    st_src_reg a, b, c;
    st_dst_reg result_dst;
 
+   // there is no TGSI opcode for this
+   if (ir->type->is_integer_64())
+      return false;
+
    ir_expression *expr = ir->operands[mul_operand]->as_expression();
    if (!expr || expr->operation != ir_binop_mul)
       return false;
@@ -6072,6 +6076,34 @@ compile_tgsi_instruction(struct st_translate *t,
    }
 }
 
+/* Invert SamplePos.y when rendering to the default framebuffer. */
+static void
+emit_samplepos_adjustment(struct st_translate *t, int wpos_y_transform)
+{
+   struct ureg_program *ureg = t->ureg;
+
+   assert(wpos_y_transform >= 0);
+   struct ureg_src trans_const = ureg_DECL_constant(ureg, wpos_y_transform);
+   struct ureg_src samplepos_sysval = t->systemValues[SYSTEM_VALUE_SAMPLE_POS];
+   struct ureg_dst samplepos_flipped = ureg_DECL_temporary(ureg);
+   struct ureg_dst is_fbo = ureg_DECL_temporary(ureg);
+
+   ureg_ADD(ureg, ureg_writemask(samplepos_flipped, TGSI_WRITEMASK_Y),
+            ureg_imm1f(ureg, 1), ureg_negate(samplepos_sysval));
+
+   /* If trans.x == 1, use samplepos.y, else use 1 - samplepos.y. */
+   ureg_FSEQ(ureg, ureg_writemask(is_fbo, TGSI_WRITEMASK_Y),
+             ureg_scalar(trans_const, TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1));
+   ureg_UCMP(ureg, ureg_writemask(samplepos_flipped, TGSI_WRITEMASK_Y),
+             ureg_src(is_fbo), samplepos_sysval, ureg_src(samplepos_flipped));
+   ureg_MOV(ureg, ureg_writemask(samplepos_flipped, TGSI_WRITEMASK_X),
+            samplepos_sysval);
+
+   /* Use the result in place of the system value. */
+   t->systemValues[SYSTEM_VALUE_SAMPLE_POS] = ureg_src(samplepos_flipped);
+}
+
+
 /**
  * Emit the TGSI instructions for inverting and adjusting WPOS.
  * This code is unavoidable because it also depends on whether
@@ -6639,6 +6671,10 @@ st_translate_program(
                emit_wpos(st_context(ctx), t, proginfo, ureg,
                          program->wpos_transform_const);
 
+            if (procType == PIPE_SHADER_FRAGMENT &&
+                semName == TGSI_SEMANTIC_SAMPLEPOS)
+               emit_samplepos_adjustment(t, program->wpos_transform_const);
+
             sysInputs &= ~(1ull << i);
          }
       }
@@ -6940,7 +6976,8 @@ get_mesa_program_tgsi(struct gl_context *ctx,
    /* This must be done before the uniform storage is associated. */
    if (shader->Stage == MESA_SHADER_FRAGMENT &&
        (prog->info.inputs_read & VARYING_BIT_POS ||
-        prog->info.system_values_read & (1ull << SYSTEM_VALUE_FRAG_COORD))) {
+        prog->info.system_values_read & (1ull << SYSTEM_VALUE_FRAG_COORD) ||
+        prog->info.system_values_read & (1ull << SYSTEM_VALUE_SAMPLE_POS))) {
       static const gl_state_index16 wposTransformState[STATE_LENGTH] = {
          STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
       };
diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
index 69286b57916..9ed316b0f78 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -834,6 +834,7 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi,
    struct st_context *shared_ctx = (struct st_context *) shared_stctxi;
    struct st_context *st;
    struct pipe_context *pipe;
+   struct gl_config* mode_ptr;
    struct gl_config mode;
    gl_api api;
    bool no_error = false;
@@ -893,7 +894,13 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi,
    }
 
    st_visual_to_context_mode(&attribs->visual, &mode);
-   st = st_create_context(api, pipe, &mode, shared_ctx,
+
+   if (attribs->visual.no_config)
+      mode_ptr = NULL;
+   else
+      mode_ptr = &mode;
+
+   st = st_create_context(api, pipe, mode_ptr, shared_ctx,
                           &attribs->options, no_error);
    if (!st) {
       *error = ST_CONTEXT_ERROR_NO_MEMORY;
diff --git a/src/mesa/tnl/t_split_copy.c b/src/mesa/tnl/t_split_copy.c
index cbb7eb409f9..085ae9a28c9 100644
--- a/src/mesa/tnl/t_split_copy.c
+++ b/src/mesa/tnl/t_split_copy.c
@@ -531,7 +531,7 @@ replay_init(struct copy_context *copy)
    for (offset = 0, i = 0; i < copy->nr_varying; i++) {
       const struct tnl_vertex_array *src = copy->varying[i].array;
       const struct gl_array_attributes *srcattr = src->VertexAttrib;
-      struct tnl_vertex_array *dst = &copy->dstarray[i];
+      struct tnl_vertex_array *dst = &copy->dstarray[copy->varying[i].attr];
       struct gl_vertex_buffer_binding *dstbind = &copy->varying[i].dstbinding;
       struct gl_array_attributes *dstattr = &copy->varying[i].dstattribs;
 
diff --git a/src/util/Android.mk b/src/util/Android.mk
index 9b6144268e6..7525ea1f5d9 100644
--- a/src/util/Android.mk
+++ b/src/util/Android.mk
@@ -41,8 +41,14 @@ LOCAL_C_INCLUDES := \
 	$(MESA_TOP)/src/gallium/include \
 	$(MESA_TOP)/src/gallium/auxiliary
 
+# If Android version >=8 MESA should static link libexpat else should dynamic link
+ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0)
+LOCAL_STATIC_LIBRARIES := \
+	libexpat
+else
 LOCAL_SHARED_LIBRARIES := \
 	libexpat
+endif
 
 LOCAL_MODULE := libmesa_util
 
diff --git a/src/util/Makefile.am b/src/util/Makefile.am
index bafb57439ab..1e143083374 100644
--- a/src/util/Makefile.am
+++ b/src/util/Makefile.am
@@ -59,7 +59,8 @@ libmesautil_la_LIBADD = \
 	$(PTHREAD_LIBS) \
 	$(CLOCK_LIB) \
 	$(ZLIB_LIBS) \
-	$(LIBATOMIC_LIBS)
+	$(LIBATOMIC_LIBS) \
+	-lm
 
 libxmlconfig_la_SOURCES = $(XMLCONFIG_FILES)
 libxmlconfig_la_CFLAGS = \
diff --git a/src/util/bitscan.h b/src/util/bitscan.h
index dc89ac93f28..cdfecafaf01 100644
--- a/src/util/bitscan.h
+++ b/src/util/bitscan.h
@@ -112,6 +112,31 @@ u_bit_scan64(uint64_t *mask)
    return i;
 }
 
+/* Count bits set in mask */
+static inline int
+u_count_bits(unsigned *mask)
+{
+   unsigned v = *mask;
+   int c;
+   v = v - ((v >> 1) & 0x55555555);
+   v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
+   v = (v + (v >> 4)) & 0xF0F0F0F;
+   c = (int)((v * 0x1010101) >> 24);
+   return c;
+}
+
+static inline int
+u_count_bits64(uint64_t *mask)
+{
+   uint64_t v = *mask;
+   int c;
+   v = v - ((v >> 1) & 0x5555555555555555ull);
+   v = (v & 0x3333333333333333ull) + ((v >> 2) & 0x3333333333333333ull);
+   v = (v + (v >> 4)) & 0xF0F0F0F0F0F0F0Full;
+   c = (int)((v * 0x101010101010101ull) >> 56);
+   return c;
+}
+
 /* Determine if an unsigned value is a power of two.
  *
  * \note
diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index 87ddfb86b27..368ec417927 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -189,7 +189,7 @@ do {                                       \
 } while (0);
 
 struct disk_cache *
-disk_cache_create(const char *gpu_name, const char *timestamp,
+disk_cache_create(const char *gpu_name, const char *driver_id,
                   uint64_t driver_flags)
 {
    void *local;
@@ -387,9 +387,9 @@ disk_cache_create(const char *gpu_name, const char *timestamp,
    cache->driver_keys_blob_size = cv_size;
 
    /* Create driver id keys */
-   size_t ts_size = strlen(timestamp) + 1;
+   size_t id_size = strlen(driver_id) + 1;
    size_t gpu_name_size = strlen(gpu_name) + 1;
-   cache->driver_keys_blob_size += ts_size;
+   cache->driver_keys_blob_size += id_size;
    cache->driver_keys_blob_size += gpu_name_size;
 
    /* We sometimes store entire structs that contains a pointers in the cache,
@@ -409,7 +409,7 @@ disk_cache_create(const char *gpu_name, const char *timestamp,
 
    uint8_t *drv_key_blob = cache->driver_keys_blob;
    DRV_KEY_CPY(drv_key_blob, &cache_version, cv_size)
-   DRV_KEY_CPY(drv_key_blob, timestamp, ts_size)
+   DRV_KEY_CPY(drv_key_blob, driver_id, id_size)
    DRV_KEY_CPY(drv_key_blob, gpu_name, gpu_name_size)
    DRV_KEY_CPY(drv_key_blob, &ptr_size, ptr_size_size)
    DRV_KEY_CPY(drv_key_blob, &driver_flags, driver_flags_size)
diff --git a/src/util/disk_cache.h b/src/util/disk_cache.h
index 50bd9f41ac4..2a147cba615 100644
--- a/src/util/disk_cache.h
+++ b/src/util/disk_cache.h
@@ -26,11 +26,14 @@
 
 #ifdef HAVE_DLFCN_H
 #include <dlfcn.h>
+#include <stdio.h>
+#include "util/build_id.h"
 #endif
 #include <assert.h>
 #include <stdint.h>
 #include <stdbool.h>
 #include <sys/stat.h>
+#include "util/mesa-sha1.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -100,7 +103,33 @@ disk_cache_get_function_timestamp(void *ptr, uint32_t* timestamp)
    if (stat(info.dli_fname, &st)) {
       return false;
    }
+
+   if (!st.st_mtime) {
+      fprintf(stderr, "Mesa: The provided filesystem timestamp for the cache "
+              "is bogus! Disabling On-disk cache.\n");
+      return false;
+   }
+
    *timestamp = st.st_mtime;
+
+   return true;
+}
+
+static inline bool
+disk_cache_get_function_identifier(void *ptr, struct mesa_sha1 *ctx)
+{
+   uint32_t timestamp;
+
+#ifdef HAVE_DL_ITERATE_PHDR
+   const struct build_id_note *note = NULL;
+   if ((note = build_id_find_nhdr_for_addr(ptr))) {
+      _mesa_sha1_update(ctx, build_id_data(note), build_id_length(note));
+   } else
+#endif
+   if (disk_cache_get_function_timestamp(ptr, &timestamp)) {
+      _mesa_sha1_update(ctx, &timestamp, sizeof(timestamp));
+   } else
+      return false;
    return true;
 }
 #endif
diff --git a/src/util/drirc b/src/util/drirc
index 8ece875e34f..0cd04bcadd4 100644
--- a/src/util/drirc
+++ b/src/util/drirc
@@ -100,6 +100,14 @@ TODO: document the other workarounds.
             <option name="allow_glsl_builtin_variable_redeclaration" value="true" />
         </application>
 
+        <application name="RAGE (64-bit)" executable="Rage64.exe">
+            <option name="allow_glsl_builtin_variable_redeclaration" value="true" />
+        </application>
+
+        <application name="RAGE (32-bit)" executable="Rage.exe">
+            <option name="allow_glsl_builtin_variable_redeclaration" value="true" />
+        </application>
+
         <application name="Second Life" executable="do-not-directly-run-secondlife-bin">
             <option name="allow_glsl_extension_directive_midshader" value="true" />
         </application>
@@ -120,6 +128,10 @@ TODO: document the other workarounds.
             <option name="allow_glsl_extension_directive_midshader" value="true" />
         </application>
 
+        <application name="Metro 2033 Redux / Metro Last Night Redux" executable="metro">
+            <option name="allow_glsl_extension_directive_midshader" value="true" />
+        </application>
+
         <application name="Worms W.M.D" executable="Worms W.M.Dx64">
             <option name="allow_higher_compat_version" value="true" />
         </application>
@@ -185,6 +197,14 @@ TODO: document the other workarounds.
             <option name="force_glsl_extensions_warn" value="true" />
         </application>
 
+        <application name="ARMA 3" executable="arma3.x86_64">
+            <option name="glsl_correct_derivatives_after_discard" value="true"/>
+        </application>
+
+        <application name="Wolfenstein The Old Blood" executable="WolfOldBlood_x64.exe">
+            <option name="force_compat_profile" value="true" />
+        </application>
+
         <!-- The GL thread whitelist is below, workarounds are above.
              Keep it that way. -->
 
diff --git a/src/util/meson.build b/src/util/meson.build
index 6386d945a22..795b5fdf241 100644
--- a/src/util/meson.build
+++ b/src/util/meson.build
@@ -108,7 +108,7 @@ libmesa_util = static_library(
   'mesa_util',
   [files_mesa_util, format_srgb],
   include_directories : inc_common,
-  dependencies : [dep_zlib, dep_clock, dep_thread, dep_atomic],
+  dependencies : [dep_zlib, dep_clock, dep_thread, dep_atomic, dep_m],
   c_args : [c_msvc_compat_args, c_vis_args],
   build_by_default : false
 )
@@ -117,6 +117,7 @@ libxmlconfig = static_library(
   'xmlconfig',
   files_xmlconfig,
   include_directories : inc_common,
+  link_with : libmesa_util,
   dependencies : [dep_expat, dep_m],
   c_args : [
     c_msvc_compat_args, c_vis_args,
diff --git a/src/util/ralloc.c b/src/util/ralloc.c
index 5d77f75ee85..fc35661996d 100644
--- a/src/util/ralloc.c
+++ b/src/util/ralloc.c
@@ -61,7 +61,7 @@ struct
 #endif
    ralloc_header
 {
-#ifdef DEBUG
+#ifndef NDEBUG
    /* A canary value used to determine whether a pointer is ralloc'd. */
    unsigned canary;
 #endif
@@ -88,9 +88,7 @@ get_header(const void *ptr)
 {
    ralloc_header *info = (ralloc_header *) (((char *) ptr) -
 					    sizeof(ralloc_header));
-#ifdef DEBUG
    assert(info->canary == CANARY);
-#endif
    return info;
 }
 
@@ -140,7 +138,7 @@ ralloc_size(const void *ctx, size_t size)
 
    add_child(parent, info);
 
-#ifdef DEBUG
+#ifndef NDEBUG
    info->canary = CANARY;
 #endif
 
@@ -554,11 +552,19 @@ ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt,
  */
 
 #define MIN_LINEAR_BUFSIZE 2048
-#define SUBALLOC_ALIGNMENT sizeof(uintptr_t)
+#define SUBALLOC_ALIGNMENT 8
 #define LMAGIC 0x87b9c7d3
 
-struct linear_header {
-#ifdef DEBUG
+struct
+#ifdef _MSC_VER
+ __declspec(align(8))
+#elif defined(__LP64__)
+ __attribute__((aligned(16)))
+#else
+ __attribute__((aligned(8)))
+#endif
+   linear_header {
+#ifndef NDEBUG
    unsigned magic;   /* for debugging */
 #endif
    unsigned offset;  /* points to the first unused byte in the buffer */
@@ -608,7 +614,7 @@ create_linear_node(void *ralloc_ctx, unsigned min_size)
    if (unlikely(!node))
       return NULL;
 
-#ifdef DEBUG
+#ifndef NDEBUG
    node->magic = LMAGIC;
 #endif
    node->offset = 0;
@@ -628,9 +634,7 @@ linear_alloc_child(void *parent, unsigned size)
    linear_size_chunk *ptr;
    unsigned full_size;
 
-#ifdef DEBUG
    assert(first->magic == LMAGIC);
-#endif
    assert(!latest->next);
 
    size = ALIGN_POT(size, SUBALLOC_ALIGNMENT);
@@ -651,6 +655,8 @@ linear_alloc_child(void *parent, unsigned size)
    ptr = (linear_size_chunk *)((char*)&latest[1] + latest->offset);
    ptr->size = size;
    latest->offset += full_size;
+
+   assert((uintptr_t)&ptr[1] % SUBALLOC_ALIGNMENT == 0);
    return &ptr[1];
 }
 
@@ -702,9 +708,7 @@ linear_free_parent(void *ptr)
       return;
 
    node = LINEAR_PARENT_TO_HEADER(ptr);
-#ifdef DEBUG
    assert(node->magic == LMAGIC);
-#endif
 
    while (node) {
       void *ptr = node;
@@ -723,9 +727,7 @@ ralloc_steal_linear_parent(void *new_ralloc_ctx, void *ptr)
       return;
 
    node = LINEAR_PARENT_TO_HEADER(ptr);
-#ifdef DEBUG
    assert(node->magic == LMAGIC);
-#endif
 
    while (node) {
       ralloc_steal(new_ralloc_ctx, node);
@@ -738,9 +740,7 @@ void *
 ralloc_parent_of_linear_parent(void *ptr)
 {
    linear_header *node = LINEAR_PARENT_TO_HEADER(ptr);
-#ifdef DEBUG
    assert(node->magic == LMAGIC);
-#endif
    return node->ralloc_parent;
 }
 
diff --git a/src/util/u_process.c b/src/util/u_process.c
index 5bf3f56db4e..5e5927678d8 100644
--- a/src/util/u_process.c
+++ b/src/util/u_process.c
@@ -40,11 +40,18 @@ extern char *program_invocation_name, *program_invocation_short_name;
 static const char *
 __getProgramName()
 {
-    char * arg = strrchr(program_invocation_name, '/');
-    if (arg)
-        return arg+1;
-    else
-        return program_invocation_name;
+   char * arg = strrchr(program_invocation_name, '/');
+   if (arg)
+      return arg+1;
+
+   /* If there was no '/' at all we likely have a windows like path from
+    * a wine application.
+    */
+   arg = strrchr(program_invocation_name, '\\');
+   if (arg)
+      return arg+1;
+
+   return program_invocation_name;
 }
 #    define GET_PROGRAM_NAME() __getProgramName()
 #elif defined(__CYGWIN__)
diff --git a/src/util/u_string.h b/src/util/u_string.h
index ce454308d7b..e4081466459 100644
--- a/src/util/u_string.h
+++ b/src/util/u_string.h
@@ -81,6 +81,7 @@ util_vsnprintf(char *str, size_t size, const char *format, va_list ap)
    if (ret < 0) {
       ret = _vscprintf(format, ap_copy);
    }
+   va_end(ap_copy);
    return ret;
 }
 
@@ -119,14 +120,14 @@ util_vasprintf(char **ret, const char *format, va_list ap)
 
    /* Compute length of output string first */
    va_copy(ap_copy, ap);
-   int r = util_vsnprintf(NULL, 0, format, ap);
+   int r = util_vsnprintf(NULL, 0, format, ap_copy);
    va_end(ap_copy);
 
    if (r < 0)
       return -1;
 
    *ret = (char *) malloc(r + 1);
-   if (!ret)
+   if (!*ret)
       return -1;
 
    /* Print to buffer */
diff --git a/src/util/xmlpool/meson.build b/src/util/xmlpool/meson.build
index 97693fac8c4..69625fc2c12 100644
--- a/src/util/xmlpool/meson.build
+++ b/src/util/xmlpool/meson.build
@@ -22,7 +22,10 @@ xmlpool_options_h = custom_target(
   'xmlpool_options.h',
   input : ['gen_xmlpool.py', 't_options.h'],
   output : 'options.h',
-  command : [prog_python2, '@INPUT@', meson.current_source_dir()],
+  command : [
+    prog_python2, '@INPUT@', meson.current_source_dir(),
+    'ca', 'es', 'de', 'nl', 'sv', 'fr',
+  ],
   capture : true,
   depend_files : files('ca.po', 'es.po', 'de.po', 'nl.po', 'sv.po', 'fr.po'),
 )
diff --git a/src/util/xmlpool/t_options.h b/src/util/xmlpool/t_options.h
index f0537e965b8..7d21750f6b8 100644
--- a/src/util/xmlpool/t_options.h
+++ b/src/util/xmlpool/t_options.h
@@ -150,6 +150,11 @@ DRI_CONF_OPT_BEGIN_B(allow_glsl_cross_stage_interpolation_mismatch, def) \
         DRI_CONF_DESC(en,gettext("Allow interpolation qualifier mismatch across shader stages")) \
 DRI_CONF_OPT_END
 
+#define DRI_CONF_FORCE_COMPAT_PROFILE(def) \
+DRI_CONF_OPT_BEGIN_B(force_compat_profile, def) \
+        DRI_CONF_DESC(en,gettext("Force an OpenGL compatibility context")) \
+DRI_CONF_OPT_END
+
 /**
  * \brief Image quality-related options
  */
diff --git a/src/vulkan/Android.mk b/src/vulkan/Android.mk
index 6253f1c3be9..730d036d18c 100644
--- a/src/vulkan/Android.mk
+++ b/src/vulkan/Android.mk
@@ -32,12 +32,15 @@ include $(LOCAL_PATH)/Makefile.sources
 include $(CLEAR_VARS)
 LOCAL_MODULE := libmesa_vulkan_util
 LOCAL_MODULE_CLASS := STATIC_LIBRARIES
-
+LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers
 intermediates := $(call local-generated-sources-dir)
 
 LOCAL_C_INCLUDES := \
 	$(MESA_TOP)/include/vulkan \
-	$(MESA_TOP)/src/vulkan/util
+	$(MESA_TOP)/src/vulkan/util \
+	frameworks/native/libs/nativebase/include \
+	frameworks/native/libs/nativewindow/include \
+	frameworks/native/libs/arect/include
 
 LOCAL_GENERATED_SOURCES := $(addprefix $(intermediates)/, \
 	$(VULKAN_UTIL_GENERATED_FILES))
diff --git a/src/vulkan/registry/vk.xml b/src/vulkan/registry/vk.xml
index 4419c6fbf96..9cd1c72de64 100644
--- a/src/vulkan/registry/vk.xml
+++ b/src/vulkan/registry/vk.xml
@@ -72,6 +72,7 @@ server.
         <tag name="KDAB"        author="KDAB"                          contact="Sean Harmer @seanharmer"/>
         <tag name="ANDROID"     author="Google, Inc."                  contact="Jesse Hall @critsec"/>
         <tag name="CHROMIUM"    author="Google, Inc."                  contact="Jesse Hall @critsec"/>
+        <tag name="FUCHSIA"     author="Google, Inc."                  contact="Craig Stout @cdotstout, Jesse Hall @critsec"/>
         <tag name="GOOGLE"      author="Google, Inc."                  contact="Jesse Hall @critsec"/>
         <tag name="QCOM"        author="Qualcomm Technologies, Inc."   contact="Maurice Ribble @mribble"/>
         <tag name="LUNARG"      author="LunarG, Inc."                  contact="Karen Ghavam @karenghavam-lunarg"/>
@@ -146,7 +147,7 @@ server.
         <type category="define">// Vulkan 1.1 version number
 #define <name>VK_API_VERSION_1_1</name> <type>VK_MAKE_VERSION</type>(1, 1, 0)// Patch version should always be set to 0</type>
         <type category="define">// Version of this file
-#define <name>VK_HEADER_VERSION</name> 80</type>
+#define <name>VK_HEADER_VERSION</name> 84</type>
 
         <type category="define">
 #define <name>VK_DEFINE_HANDLE</name>(object) typedef struct object##_T* object;</type>
@@ -588,12 +589,12 @@ server.
             <member><type>uint32_t</type>        <name>depth</name></member>
         </type>
         <type category="struct" name="VkViewport">
-            <member><type>float</type>          <name>x</name></member>
-            <member><type>float</type>          <name>y</name></member>
-            <member><type>float</type>          <name>width</name></member>
-            <member><type>float</type>          <name>height</name></member>
-            <member><type>float</type>          <name>minDepth</name></member>
-            <member><type>float</type>          <name>maxDepth</name></member>
+            <member noautovalidity="true"><type>float</type> <name>x</name></member>
+            <member noautovalidity="true"><type>float</type> <name>y</name></member>
+            <member noautovalidity="true"><type>float</type> <name>width</name></member>
+            <member noautovalidity="true"><type>float</type> <name>height</name></member>
+            <member><type>float</type>                       <name>minDepth</name></member>
+            <member><type>float</type>                       <name>maxDepth</name></member>
         </type>
         <type category="struct" name="VkRect2D">
             <member><type>VkOffset2D</type>     <name>offset</name></member>
@@ -642,11 +643,11 @@ server.
         </type>
         <type category="struct" name="VkAllocationCallbacks">
             <member optional="true"><type>void</type>*           <name>pUserData</name></member>
-            <member><type>PFN_vkAllocationFunction</type>   <name>pfnAllocation</name></member>
-            <member><type>PFN_vkReallocationFunction</type> <name>pfnReallocation</name></member>
-            <member><type>PFN_vkFreeFunction</type>    <name>pfnFree</name></member>
-            <member optional="true"><type>PFN_vkInternalAllocationNotification</type> <name>pfnInternalAllocation</name></member>
-            <member optional="true"><type>PFN_vkInternalFreeNotification</type> <name>pfnInternalFree</name></member>
+            <member noautovalidity="true"><type>PFN_vkAllocationFunction</type>   <name>pfnAllocation</name></member>
+            <member noautovalidity="true"><type>PFN_vkReallocationFunction</type> <name>pfnReallocation</name></member>
+            <member noautovalidity="true"><type>PFN_vkFreeFunction</type>    <name>pfnFree</name></member>
+            <member optional="true" noautovalidity="true"><type>PFN_vkInternalAllocationNotification</type> <name>pfnInternalAllocation</name></member>
+            <member optional="true" noautovalidity="true"><type>PFN_vkInternalFreeNotification</type> <name>pfnInternalFree</name></member>
         </type>
         <type category="struct" name="VkDeviceQueueCreateInfo">
             <member values="VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO"><type>VkStructureType</type> <name>sType</name></member>
@@ -741,7 +742,7 @@ server.
             <member><type>VkDeviceSize</type>           <name>maxResourceSize</name><comment>max size (in bytes) of this resource type</comment></member>
         </type>
         <type category="struct" name="VkDescriptorBufferInfo">
-            <member><type>VkBuffer</type>               <name>buffer</name><comment>Buffer used for this descriptor slot when the descriptor is UNIFORM_BUFFER[_DYNAMIC] or STORAGE_BUFFER[_DYNAMIC]. VK_NULL_HANDLE otherwise.</comment></member>
+            <member><type>VkBuffer</type>               <name>buffer</name><comment>Buffer used for this descriptor slot.</comment></member>
             <member><type>VkDeviceSize</type>           <name>offset</name><comment>Base offset from buffer start in bytes to update in the descriptor set.</comment></member>
             <member><type>VkDeviceSize</type>           <name>range</name><comment>Size in bytes of the buffer resource for this descriptor update.</comment></member>
         </type>
@@ -874,9 +875,9 @@ server.
             <member><type>VkImageSubresourceRange</type> <name>subresourceRange</name></member>
         </type>
         <type category="struct" name="VkBufferCopy">
-            <member><type>VkDeviceSize</type>           <name>srcOffset</name><comment>Specified in bytes</comment></member>
-            <member><type>VkDeviceSize</type>           <name>dstOffset</name><comment>Specified in bytes</comment></member>
-            <member><type>VkDeviceSize</type>           <name>size</name><comment>Specified in bytes</comment></member>
+            <member><type>VkDeviceSize</type>                       <name>srcOffset</name><comment>Specified in bytes</comment></member>
+            <member><type>VkDeviceSize</type>                       <name>dstOffset</name><comment>Specified in bytes</comment></member>
+            <member noautovalidity="true"><type>VkDeviceSize</type> <name>size</name><comment>Specified in bytes</comment></member>
         </type>
         <type category="struct" name="VkSparseMemoryBind">
             <member><type>VkDeviceSize</type>           <name>resourceOffset</name><comment>Specified in bytes</comment></member>
@@ -991,13 +992,13 @@ server.
             <member len="descriptorSetCount">const <type>VkDescriptorSetLayout</type>* <name>pSetLayouts</name></member>
         </type>
         <type category="struct" name="VkSpecializationMapEntry">
-            <member><type>uint32_t</type>               <name>constantID</name><comment>The SpecConstant ID specified in the BIL</comment></member>
-            <member><type>uint32_t</type>               <name>offset</name><comment>Offset of the value in the data block</comment></member>
-            <member><type>size_t</type>                 <name>size</name><comment>Size in bytes of the SpecConstant</comment></member>
+            <member><type>uint32_t</type>                     <name>constantID</name><comment>The SpecConstant ID specified in the BIL</comment></member>
+            <member><type>uint32_t</type>                     <name>offset</name><comment>Offset of the value in the data block</comment></member>
+            <member noautovalidity="true"><type>size_t</type> <name>size</name><comment>Size in bytes of the SpecConstant</comment></member>
         </type>
         <type category="struct" name="VkSpecializationInfo">
             <member optional="true"><type>uint32_t</type>               <name>mapEntryCount</name><comment>Number of entries in the map</comment></member>
-            <member len="mapEntryCount" noautovalidity="true">const <type>VkSpecializationMapEntry</type>* <name>pMapEntries</name><comment>Array of map entries</comment></member>
+            <member len="mapEntryCount">const <type>VkSpecializationMapEntry</type>* <name>pMapEntries</name><comment>Array of map entries</comment></member>
             <member optional="true"><type>size_t</type>                 <name>dataSize</name><comment>Size in bytes of pData</comment></member>
             <member len="dataSize">const <type>void</type>*            <name>pData</name><comment>Pointer to SpecConstant data</comment></member>
         </type>
@@ -1518,22 +1519,22 @@ server.
             <member><type>uint32_t</type>               <name>layers</name></member>
         </type>
         <type category="struct" name="VkDrawIndirectCommand">
-            <member><type>uint32_t</type>               <name>vertexCount</name></member>
-            <member><type>uint32_t</type>               <name>instanceCount</name></member>
-            <member><type>uint32_t</type>               <name>firstVertex</name></member>
-            <member><type>uint32_t</type>               <name>firstInstance</name></member>
+            <member><type>uint32_t</type>                       <name>vertexCount</name></member>
+            <member><type>uint32_t</type>                       <name>instanceCount</name></member>
+            <member><type>uint32_t</type>                       <name>firstVertex</name></member>
+            <member noautovalidity="true"><type>uint32_t</type> <name>firstInstance</name></member>
         </type>
         <type category="struct" name="VkDrawIndexedIndirectCommand">
-            <member><type>uint32_t</type>               <name>indexCount</name></member>
-            <member><type>uint32_t</type>               <name>instanceCount</name></member>
-            <member><type>uint32_t</type>               <name>firstIndex</name></member>
-            <member><type>int32_t</type>                <name>vertexOffset</name></member>
-            <member><type>uint32_t</type>               <name>firstInstance</name></member>
+            <member><type>uint32_t</type>                       <name>indexCount</name></member>
+            <member><type>uint32_t</type>                       <name>instanceCount</name></member>
+            <member><type>uint32_t</type>                       <name>firstIndex</name></member>
+            <member><type>int32_t</type>                        <name>vertexOffset</name></member>
+            <member noautovalidity="true"><type>uint32_t</type> <name>firstInstance</name></member>
         </type>
         <type category="struct" name="VkDispatchIndirectCommand">
-            <member><type>uint32_t</type>               <name>x</name></member>
-            <member><type>uint32_t</type>               <name>y</name></member>
-            <member><type>uint32_t</type>               <name>z</name></member>
+            <member noautovalidity="true"><type>uint32_t</type> <name>x</name></member>
+            <member noautovalidity="true"><type>uint32_t</type> <name>y</name></member>
+            <member noautovalidity="true"><type>uint32_t</type> <name>z</name></member>
         </type>
         <type category="struct" name="VkSubmitInfo">
             <member values="VK_STRUCTURE_TYPE_SUBMIT_INFO"><type>VkStructureType</type> <name>sType</name></member>
@@ -1562,7 +1563,7 @@ server.
         </type>
         <type category="struct" name="VkDisplayModeParametersKHR">
             <member><type>VkExtent2D</type>                       <name>visibleRegion</name><comment>Visible scanout region.</comment></member>
-            <member><type>uint32_t</type>                         <name>refreshRate</name><comment>Number of times per second the display is updated.</comment></member>
+            <member noautovalidity="true"><type>uint32_t</type>   <name>refreshRate</name><comment>Number of times per second the display is updated.</comment></member>
         </type>
         <type category="struct" name="VkDisplayModePropertiesKHR" returnedonly="true">
             <member><type>VkDisplayModeKHR</type>                 <name>displayMode</name><comment>Handle of this display mode.</comment></member>
@@ -1708,7 +1709,7 @@ server.
             <member values="VK_STRUCTURE_TYPE_VALIDATION_FLAGS_EXT"><type>VkStructureType</type>                  <name>sType</name><comment>Must be VK_STRUCTURE_TYPE_VALIDATION_FLAGS_EXT</comment></member>
             <member>const <type>void</type>*                      <name>pNext</name></member>
             <member><type>uint32_t</type>                         <name>disabledValidationCheckCount</name><comment>Number of validation checks to disable</comment></member>
-            <member len="disabledValidationCheckCount"><type>VkValidationCheckEXT</type>* <name>pDisabledValidationChecks</name><comment>Validation checks to disable</comment></member>
+            <member len="disabledValidationCheckCount">const <type>VkValidationCheckEXT</type>* <name>pDisabledValidationChecks</name><comment>Validation checks to disable</comment></member>
         </type>
         <type category="struct" name="VkPipelineRasterizationStateRasterizationOrderAMD" structextends="VkPipelineRasterizationStateCreateInfo">
             <member values="VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD"><type>VkStructureType</type> <name>sType</name></member>
@@ -1971,7 +1972,7 @@ server.
         </type>
         <type category="struct" name="VkRectLayerKHR">
             <member><type>VkOffset2D</type>                       <name>offset</name><comment>upper-left corner of a rectangle that has not changed, in pixels of a presentation images</comment></member>
-            <member><type>VkExtent2D</type>                       <name>extent</name><comment>Dimensions of a rectangle that has not changed, in pixels of a presentation images</comment></member>
+            <member noautovalidity="true"><type>VkExtent2D</type> <name>extent</name><comment>Dimensions of a rectangle that has not changed, in pixels of a presentation images</comment></member>
             <member><type>uint32_t</type>                         <name>layer</name><comment>Layer of a swapchain's image(s), for stereoscopic-3D images</comment></member>
         </type>
         <type category="struct" name="VkPhysicalDeviceVariablePointerFeatures" structextends="VkPhysicalDeviceFeatures2,VkDeviceCreateInfo">
@@ -2440,10 +2441,10 @@ server.
             <member><type>float</type>          <name>maxContentLightLevel</name><comment>Content maximum luminance</comment></member>
             <member><type>float</type>          <name>maxFrameAverageLightLevel</name></member>
         </type>
-        <type category="struct" name="VkRefreshCycleDurationGOOGLE">
+        <type category="struct" name="VkRefreshCycleDurationGOOGLE" returnedonly="true">
             <member><type>uint64_t</type>                         <name>refreshDuration</name><comment>Number of nanoseconds from the start of one refresh cycle to the next</comment></member>
         </type>
-        <type category="struct" name="VkPastPresentationTimingGOOGLE">
+        <type category="struct" name="VkPastPresentationTimingGOOGLE" returnedonly="true">
             <member><type>uint32_t</type>                         <name>presentID</name><comment>Application-provided identifier, previously given to vkQueuePresentKHR</comment></member>
             <member><type>uint64_t</type>                         <name>desiredPresentTime</name><comment>Earliest time an image should have been presented, previously given to vkQueuePresentKHR</comment></member>
             <member><type>uint64_t</type>                         <name>actualPresentTime</name><comment>Time the image was actually displayed</comment></member>
@@ -2795,7 +2796,7 @@ server.
             <member>const <type>void</type>*            <name>pNext</name></member>
             <member><type>VkSamplerReductionModeEXT</type> <name>reductionMode</name></member>
         </type>
-        <type category="struct" name="VkPhysicalDeviceBlendOperationAdvancedFeaturesEXT" structextends="VkPhysicalDeviceFeatures2">
+        <type category="struct" name="VkPhysicalDeviceBlendOperationAdvancedFeaturesEXT" structextends="VkPhysicalDeviceFeatures2,VkDeviceCreateInfo">
             <member values="VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BLEND_OPERATION_ADVANCED_FEATURES_EXT"><type>VkStructureType</type> <name>sType</name></member>
             <member noautovalidity="true"><type>void</type>*                            <name>pNext</name></member>
             <member><type>VkBool32</type>                         <name>advancedBlendCoherentOperations</name></member>
@@ -2817,6 +2818,32 @@ server.
             <member><type>VkBool32</type>               <name>dstPremultiplied</name></member>
             <member><type>VkBlendOverlapEXT</type>      <name>blendOverlap</name></member>
         </type>
+        <type category="struct" name="VkPhysicalDeviceInlineUniformBlockFeaturesEXT" returnedonly="true" structextends="VkPhysicalDeviceFeatures2,VkDeviceCreateInfo">
+            <member values="VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT"><type>VkStructureType</type> <name>sType</name></member>
+            <member><type>void</type>*                  <name>pNext</name></member>
+            <member><type>VkBool32</type>               <name>inlineUniformBlock</name></member>
+            <member><type>VkBool32</type>               <name>descriptorBindingInlineUniformBlockUpdateAfterBind</name></member>
+        </type>
+        <type category="struct" name="VkPhysicalDeviceInlineUniformBlockPropertiesEXT" returnedonly="true" structextends="VkPhysicalDeviceProperties2">
+            <member values="VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT"><type>VkStructureType</type> <name>sType</name></member>
+            <member><type>void</type>*                  <name>pNext</name></member>
+            <member><type>uint32_t</type>               <name>maxInlineUniformBlockSize</name></member>
+            <member><type>uint32_t</type>               <name>maxPerStageDescriptorInlineUniformBlocks</name></member>
+            <member><type>uint32_t</type>               <name>maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks</name></member>
+            <member><type>uint32_t</type>               <name>maxDescriptorSetInlineUniformBlocks</name></member>
+            <member><type>uint32_t</type>               <name>maxDescriptorSetUpdateAfterBindInlineUniformBlocks</name></member>
+        </type>
+        <type category="struct" name="VkWriteDescriptorSetInlineUniformBlockEXT">
+            <member values="VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT"><type>VkStructureType</type> <name>sType</name></member>
+            <member>const <type>void</type>*            <name>pNext</name></member>
+            <member><type>uint32_t</type>               <name>dataSize</name></member>
+            <member len="dataSize">const <type>void</type>* <name>pData</name></member>
+        </type>
+        <type category="struct" name="VkDescriptorPoolInlineUniformBlockCreateInfoEXT" structextends="VkDescriptorPoolCreateInfo">
+            <member values="VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT"><type>VkStructureType</type> <name>sType</name></member>
+            <member>const <type>void</type>*            <name>pNext</name></member>
+            <member><type>uint32_t</type>               <name>maxInlineUniformBlockBindings</name></member>
+        </type>
         <type category="struct" name="VkPipelineCoverageModulationStateCreateInfoNV" structextends="VkPipelineMultisampleStateCreateInfo">
             <member values="VK_STRUCTURE_TYPE_PIPELINE_COVERAGE_MODULATION_STATE_CREATE_INFO_NV"><type>VkStructureType</type> <name>sType</name></member>
             <member>const <type>void</type>*                                                                      <name>pNext</name></member>
@@ -2857,7 +2884,7 @@ server.
             <member><type>VkBool32</type>         <name>supported</name></member>
         </type>
         <type category="struct" name="VkDescriptorSetLayoutSupportKHR"                         alias="VkDescriptorSetLayoutSupport"/>
-        <type category="struct" name="VkPhysicalDeviceShaderDrawParameterFeatures" structextends="VkPhysicalDeviceFeatures2">
+        <type category="struct" name="VkPhysicalDeviceShaderDrawParameterFeatures" structextends="VkPhysicalDeviceFeatures2,VkDeviceCreateInfo">
             <member values="VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES"><type>VkStructureType</type> <name>sType</name></member>
             <member noautovalidity="true"><type>void</type>*                            <name>pNext</name></member>
             <member><type>VkBool32</type>                         <name>shaderDrawParameters</name></member>
@@ -3198,6 +3225,39 @@ server.
             <member><type>VkBool32</type>                           <name>conditionalRendering</name></member>
             <member><type>VkBool32</type>                           <name>inheritedConditionalRendering</name></member>
         </type>
+        <type category="struct" name="VkPhysicalDeviceVulkanMemoryModelFeaturesKHR" returnedonly="true" structextends="VkPhysicalDeviceFeatures2,VkDeviceCreateInfo">
+            <member values="VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR"><type>VkStructureType</type> <name>sType</name></member>
+            <member noautovalidity="true"><type>void</type>*      <name>pNext</name></member>
+            <member><type>VkBool32</type>                         <name>vulkanMemoryModel</name></member>
+            <member><type>VkBool32</type>                         <name>vulkanMemoryModelDeviceScope</name></member>
+        </type>
+        <type category="struct" name="VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT" structextends="VkPhysicalDeviceFeatures2,VkDeviceCreateInfo">
+            <member values="VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT"><type>VkStructureType</type> <name>sType</name></member>
+            <member noautovalidity="true"><type>void</type>*        <name>pNext</name></member>
+            <member><type>VkBool32</type>                           <name>vertexAttributeInstanceRateDivisor</name></member>
+            <member><type>VkBool32</type>                           <name>vertexAttributeInstanceRateZeroDivisor</name></member>
+        </type>
+        <type category="struct" name="VkQueueFamilyCheckpointPropertiesNV" structextends="VkQueueFamilyProperties2" returnedonly="true">
+            <member values="VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_NV"><type>VkStructureType</type> <name>sType</name></member>
+            <member><type>void</type>*           <name>pNext</name></member>
+            <member><type>VkPipelineStageFlags</type> <name>checkpointExecutionStageMask</name></member>
+        </type>
+        <type category="struct" name="VkCheckpointDataNV" returnedonly="true">
+            <member values="VK_STRUCTURE_TYPE_CHECKPOINT_DATA_NV"><type>VkStructureType</type> <name>sType</name></member>
+            <member><type>void</type>*                  <name>pNext</name></member>
+            <member><type>VkPipelineStageFlagBits</type>   <name>stage</name></member>
+            <member noautovalidity="true"><type>void</type>* <name>pCheckpointMarker</name></member>
+        </type>
+        <type category="struct" name="VkImageViewASTCDecodeModeEXT" structextends="VkImageViewCreateInfo">
+            <member values="VK_STRUCTURE_TYPE_IMAGE_VIEW_ASTC_DECODE_MODE_EXT"><type>VkStructureType</type> <name>sType</name></member>
+            <member>const <type>void</type>*                      <name>pNext</name></member>
+            <member><type>VkFormat</type>                         <name>decodeMode</name></member>
+        </type>
+        <type category="struct" name="VkPhysicalDeviceASTCDecodeFeaturesEXT" structextends="VkPhysicalDeviceFeatures2,VkDeviceCreateInfo">
+            <member values="VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ASTC_DECODE_FEATURES_EXT"><type>VkStructureType</type> <name>sType</name></member>
+            <member noautovalidity="true"><type>void</type>*      <name>pNext</name></member>
+            <member><type>VkBool32</type>                         <name>decodeModeSharedExponent</name></member>
+        </type>
     </types>
 
     <comment>Vulkan enumerant (token) definitions</comment>
@@ -3725,7 +3785,7 @@ server.
         <enum value="-10"   name="VK_ERROR_TOO_MANY_OBJECTS" comment="Too many objects of the type have already been created"/>
         <enum value="-11"   name="VK_ERROR_FORMAT_NOT_SUPPORTED" comment="Requested format is not supported on this device"/>
         <enum value="-12"   name="VK_ERROR_FRAGMENTED_POOL" comment="A requested pool allocation has failed due to fragmentation of the pool's memory"/>
-            <unused start="-12"/>
+            <unused start="-13" comment="This is the next unused available error code (negative value)"/>
     </enums>
     <enums name="VkDynamicState" type="enum">
         <enum value="0"     name="VK_DYNAMIC_STATE_VIEWPORT"/>
@@ -3982,6 +4042,7 @@ server.
     </enums>
     <enums name="VkColorSpaceKHR" type="enum">
         <enum value="0"     name="VK_COLOR_SPACE_SRGB_NONLINEAR_KHR"/>
+        <enum               name="VK_COLORSPACE_SRGB_NONLINEAR_KHR" alias="VK_COLOR_SPACE_SRGB_NONLINEAR_KHR" comment="Backwards-compatible alias containing a typo"/>
     </enums>
     <enums name="VkDisplayPlaneAlphaFlagBitsKHR" type="bitmask">
         <enum bitpos="0"    name="VK_DISPLAY_PLANE_ALPHA_OPAQUE_BIT_KHR"/>
@@ -4043,11 +4104,13 @@ server.
         <enum value="26"    name="VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT"/>
         <enum value="27"    name="VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT"/>
         <enum value="28"    name="VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT"/>
+        <enum               name="VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT" alias="VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT" comment="Backwards-compatible alias containing a typo"/>
         <enum value="29"    name="VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_KHR_EXT"/>
         <enum value="30"    name="VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_MODE_KHR_EXT"/>
         <enum value="31"    name="VK_DEBUG_REPORT_OBJECT_TYPE_OBJECT_TABLE_NVX_EXT"/>
         <enum value="32"    name="VK_DEBUG_REPORT_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX_EXT"/>
         <enum value="33"    name="VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT"/>
+        <enum               name="VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT" alias="VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT" comment="Backwards-compatible alias containing a typo"/>
     </enums>
     <enums name="VkRasterizationOrderAMD" type="enum">
         <enum value="0"     name="VK_RASTERIZATION_ORDER_STRICT_AMD"/>
@@ -6239,6 +6302,17 @@ server.
             <param><type>uint32_t</type> <name>maxDrawCount</name></param>
             <param><type>uint32_t</type> <name>stride</name></param>
         </command>
+        <command queues="graphics,compute,transfer" renderpass="both" cmdbufferlevel="primary,secondary">
+            <proto><type>void</type> <name>vkCmdSetCheckpointNV</name></proto>
+            <param><type>VkCommandBuffer</type> <name>commandBuffer</name></param>
+            <param noautovalidity="true">const <type>void</type>* <name>pCheckpointMarker</name></param>
+        </command>
+        <command>
+            <proto><type>void</type> <name>vkGetQueueCheckpointDataNV</name></proto>
+            <param><type>VkQueue</type> <name>queue</name></param>
+            <param optional="false,true"><type>uint32_t</type>* <name>pCheckpointDataCount</name></param>
+            <param optional="true" len="pCheckpointDataCount"><type>VkCheckpointDataNV</type>* <name>pCheckpointData</name></param>
+        </command>
     </commands>
 
     <feature api="vulkan" name="VK_VERSION_1_0" number="1.0" comment="Vulkan core API interface definitions">
@@ -6803,7 +6877,6 @@ server.
                 <enum value="&quot;VK_KHR_surface&quot;"                        name="VK_KHR_SURFACE_EXTENSION_NAME"/>
                 <enum offset="0" extends="VkResult" dir="-"                     name="VK_ERROR_SURFACE_LOST_KHR"/>
                 <enum offset="1" extends="VkResult" dir="-"                     name="VK_ERROR_NATIVE_WINDOW_IN_USE_KHR"/>
-                <enum value="VK_COLOR_SPACE_SRGB_NONLINEAR_KHR"                 name="VK_COLORSPACE_SRGB_NONLINEAR_KHR"/>
                 <enum offset="0" extends="VkObjectType"                         name="VK_OBJECT_TYPE_SURFACE_KHR"                  comment="VkSurfaceKHR"/>
                 <command name="vkDestroySurfaceKHR"/>
                 <command name="vkGetPhysicalDeviceSurfaceSupportKHR"/>
@@ -6971,10 +7044,9 @@ server.
                 <enum value="9"                                                 name="VK_EXT_DEBUG_REPORT_SPEC_VERSION"/>
                 <enum value="&quot;VK_EXT_debug_report&quot;"                   name="VK_EXT_DEBUG_REPORT_EXTENSION_NAME"/>
                 <enum offset="0" extends="VkStructureType"                      name="VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT"/>
+                <enum alias="VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT" extends="VkStructureType" name="VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT" comment="Backwards-compatible alias containing a typo"/>
                 <enum offset="1" extends="VkResult" dir="-"                     name="VK_ERROR_VALIDATION_FAILED_EXT"/>
-                <enum value="VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT" name="VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT"/>
                 <enum offset="0" extends="VkObjectType"                         name="VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT"          comment="VkDebugReportCallbackEXT"/>
-                <enum value="VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT"         name="VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT"/>
                 <type name="VkDebugReportObjectTypeEXT"/>
                 <type name="VkDebugReportCallbackCreateInfoEXT"/>
                 <command name="vkCreateDebugReportCallbackEXT"/>
@@ -6987,7 +7059,7 @@ server.
                 <enum extends="VkDebugReportObjectTypeEXT" extnumber="86"  offset="0"  name="VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_EXT"/>
             </require>
         </extension>
-        <extension name="VK_NV_glsl_shader" number="13" type="device" author="NV" contact="Piers Daniell @pdaniell-nv" supported="vulkan">
+        <extension name="VK_NV_glsl_shader" number="13" type="device" author="NV" contact="Piers Daniell @pdaniell-nv" supported="vulkan" deprecatedby="">
             <require>
                 <enum value="1"                                                 name="VK_NV_GLSL_SHADER_SPEC_VERSION"/>
                 <enum value="&quot;VK_NV_glsl_shader&quot;"                     name="VK_NV_GLSL_SHADER_EXTENSION_NAME"/>
@@ -7042,13 +7114,13 @@ server.
                 <enum value="&quot;VK_AMD_extension_20&quot;"                   name="VK_AMD_EXTENSION_20_EXTENSION_NAME"/>
             </require>
         </extension>
-        <extension name="VK_AMD_shader_trinary_minmax" number="21" type="device" author="AMD" contact="Qun Lin, AMD @linqun" supported="vulkan">
+        <extension name="VK_AMD_shader_trinary_minmax" number="21" type="device" author="AMD" contact="Qun Lin @linqun" supported="vulkan">
             <require>
                 <enum value="1"                                                 name="VK_AMD_SHADER_TRINARY_MINMAX_SPEC_VERSION"/>
                 <enum value="&quot;VK_AMD_shader_trinary_minmax&quot;"          name="VK_AMD_SHADER_TRINARY_MINMAX_EXTENSION_NAME"/>
             </require>
         </extension>
-        <extension name="VK_AMD_shader_explicit_vertex_parameter" number="22" type="device" author="AMD" contact="Qun Lin, AMD @linqun" supported="vulkan">
+        <extension name="VK_AMD_shader_explicit_vertex_parameter" number="22" type="device" author="AMD" contact="Qun Lin @linqun" supported="vulkan">
             <require>
                 <enum value="1"                                                 name="VK_AMD_SHADER_EXPLICIT_VERTEX_PARAMETER_SPEC_VERSION"/>
                 <enum value="&quot;VK_AMD_shader_explicit_vertex_parameter&quot;" name="VK_AMD_SHADER_EXPLICIT_VERTEX_PARAMETER_EXTENSION_NAME"/>
@@ -7252,6 +7324,7 @@ server.
             <require>
                 <enum value="0"                                                 name="VK_NVX_EXTENSION_51_SPEC_VERSION"/>
                 <enum value="&quot;VK_NVX_extension_51&quot;"                   name="VK_NVX_EXTENSION_51_EXTENSION_NAME"/>
+                <enum bitpos="13" extends="VkImageCreateFlagBits"               name="VK_IMAGE_CREATE_RESERVED_13_BIT_NV"/>
             </require>
         </extension>
         <extension name="VK_NVX_extension_52" number="52" author="NVX" contact="James Jones @cubanismo" supported="disabled">
@@ -7468,10 +7541,14 @@ server.
                 <enum value="&quot;VK_ARM_extension_01&quot;"                   name="VK_ARM_EXTENSION_01_EXTENSION_NAME"/>
             </require>
         </extension>
-        <extension name="VK_ARM_extension_02" number="68" type="device" author="ARM" contact="Jan-Harald Fredriksen @janharaldfredriksen-arm" supported="disabled">
+        <extension name="VK_EXT_astc_decode_mode" number="68" type="device" author="ARM" contact="Jan-Harald Fredriksen @janharaldfredriksen-arm" requires="VK_KHR_get_physical_device_properties2" supported="vulkan">
             <require>
-                <enum value="0"                                                 name="VK_ARM_EXTENSION_02_SPEC_VERSION"/>
-                <enum value="&quot;VK_ARM_extension_02&quot;"                   name="VK_ARM_EXTENSION_02_EXTENSION_NAME"/>
+                <enum value="1"                                         name="VK_EXT_ASTC_DECODE_MODE_SPEC_VERSION"/>
+                <enum value="&quot;VK_EXT_astc_decode_mode&quot;"       name="VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME"/>
+                <enum offset="0" extends="VkStructureType"              name="VK_STRUCTURE_TYPE_IMAGE_VIEW_ASTC_DECODE_MODE_EXT"/>
+                <enum offset="1" extends="VkStructureType"              name="VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ASTC_DECODE_FEATURES_EXT"/>
+                <type name="VkImageViewASTCDecodeModeEXT"/>
+                <type name="VkPhysicalDeviceASTCDecodeFeaturesEXT"/>
             </require>
         </extension>
         <extension name="VK_IMG_extension_69" number="69" type="device" author="IMG" contact="Tobias Hector @tobski" supported="disabled">
@@ -7812,8 +7889,8 @@ server.
             <require>
                 <enum value="1"                                             name="VK_EXT_DISPLAY_SURFACE_COUNTER_SPEC_VERSION"/>
                 <enum value="&quot;VK_EXT_display_surface_counter&quot;"    name="VK_EXT_DISPLAY_SURFACE_COUNTER_EXTENSION_NAME"/>
-                <enum offset="0" extends="VkStructureType"                  name="VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_EXT"/>
-                <enum value="VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_EXT"  name="VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES2_EXT"/>
+                <enum offset="0"                                           extends="VkStructureType" name="VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_EXT"/>
+                <enum alias="VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_EXT" extends="VkStructureType" name="VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES2_EXT" comment="Backwards-compatible alias containing a typo"/>
                 <type name="VkSurfaceCounterFlagsEXT"/>
                 <type name="VkSurfaceCounterFlagBitsEXT"/>
                 <type name="VkSurfaceCapabilities2EXT"/>
@@ -8283,7 +8360,7 @@ server.
                 <enum value="&quot;VK_KHR_storage_buffer_storage_class&quot;" name="VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME"/>
             </require>
         </extension>
-        <extension name="VK_AMD_gpu_shader_int16" number="133" type="device" author="AMD" contact="Qun Lin, AMD @linqun" supported="vulkan">
+        <extension name="VK_AMD_gpu_shader_int16" number="133" type="device" author="AMD" contact="Qun Lin @linqun" supported="vulkan">
             <require>
                 <enum value="1"                                             name="VK_AMD_GPU_SHADER_INT16_SPEC_VERSION"/>
                 <enum value="&quot;VK_AMD_gpu_shader_int16&quot;"           name="VK_AMD_GPU_SHADER_INT16_EXTENSION_NAME"/>
@@ -8319,10 +8396,19 @@ server.
                 <enum value="&quot;VK_AMD_shader_fragment_mask&quot;"       name="VK_AMD_SHADER_FRAGMENT_MASK_EXTENSION_NAME"/>
             </require>
         </extension>
-        <extension name="VK_AMD_extension_139" number="139" author="AMD" contact="Mais Alnasser @malnasse" supported="disabled">
+        <extension name="VK_EXT_inline_uniform_block" number="139" type="device" author="EXT" requires="VK_KHR_get_physical_device_properties2,VK_KHR_maintenance1" contact="Daniel Rakos @aqnuep" supported="vulkan">
             <require>
-                <enum value="0"                                             name="VK_AMD_EXTENSION_139_SPEC_VERSION"/>
-                <enum value="&quot;VK_AMD_extension_139&quot;"              name="VK_AMD_EXTENSION_139_EXTENSION_NAME"/>
+                <enum value="1"                                          name="VK_EXT_INLINE_UNIFORM_BLOCK_SPEC_VERSION"/>
+                <enum value="&quot;VK_EXT_inline_uniform_block&quot;"    name="VK_EXT_INLINE_UNIFORM_BLOCK_EXTENSION_NAME"/>
+                <enum offset="0" extends="VkDescriptorType"              name="VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT"/>
+                <enum offset="0" extends="VkStructureType"               name="VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT"/>
+                <enum offset="1" extends="VkStructureType"               name="VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT"/>
+                <enum offset="2" extends="VkStructureType"               name="VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT"/>
+                <enum offset="3" extends="VkStructureType"               name="VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT"/>
+                <type name="VkPhysicalDeviceInlineUniformBlockFeaturesEXT"/>
+                <type name="VkPhysicalDeviceInlineUniformBlockPropertiesEXT"/>
+                <type name="VkWriteDescriptorSetInlineUniformBlockEXT"/>
+                <type name="VkDescriptorPoolInlineUniformBlockCreateInfoEXT"/>
             </require>
         </extension>
         <extension name="VK_AMD_extension_140" number="140" author="AMD" contact="Mais Alnasser @malnasse" supported="disabled">
@@ -8632,7 +8718,6 @@ server.
                 <enum offset="0" extends="VkStructureType"                  name="VK_STRUCTURE_TYPE_VALIDATION_CACHE_CREATE_INFO_EXT"/>
                 <enum offset="1" extends="VkStructureType"                  name="VK_STRUCTURE_TYPE_SHADER_MODULE_VALIDATION_CACHE_CREATE_INFO_EXT"/>
                 <enum offset="0" extends="VkObjectType"                     name="VK_OBJECT_TYPE_VALIDATION_CACHE_EXT" comment="VkValidationCacheEXT"/>
-                <enum value="VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT"         name="VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT"/>
                 <type name="VkValidationCacheEXT"/>
                 <type name="VkValidationCacheCreateInfoEXT"/>
                 <type name="VkShaderModuleValidationCacheCreateInfoEXT"/>
@@ -8679,12 +8764,26 @@ server.
             <require>
                 <enum value="0"                                             name="VK_EXT_EXTENSION_165_SPEC_VERSION"/>
                 <enum value="&quot;VK_NV_extension_165&quot;"               name="VK_EXT_EXTENSION_165_EXTENSION_NAME"/>
+                <enum bitpos="23" extends="VkAccessFlagBits"                name="VK_ACCESS_RESERVED_23_BIT_NV"/>
+                <enum bitpos="8"  extends="VkImageUsageFlagBits"            name="VK_IMAGE_USAGE_RESERVED_8_BIT_NV"/>
+                <enum bitpos="22" extends="VkPipelineStageFlagBits"         name="VK_PIPELINE_STAGE_RESERVED_22_BIT_NV"/>
             </require>
         </extension>
         <extension name="VK_NV_extension_166" number="166" author="NV" contact="Daniel Koch @dgkoch" supported="disabled">
             <require>
                 <enum value="0"                                             name="VK_EXT_EXTENSION_166_SPEC_VERSION"/>
                 <enum value="&quot;VK_NV_extension_166&quot;"               name="VK_EXT_EXTENSION_166_EXTENSION_NAME"/>
+                <enum bitpos="8" extends="VkShaderStageFlagBits"            name="VK_SHADER_STAGE_RESERVED_8_BIT_NV"/>
+                <enum bitpos="9" extends="VkShaderStageFlagBits"            name="VK_SHADER_STAGE_RESERVED_9_BIT_NV"/>
+                <enum bitpos="10" extends="VkShaderStageFlagBits"           name="VK_SHADER_STAGE_RESERVED_10_BIT_NV"/>
+                <enum bitpos="11" extends="VkShaderStageFlagBits"           name="VK_SHADER_STAGE_RESERVED_11_BIT_NV"/>
+                <enum bitpos="12" extends="VkShaderStageFlagBits"           name="VK_SHADER_STAGE_RESERVED_12_BIT_NV"/>
+                <enum bitpos="13" extends="VkShaderStageFlagBits"           name="VK_SHADER_STAGE_RESERVED_13_BIT_NV"/>
+                <enum bitpos="21" extends="VkPipelineStageFlagBits"         name="VK_PIPELINE_STAGE_RESERVED_21_BIT_NV"/>
+                <enum bitpos="10" extends="VkBufferUsageFlagBits"           name="VK_BUFFER_USAGE_RESERVED_10_BIT_NV"/>
+                <enum bitpos="21" extends="VkAccessFlagBits"                name="VK_ACCESS_RESERVED_21_BIT_NV"/>
+                <enum bitpos="22" extends="VkAccessFlagBits"                name="VK_ACCESS_RESERVED_22_BIT_NV"/>
+                <enum bitpos="5" extends="VkPipelineCreateFlagBits"         name="VK_PIPELINE_CREATE_RESERVED_5_BIT_NV"/>
             </require>
         </extension>
         <extension name="VK_NV_extension_167" number="167" author="NV" contact="Daniel Koch @dgkoch" supported="disabled">
@@ -8860,13 +8959,15 @@ server.
         </extension>
         <extension name="VK_EXT_vertex_attribute_divisor" number="191" type="device" requires="VK_KHR_get_physical_device_properties2" author="NV" contact="Vikram Kushwaha @vkushwaha" supported="vulkan">
             <require>
-                <enum value="1"                                         name="VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_SPEC_VERSION"/>
+                <enum value="3"                                         name="VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_SPEC_VERSION"/>
                 <enum value="&quot;VK_EXT_vertex_attribute_divisor&quot;"   name="VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME"/>
                 <enum offset="0" extends="VkStructureType"              name="VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT"/>
                 <enum offset="1" extends="VkStructureType"              name="VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT"/>
+                <enum offset="2" extends="VkStructureType"              name="VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT"/>
                 <type name="VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT"/>
                 <type name="VkVertexInputBindingDivisorDescriptionEXT"/>
                 <type name="VkPipelineVertexInputDivisorStateCreateInfoEXT"/>
+                <type name="VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT"/>
             </require>
         </extension>
         <extension name="VK_GOOGLE_extension_192" number="192" author="GOOGLE" contact="Jean-Francois Roy @jfroy" supported="disabled">
@@ -8940,6 +9041,10 @@ server.
             <require>
                 <enum value="0"                                         name="VK_NV_EXTENSION_203_SPEC_VERSION"/>
                 <enum value="&quot;VK_NV_extension_203&quot;"           name="VK_NV_EXTENSION_203_EXTENSION_NAME"/>
+                <enum bitpos="6" extends="VkShaderStageFlagBits"        name="VK_SHADER_STAGE_RESERVED_6_BIT_NV"/>
+                <enum bitpos="7" extends="VkShaderStageFlagBits"        name="VK_SHADER_STAGE_RESERVED_7_BIT_NV"/>
+                <enum bitpos="19" extends="VkPipelineStageFlagBits"     name="VK_PIPELINE_STAGE_RESERVED_19_BIT_NV"/>
+                <enum bitpos="20" extends="VkPipelineStageFlagBits"     name="VK_PIPELINE_STAGE_RESERVED_20_BIT_NV"/>
             </require>
         </extension>
         <extension name="VK_NV_extension_204" number="204" author="NV" contact="Pat Brown @nvpbrown" supported="disabled">
@@ -8960,10 +9065,16 @@ server.
                 <enum value="&quot;VK_NV_extension_206&quot;"           name="VK_NV_EXTENSION_206_EXTENSION_NAME"/>
             </require>
         </extension>
-        <extension name="VK_NV_extension_207" number="207" author="NV" contact="Nuno Subtil @nsubtil" supported="disabled">
+        <extension name="VK_NV_device_diagnostic_checkpoints" type="device" number="207" requires="VK_KHR_get_physical_device_properties2" author="NVIDIA" contact="Nuno Subtil @nsubtil" supported="vulkan">
             <require>
-                <enum value="0"                                         name="VK_NV_EXTENSION_207_SPEC_VERSION"/>
-                <enum value="&quot;VK_NV_extension_207&quot;"           name="VK_NV_EXTENSION_207_EXTENSION_NAME"/>
+                <enum value="2"                                         name="VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_SPEC_VERSION"/>
+                <enum value="&quot;VK_NV_device_diagnostic_checkpoints&quot;" name="VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME"/>
+                <enum offset="0" extends="VkStructureType"              name="VK_STRUCTURE_TYPE_CHECKPOINT_DATA_NV"/>
+                <enum offset="1" extends="VkStructureType"              name="VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_NV"/>
+                <type name="VkQueueFamilyCheckpointPropertiesNV"/>
+                <type name="VkCheckpointDataNV"/>
+                <command name="vkCmdSetCheckpointNV"/>
+                <command name="vkGetQueueCheckpointDataNV"/>
             </require>
         </extension>
         <extension name="VK_KHR_extension_208" number="208" type="device" author="KHR" contact="Daniel Rakos @drakos-arm" supported="disabled">
@@ -8990,10 +9101,12 @@ server.
                 <enum value="&quot;VK_KHR_extension_211&quot;"          name="VK_KHR_EXTENSION_211_EXTENSION_NAME"/>
             </require>
         </extension>
-        <extension name="VK_KHR_extension_212" number="212" type="device" author="KHR" contact="Jeff Bolz @jeffbolznv" supported="disabled">
+        <extension name="VK_KHR_vulkan_memory_model" number="212" type="device" author="KHR" contact="Jeff Bolz @jeffbolznv" provisional="true" supported="vulkan">
             <require>
-                <enum value="0"                                         name="VK_KHR_EXTENSION_212_SPEC_VERSION"/>
-                <enum value="&quot;VK_KHR_extension_212&quot;"          name="VK_KHR_EXTENSION_212_EXTENSION_NAME"/>
+                <enum value="2"                                         name="VK_KHR_VULKAN_MEMORY_MODEL_SPEC_VERSION"/>
+                <enum value="&quot;VK_KHR_vulkan_memory_model&quot;"    name="VK_KHR_VULKAN_MEMORY_MODEL_EXTENSION_NAME"/>
+                <enum offset="0" extends="VkStructureType"              name="VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR"/>
+                <type name="VkPhysicalDeviceVulkanMemoryModelFeaturesKHR"/>
             </require>
         </extension>
         <extension name="VK_AMD_extension_213" number="213" author="AMD" contact="Neil Henning @sheredom" supported="disabled">
@@ -9008,5 +9121,46 @@ server.
                 <enum value="&quot;VK_KHR_extension_214&quot;"              name="VK_KHR_EXTENSION_214_EXTENSION_NAME"/>
             </require>
         </extension>
+        <extension name="VK_GOOGLE_extension_215" number="215" author="GOOGLE" contact="Jesse Hall @critsec" supported="disabled">
+            <require>
+                <enum value="0"                                             name="VK_KHR_EXTENSION_215_SPEC_VERSION"/>
+                <enum value="&quot;VK_KHR_extension_215&quot;"              name="VK_KHR_EXTENSION_215_EXTENSION_NAME"/>
+            </require>
+        </extension>
+        <extension name="VK_GOOGLE_extension_216" number="216" author="GOOGLE" contact="Jesse Hall @critsec" supported="disabled">
+            <require>
+                <enum value="0"                                             name="VK_KHR_EXTENSION_216_SPEC_VERSION"/>
+                <enum value="&quot;VK_KHR_extension_216&quot;"              name="VK_KHR_EXTENSION_216_EXTENSION_NAME"/>
+            </require>
+        </extension>
+        <extension name="VK_GOOGLE_extension_217" number="217" author="GOOGLE" contact="Jesse Hall @critsec" supported="disabled">
+            <require>
+                <enum value="0"                                             name="VK_KHR_EXTENSION_217_SPEC_VERSION"/>
+                <enum value="&quot;VK_KHR_extension_217&quot;"              name="VK_KHR_EXTENSION_217_EXTENSION_NAME"/>
+            </require>
+        </extension>
+        <extension name="VK_EXT_macos_ios_window" number="218" author="EXT" contact="Dzmitry Malyshau @kvark" supported="disabled">
+            <require>
+                <enum value="0"                                                 name="VK_EXT_MACOS_IOS_WINDOW_SPEC_VERSION"/>
+                <enum value="&quot;VK_EXT_macos_ios_window&quot;"               name="VK_EXT_MACOS_IOS_WINDOW_EXTENSION_NAME"/>
+            </require>
+        </extension>
+        <extension name="VK_EXT_extension_219" number="219" type="device" author="EXT" contact="Matthew Netsch @mnetsch" supported="disabled">
+            <require>
+                <enum value="0"                                             name="VK_EXT_EXTENSION_219_SPEC_VERSION"/>
+                <enum value="&quot;VK_EXT_extension_219&quot;"              name="VK_EXT_EXTENSION_219_EXTENSION_NAME"/>
+                <enum bitpos="14" extends="VkImageCreateFlagBits"           name="VK_IMAGE_CREATE_RESERVED_14_BIT_EXT"/>
+                <enum bitpos="24" extends="VkAccessFlagBits"                name="VK_ACCESS_RESERVED_24_BIT_EXT"/>
+                <enum bitpos="24" extends="VkFormatFeatureFlagBits"         name="VK_FORMAT_FEATURE_RESERVED_24_BIT_EXT"/>
+                <enum bitpos="9"  extends="VkImageUsageFlagBits"            name="VK_IMAGE_USAGE_RESERVED_9_BIT_EXT"/>
+                <enum bitpos="23" extends="VkPipelineStageFlagBits"         name="VK_PIPELINE_STAGE_RESERVED_23_BIT_EXT"/>
+            </require>
+        </extension>
+        <extension name="VK_EXT_extension_220" number="220" author="EXT" contact="Dzmitry Malyshau @kvark" supported="disabled">
+            <require>
+                <enum value="0"                                              name="VK_EXT_EXTENSION_220_SPEC_VERSION"/>
+                <enum value="&quot;VK_EXT_extension_220&quot;"               name="VK_EXT_EXTENSION_220_EXTENSION_NAME"/>
+            </require>
+        </extension>
     </extensions>
 </registry>
diff --git a/src/vulkan/util/vk_alloc.h b/src/vulkan/util/vk_alloc.h
index f58a80625a8..2e807a96d9e 100644
--- a/src/vulkan/util/vk_alloc.h
+++ b/src/vulkan/util/vk_alloc.h
@@ -67,6 +67,23 @@ vk_free(const VkAllocationCallbacks *alloc, void *data)
    alloc->pfnFree(alloc->pUserData, data);
 }
 
+static inline char *
+vk_strdup(const VkAllocationCallbacks *alloc, const char *s,
+          VkSystemAllocationScope scope)
+{
+   if (s == NULL)
+      return NULL;
+
+   size_t size = strlen(s) + 1;
+   char *copy = vk_alloc(alloc, size, 1, scope);
+   if (copy == NULL)
+      return NULL;
+
+   memcpy(copy, s, size);
+
+   return copy;
+}
+
 static inline void *
 vk_alloc2(const VkAllocationCallbacks *parent_alloc,
           const VkAllocationCallbacks *alloc,
diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c
index f2d90a6bba2..3416fef3076 100644
--- a/src/vulkan/wsi/wsi_common.c
+++ b/src/vulkan/wsi/wsi_common.c
@@ -856,17 +856,14 @@ wsi_common_get_images(VkSwapchainKHR _swapchain,
 }
 
 VkResult
-wsi_common_acquire_next_image(const struct wsi_device *wsi,
-                              VkDevice device,
-                              VkSwapchainKHR _swapchain,
-                              uint64_t timeout,
-                              VkSemaphore semaphore,
-                              uint32_t *pImageIndex)
+wsi_common_acquire_next_image2(const struct wsi_device *wsi,
+                               VkDevice device,
+                               const VkAcquireNextImageInfoKHR *pAcquireInfo,
+                               uint32_t *pImageIndex)
 {
-   WSI_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
+   WSI_FROM_HANDLE(wsi_swapchain, swapchain, pAcquireInfo->swapchain);
 
-   return swapchain->acquire_next_image(swapchain, timeout,
-                                        semaphore, pImageIndex);
+   return swapchain->acquire_next_image(swapchain, pAcquireInfo, pImageIndex);
 }
 
 VkResult
diff --git a/src/vulkan/wsi/wsi_common.h b/src/vulkan/wsi/wsi_common.h
index 33e4f849ac9..14f65097bb3 100644
--- a/src/vulkan/wsi/wsi_common.h
+++ b/src/vulkan/wsi/wsi_common.h
@@ -209,12 +209,10 @@ wsi_common_get_images(VkSwapchainKHR _swapchain,
                       VkImage *pSwapchainImages);
 
 VkResult
-wsi_common_acquire_next_image(const struct wsi_device *wsi,
-                              VkDevice device,
-                              VkSwapchainKHR swapchain,
-                              uint64_t timeout,
-                              VkSemaphore semaphore,
-                              uint32_t *pImageIndex);
+wsi_common_acquire_next_image2(const struct wsi_device *wsi,
+                               VkDevice device,
+                               const VkAcquireNextImageInfoKHR *pAcquireInfo,
+                               uint32_t *pImageIndex);
 
 VkResult
 wsi_common_create_swapchain(struct wsi_device *wsi,
diff --git a/src/vulkan/wsi/wsi_common_display.c b/src/vulkan/wsi/wsi_common_display.c
index e6cba188dfa..bc87ce4822c 100644
--- a/src/vulkan/wsi/wsi_common_display.c
+++ b/src/vulkan/wsi/wsi_common_display.c
@@ -1197,8 +1197,7 @@ wsi_display_wait_for_event(struct wsi_display *wsi,
 
 static VkResult
 wsi_display_acquire_next_image(struct wsi_swapchain *drv_chain,
-                               uint64_t timeout,
-                               VkSemaphore semaphore,
+                               const VkAcquireNextImageInfoKHR *info,
                                uint32_t *image_index)
 {
    struct wsi_display_swapchain *chain =
@@ -1211,6 +1210,7 @@ wsi_display_acquire_next_image(struct wsi_swapchain *drv_chain,
    if (chain->status != VK_SUCCESS)
       return chain->status;
 
+   uint64_t timeout = info->timeout;
    if (timeout != 0 && timeout != UINT64_MAX)
       timeout = wsi_rel_to_abs_time(timeout);
 
@@ -1712,6 +1712,10 @@ wsi_display_surface_create_swapchain(
 
    VkResult result = wsi_swapchain_init(wsi_device, &chain->base, device,
                                         create_info, allocator);
+   if (result != VK_SUCCESS) {
+      vk_free(allocator, chain);
+      return result;
+   }
 
    chain->base.destroy = wsi_display_swapchain_destroy;
    chain->base.get_wsi_image = wsi_display_get_wsi_image;
@@ -2304,6 +2308,7 @@ wsi_acquire_xlib_display(VkPhysicalDevice physical_device,
    if (!crtc)
       return VK_ERROR_INITIALIZATION_FAILED;
 
+#ifdef HAVE_DRI3_MODIFIERS
    xcb_randr_lease_t lease = xcb_generate_id(connection);
    xcb_randr_create_lease_cookie_t cl_c =
       xcb_randr_create_lease(connection, root, lease, 1, 1,
@@ -2324,6 +2329,7 @@ wsi_acquire_xlib_display(VkPhysicalDevice physical_device,
       return VK_ERROR_INITIALIZATION_FAILED;
 
    wsi->fd = fd;
+#endif
 
    return VK_SUCCESS;
 }
diff --git a/src/vulkan/wsi/wsi_common_private.h b/src/vulkan/wsi/wsi_common_private.h
index 9f2aacd6560..ee7ae75b8f7 100644
--- a/src/vulkan/wsi/wsi_common_private.h
+++ b/src/vulkan/wsi/wsi_common_private.h
@@ -62,7 +62,7 @@ struct wsi_swapchain {
    struct wsi_image *(*get_wsi_image)(struct wsi_swapchain *swapchain,
                                       uint32_t image_index);
    VkResult (*acquire_next_image)(struct wsi_swapchain *swap_chain,
-                                  uint64_t timeout, VkSemaphore semaphore,
+                                  const VkAcquireNextImageInfoKHR *info,
                                   uint32_t *image_index);
    VkResult (*queue_present)(struct wsi_swapchain *swap_chain,
                              uint32_t image_index,
diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c
index 4a6a4a29b93..aeff823f4d8 100644
--- a/src/vulkan/wsi/wsi_common_wayland.c
+++ b/src/vulkan/wsi/wsi_common_wayland.c
@@ -455,10 +455,11 @@ wsi_wl_get_presentation_support(struct wsi_device *wsi_device,
       (struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND];
 
    struct wsi_wl_display display;
-   int ret = wsi_wl_display_init(wsi, &display, wl_display, false);
-   wsi_wl_display_finish(&display);
+   VkResult ret = wsi_wl_display_init(wsi, &display, wl_display, false);
+   if (ret == VK_SUCCESS)
+      wsi_wl_display_finish(&display);
 
-   return ret == 0;
+   return ret == VK_SUCCESS;
 }
 
 static VkResult
@@ -658,8 +659,7 @@ wsi_wl_swapchain_get_wsi_image(struct wsi_swapchain *wsi_chain,
 
 static VkResult
 wsi_wl_swapchain_acquire_next_image(struct wsi_swapchain *wsi_chain,
-                                    uint64_t timeout,
-                                    VkSemaphore semaphore,
+                                    const VkAcquireNextImageInfoKHR *info,
                                     uint32_t *image_index)
 {
    struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)wsi_chain;
diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c
index 7e7b3a94e4b..164f760b5fc 100644
--- a/src/vulkan/wsi/wsi_common_x11.c
+++ b/src/vulkan/wsi/wsi_common_x11.c
@@ -948,11 +948,11 @@ x11_present_to_x11(struct x11_swapchain *chain, uint32_t image_index,
 
 static VkResult
 x11_acquire_next_image(struct wsi_swapchain *anv_chain,
-                       uint64_t timeout,
-                       VkSemaphore semaphore,
+                       const VkAcquireNextImageInfoKHR *info,
                        uint32_t *image_index)
 {
    struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain;
+   uint64_t timeout = info->timeout;
 
    if (chain->threaded) {
       return x11_acquire_next_image_from_queue(chain, image_index, timeout);