diff --git a/Android.common.mk b/Android.common.mk index 397dc03dee4..ddf02b04333 100644 --- a/Android.common.mk +++ b/Android.common.mk @@ -31,6 +31,7 @@ LOCAL_C_INCLUDES += \ MESA_VERSION := $(shell cat $(MESA_TOP)/VERSION) LOCAL_CFLAGS += \ + -O3 \ -Wno-error \ -Wno-unused-parameter \ -Wno-pointer-arith \ @@ -76,14 +77,23 @@ LOCAL_CFLAGS += \ -DMAJOR_IN_SYSMACROS \ -DVK_USE_PLATFORM_ANDROID_KHR \ -fvisibility=hidden \ - -Wno-sign-compare + -Wno-sign-compare \ + -Wno-self-assign \ + -Wno-constant-logical-operand \ + -Wno-format \ + -Wno-incompatible-pointer-types \ + -Wno-enum-conversion LOCAL_CPPFLAGS += \ -D__STDC_CONSTANT_MACROS \ -D__STDC_FORMAT_MACROS \ -D__STDC_LIMIT_MACROS \ -Wno-error=non-virtual-dtor \ - -Wno-non-virtual-dtor + -Wno-non-virtual-dtor \ + -Wno-delete-non-virtual-dtor \ + -Wno-overloaded-virtual \ + -Wno-missing-braces \ + -Wno-deprecated-register # mesa requires at least c99 compiler LOCAL_CONLYFLAGS += \ diff --git a/Readme.md b/Readme.md new file mode 100644 index 00000000000..5df295abc3a --- /dev/null +++ b/Readme.md @@ -0,0 +1,2 @@ +Any security related issues should be reported by following the instructions here: +https://01.org/security diff --git a/VERSION b/VERSION index 9a33c149fca..49fdb126eb8 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -18.2.0-devel +18.2.5 diff --git a/bin/.cherry-ignore b/bin/.cherry-ignore new file mode 100644 index 00000000000..93be8a6c4e9 --- /dev/null +++ b/bin/.cherry-ignore @@ -0,0 +1,23 @@ +# fixes: This commit has more than one Fixes tag but the commit it +# addresses didn't land in branch. +6ff1c479968819b93c46d24bd898e89ce14ac401 autotools: don't ship the git_sha1.h generated in git in the tarballs +# pick: This commit addresses a regression introduced by previous +# commit fa9e6c235da, which didn't make it for 18.2. 
+a72dbc461bdb7714656e62cd8f4b00a404c2e6e0 mesa: allow GL_UNSIGNED_BYTE type for SNORM reads +# fixes: This commit has more than one Fixes tag but the commit it +# addresses didn't land in branch. +c9f54486959716762e6818dabb0a73a8cd46df67 radeonsi: fix regression in indirect input swizzles. +# extra: Just some comments update. +2ad9917e187c1e9dbb053d3c98aa0e39fa374059 anv/blorp: Fix a comment as per Nanley's review feedback +# fixes: This commit was immediately reverted by commit 2dce1175c1c. +4aec44c0d9c4c0649c362199fac97efe0a3b38a4 i965/tools: 32bit compilation with meson +# pick: This commit was reverted by commit 95bb7d82ca8. +90819abb56f6b1a0cd4946b13b6caf24fb46e500 radv: fix descriptor pool allocation size +# pick: There is a specific patch for stable branch for this commit. +0d495bec25bd7584de4e988c2b4528c1996bc1d0 radeonsi: NaN should pass kill_if +# pick: This commit reverts 0fa9e6d7b30 which did not land in branch. +aa02d7e8781c25ee18b6da97606300808c84973a Revert "anv/skylake: disable ForceThreadDispatchEnable" +# pick: Explicit 18.3 only nominations. +b1b2dd06a7b777e862b525302b15bcaf407d3648 radv: add missing TFB queries support to CmdCopyQueryPoolsResults() +# fixes: This commit was reverted by commit 5f312e95f87. 
+a9031bf9b55602d93cccef6c926e2179c23205b4 i965/batch: avoid reverting batch buffer if saved state is an empty diff --git a/bin/install_megadrivers.py b/bin/install_megadrivers.py index 8d9ed9c6dce..551e385d1a5 100755 --- a/bin/install_megadrivers.py +++ b/bin/install_megadrivers.py @@ -43,13 +43,15 @@ def main(): master = os.path.join(to, os.path.basename(args.megadriver)) if not os.path.exists(to): + if os.path.lexists(to): + os.unlink(to) os.makedirs(to) shutil.copy(args.megadriver, master) for driver in args.drivers: abs_driver = os.path.join(to, driver) - if os.path.exists(abs_driver): + if os.path.lexists(abs_driver): os.unlink(abs_driver) print('installing {} to {}'.format(args.megadriver, abs_driver)) os.link(master, abs_driver) @@ -60,7 +62,7 @@ def main(): name, ext = os.path.splitext(driver) while ext != '.so': - if os.path.exists(name): + if os.path.lexists(name): os.unlink(name) os.symlink(driver, name) name, ext = os.path.splitext(name) diff --git a/common.py b/common.py index 24a7e8a611d..0d8cb59b436 100644 --- a/common.py +++ b/common.py @@ -107,9 +107,6 @@ def AddOptions(opts): opts.Add(BoolOption('debug', 'DEPRECATED: debug build', 'yes')) opts.Add(BoolOption('profile', 'DEPRECATED: profile build', 'no')) opts.Add(BoolOption('quiet', 'DEPRECATED: profile build', 'yes')) - opts.Add(BoolOption('texture_float', - 'enable floating-point textures and renderbuffers', - 'no')) opts.Add(BoolOption('swr', 'Build OpenSWR', 'no')) if host_platform == 'windows': opts.Add('MSVC_VERSION', 'Microsoft Visual C/C++ version') diff --git a/configure.ac b/configure.ac index ffb8424a07b..64c03506fb0 100644 --- a/configure.ac +++ b/configure.ac @@ -295,6 +295,12 @@ esac AM_CONDITIONAL(HAVE_ANDROID, test "x$android" = xyes) +# Toggle Werror since at some point clang started treating unknown -W +# flags as warnings, succeeding with the build, yet issuing an annoying +# warning. 
+save_CFLAGS="$CFLAGS" +export CFLAGS="$CFLAGS -Werror" + dnl dnl Check compiler flags dnl @@ -309,6 +315,11 @@ AX_CHECK_COMPILE_FLAG([-fno-math-errno], [CFLAGS="$CFLAGS AX_CHECK_COMPILE_FLAG([-fno-trapping-math], [CFLAGS="$CFLAGS -fno-trapping-math"]) AX_CHECK_COMPILE_FLAG([-fvisibility=hidden], [VISIBILITY_CFLAGS="-fvisibility=hidden"]) +CFLAGS="$save_CFLAGS" + +# Toggle Werror since at some point clang started treating unknown -W +# flags as warnings, succeeding with the build, yet issuing an annoying +# warning. dnl dnl Check C++ compiler flags dnl @@ -1415,6 +1426,7 @@ AM_CONDITIONAL(NEED_OPENGL_COMMON, test "x$enable_opengl" = xyes -o \ "x$enable_gles1" = xyes -o \ "x$enable_gles2" = xyes) AM_CONDITIONAL(NEED_KHRPLATFORM, test "x$enable_egl" = xyes -o \ + "x$enable_opengl" = xyes -o \ "x$enable_gles1" = xyes -o \ "x$enable_gles2" = xyes) @@ -1503,15 +1515,15 @@ fi AC_ARG_WITH([gl-lib-name], [AS_HELP_STRING([--with-gl-lib-name@<:@=NAME@:>@], [specify GL library name @<:@default=GL@:>@])], - [GL_LIB=$withval], - [GL_LIB="$DEFAULT_GL_LIB_NAME"]) + [AC_MSG_ERROR([--with-gl-lib-name is no longer supported. Rename the library manually if needed.])], + []) AC_ARG_WITH([osmesa-lib-name], [AS_HELP_STRING([--with-osmesa-lib-name@<:@=NAME@:>@], [specify OSMesa library name @<:@default=OSMesa@:>@])], - [OSMESA_LIB=$withval], - [OSMESA_LIB=OSMesa]) -AS_IF([test "x$GL_LIB" = xyes], [GL_LIB="$DEFAULT_GL_LIB_NAME"]) -AS_IF([test "x$OSMESA_LIB" = xyes], [OSMESA_LIB=OSMesa]) + [AC_MSG_ERROR([--with-osmesa-lib-name is no longer supported. 
Rename the library manually if needed.])], + []) +GL_LIB="$DEFAULT_GL_LIB_NAME" +OSMESA_LIB=OSMesa dnl dnl Mangled Mesa support @@ -1523,6 +1535,9 @@ AC_ARG_ENABLE([mangling], [enable_mangling=no] ) if test "x${enable_mangling}" = "xyes" ; then + if test "x$enable_libglvnd" = xyes; then + AC_MSG_ERROR([Conflicting options --enable-mangling and --enable-libglvnd.]) + fi DEFINES="${DEFINES} -DUSE_MGL_NAMESPACE" GL_LIB="Mangled${GL_LIB}" OSMESA_LIB="Mangled${OSMESA_LIB}" @@ -1530,6 +1545,15 @@ fi AC_SUBST([GL_LIB]) AC_SUBST([OSMESA_LIB]) +dnl HACK when building glx + glvnd we ship gl.pc, despite that glvnd should do it +dnl Thus we need to use GL as a DSO name. +if test "x$enable_libglvnd" = xyes -a "x$enable_glx" != xno; then + GL_PKGCONF_LIB="GL" +else + GL_PKGCONF_LIB="$GL_LIB" +fi +AC_SUBST([GL_PKGCONF_LIB]) + # Check for libdrm PKG_CHECK_MODULES([LIBDRM], [libdrm >= $LIBDRM_REQUIRED], [have_libdrm=yes], [have_libdrm=no]) @@ -1658,6 +1682,8 @@ xxlib | xgallium-xlib) xdri) # DRI-based GLX + require_dri_shared_libs_and_glapi "GLX" + # find the DRI deps for libGL dri_modules="x11 xext xdamage >= $XDAMAGE_REQUIRED xfixes x11-xcb xcb xcb-glx >= $XCBGLX_REQUIRED" diff --git a/docs/faq.html b/docs/faq.html index 1f2fd66034c..6270a071dac 100644 --- a/docs/faq.html +++ b/docs/faq.html @@ -16,7 +16,7 @@

The Mesa 3D Graphics Library

Mesa Frequently Asked Questions

-Last updated: 9 October 2012 +Last updated: 19 September 2018

@@ -373,18 +373,16 @@

4.2 How do I write a new device driver?

4.3 Why isn't GL_EXT_texture_compression_s3tc implemented in Mesa?

-The specification for the extension -indicates that there are intellectual property (IP) and/or patent issues -to be dealt with. +Oh but it is! Prior to 2nd October 2017, the Mesa project did not include s3tc +support due to intellectual property (IP) and/or patent issues around the s3tc +algorithm.

-

We've been unsuccessful in getting a response from S3 (or whoever owns -the IP nowadays) to indicate whether or not an open source project can -implement the extension (specifically the compression/decompression -algorithms). +

+As of Mesa 17.3.0, Mesa now officially supports s3tc, as the patent has expired.

-In the mean time, a 3rd party -plug-in library is available. +In versions prior to this, a 3rd party +plug-in library was required.

diff --git a/docs/install.html b/docs/install.html index 08081944cfc..5493da054c5 100644 --- a/docs/install.html +++ b/docs/install.html @@ -75,7 +75,7 @@

1.1 General

Version 2.6.4 or later should work.
  • Python Mako module - -Python Mako module is required. Version 0.3.4 or later should work. +Python Mako module is required. Version 0.8.0 or later should work.
  • lex / yacc - for building the Mesa IR and GLSL compiler.
    diff --git a/docs/relnotes/18.2.0.html b/docs/relnotes/18.2.0.html index fb7a12f2859..968312ca901 100644 --- a/docs/relnotes/18.2.0.html +++ b/docs/relnotes/18.2.0.html @@ -14,7 +14,7 @@

    The Mesa 3D Graphics Library

    -

    Mesa 18.2.0 Release Notes / TBD

    +

    Mesa 18.2.0 Release Notes / September 7, 2018

    Mesa 18.2.0 is a new development release. People who are concerned @@ -40,7 +40,8 @@

    Mesa 18.2.0 Release Notes / TBD

    SHA256 checksums

    -TBD.
    +b9e6bb3eb7660b0726ba28405ffa0cb77de619e925b910b72f4d7a85c0098596  mesa-18.2.0.tar.gz
    +22452bdffff8e11bf4284278155a9f77cb28d6d73a12c507f1490732d0d9ddce  mesa-18.2.0.tar.xz
     
    @@ -59,9 +60,217 @@

    New features

  • GL_ARB_sample_locations and GL_NV_sample_locations on nvc0 (GM200+)
  • GL_ANDROID_extension_pack_es31a on radeonsi.
  • GL_KHR_texture_compression_astc_ldr on radeonsi
  • +
  • GL_NV_conservative_raster and GL_NV_conservative_raster_dilate on nvc0 (GM200+)
  • +
  • GL_NV_conservative_raster_pre_snap_triangles on nvc0 (GP102+)
  • +
  • multisampled images on nvc0 (GM107+) (now supported on GF100+)
  • Bug fixes

    +

    Changes

    diff --git a/docs/relnotes/18.2.1.html b/docs/relnotes/18.2.1.html new file mode 100644 index 00000000000..23fb8f46b5a --- /dev/null +++ b/docs/relnotes/18.2.1.html @@ -0,0 +1,227 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 18.2.1 Release Notes / September 21, 2018

    + +

    +Mesa 18.2.1 is a bug fix release which fixes bugs found since the 18.2.0 release. +

    +

    +Mesa 18.2.1 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +SHA256: 45419ccbe1bf9a2e15ffe71ced34615002e1b42c24b917fbe2b2f58ab1970562  mesa-18.2.1.tar.gz
    +SHA256: 9636dc6f3d188abdcca02da97cedd73640d9035224efd5db724187d062c81056  mesa-18.2.1.tar.xz
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    + + + + +

    Changes

    + +

    Andres Gomez (3):

    + + +

    Andrii Simiklit (4):

    + + +

    Bas Nieuwenhuizen (5):

    + + +

    Christopher Egert (1):

    + + +

    Dave Airlie (1):

    + + +

    Dylan Baker (1):

    + + +

    Eric Anholt (2):

    + + +

    Erik Faye-Lund (2):

    + + +

    Fritz Koenig (2):

    + + +

    Gert Wollny (2):

    + + +

    Ian Romanick (1):

    + + +

    Jason Ekstrand (11):

    + + +

    Josh Pieper (1):

    + + +

    Juan A. Suarez Romero (2):

    + + +

    Kenneth Feng (1):

    + + +

    Marek Olšák (5):

    + + +

    Mathias Fröhlich (1):

    + + +

    Mauro Rossi (3):

    + + +

    Michel Dänzer (1):

    + + +

    Pierre Moreau (1):

    + + +

    Samuel Pitoiset (7):

    + + +

    Sergii Romantsov (3):

    + + +

    Timothy Arceri (2):

    + + + +
    + + diff --git a/docs/relnotes/18.2.2.html b/docs/relnotes/18.2.2.html new file mode 100644 index 00000000000..9793c03a840 --- /dev/null +++ b/docs/relnotes/18.2.2.html @@ -0,0 +1,155 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 18.2.2 Release Notes / October 5, 2018

    + +

    +Mesa 18.2.2 is a bug fix release which fixes bugs found since the 18.2.1 release. +

    +

    +Mesa 18.2.2 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +SHA256: c51711168971957037cc7e3e19e8abe1ec6eeab9cf236d419a1e7728a41cac8a  mesa-18.2.2.tar.gz
    +SHA256: c3ba82b12a89d3d9fed2bdd96b4702dbb7ab675034650a8b1b718320daf073c4  mesa-18.2.2.tar.xz
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    + + + + +

    Changes

    + +

    Alex Deucher (1):

    + + +

    Andres Rodriguez (1):

    + + +

    Axel Davy (3):

    + + +

    Dylan Baker (1):

    + + +

    Eric Anholt (1):

    + + +

    Eric Engestrom (1):

    + + +

    Jason Ekstrand (1):

    + + +

    Juan A. Suarez Romero (2):

    + + +

    Leo Liu (1):

    + + +

    Marek Olšák (2):

    + + +

    Maxime (1):

    + + +

    Michal Srb (1):

    + + +

    Rhys Perry (2):

    + + +

    Samuel Iglesias Gonsálvez (1):

    + + +

    Samuel Pitoiset (1):

    + + +

    Stuart Young (1):

    + + +

    Timothy Arceri (1):

    + + + +
    + + diff --git a/docs/relnotes/18.2.3.html b/docs/relnotes/18.2.3.html new file mode 100644 index 00000000000..596a0a12072 --- /dev/null +++ b/docs/relnotes/18.2.3.html @@ -0,0 +1,167 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 18.2.3 Release Notes / October 19, 2018

    + +

    +Mesa 18.2.3 is a bug fix release which fixes bugs found since the 18.2.2 release. +

    +

    +Mesa 18.2.3 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +0e13e2342eae74d8848df23595c4bb4b2f8874c9e1213b8466b1fbfa7ef99375  mesa-18.2.3.tar.gz
    +e2bf83c17e1abdecb1ee81af22652e27e9aa38f963e95e60f34275cc0376304f  mesa-18.2.3.tar.xz
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    + + + + +

    Changes

    + +

    Boyuan Zhang (1):

    + + +

    Dave Airlie (1):

    + + +

    Dylan Baker (1):

    + + +

    Emil Velikov (5):

    + + +

    Eric Engestrom (1):

    + + +

    Fritz Koenig (1):

    + + +

    Gert Wollny (1):

    + + +

    Ilia Mirkin (4):

    + + +

    Jason Ekstrand (7):

    + + +

    Juan A. Suarez Romero (2):

    + + +

    Józef Kucia (1):

    + + +

    Marek Olšák (1):

    + + +

    Samuel Pitoiset (1):

    + + +

    Tapani Pälli (1):

    + + +

    Timothy Arceri (11):

    + + +

    Vinson Lee (1):

    + + + +
    + + diff --git a/docs/relnotes/18.2.4.html b/docs/relnotes/18.2.4.html new file mode 100644 index 00000000000..5da4362d09a --- /dev/null +++ b/docs/relnotes/18.2.4.html @@ -0,0 +1,154 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 18.2.4 Release Notes / October 31, 2018

    + +

    +Mesa 18.2.4 is a bug fix release which fixes bugs found since the 18.2.3 release. +

    +

    +Mesa 18.2.4 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +968bfe78605e9397ddf244933b1fa62edb8429fc55aaec2ae7e20bb1c82abdea  mesa-18.2.4.tar.gz
    +621d1aebb57876d5b6a5d2dcf4eb7e0620e650c6fe5cf3655c65e243adc9cb4e  mesa-18.2.4.tar.xz
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    + + + + +

    Changes

    + +

    Alex Smith (2):

    + + +

    Alok Hota (2):

    + + +

    Andres Rodriguez (1):

    + + +

    Bas Nieuwenhuizen (1):

    + + +

    Connor Abbott (2):

    + + +

    David McFarland (1):

    + + +

    Dylan Baker (1):

    + + +

    Elie Tournier (1):

    + + +

    Eric Engestrom (1):

    + + +

    Jan Vesely (1):

    + + +

    Jason Ekstrand (3):

    + + +

    Juan A. Suarez Romero (3):

    + + +

    Liviu Prodea (1):

    + + +

    Marek Olšák (1):

    + + +

    Michel Dänzer (1):

    + + +

    Nanley Chery (1):

    + + +

    Rob Clark (2):

    + + + +
    + + diff --git a/docs/relnotes/18.2.5.html b/docs/relnotes/18.2.5.html new file mode 100644 index 00000000000..d1e7887a3a9 --- /dev/null +++ b/docs/relnotes/18.2.5.html @@ -0,0 +1,171 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 18.2.5 Release Notes / November 15, 2018

    + +

    +Mesa 18.2.5 is a bug fix release which fixes bugs found since the 18.2.4 release. +

    +

    +Mesa 18.2.5 implements the OpenGL 4.5 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.5. OpenGL +4.5 is only available if requested at context creation. +Compatibility contexts may report a lower version depending on each driver. +

    + + +

    SHA256 checksums

    +
    +TBD
    +
    + + +

    New features

    +

    None

    + + +

    Bug fixes

    + + + + +

    Changes

    + +

    Andre Heider (1):

    + + +

    Andrii Simiklit (1):

    + + +

    Dylan Baker (2):

    + + +

    Emil Velikov (2):

    + + +

    Eric Engestrom (6):

    + + +

    Gert Wollny (1):

    + + +

    Jonathan Gray (1):

    + + +

    Juan A. Suarez Romero (4):

    + + +

    Lionel Landwerlin (1):

    + + +

    Marek Olšák (3):

    + + +

    Matt Turner (2):

    + + +

    Olivier Fourdan (1):

    + + +

    Rhys Perry (1):

    + + +

    Samuel Pitoiset (2):

    + + +

    Sergii Romantsov (1):

    + + +

    Timothy Arceri (4):

    + + +

    Vadym Shovkoplias (1):

    + + +

    Vinson Lee (1):

    + + + +
    + + diff --git a/include/GL/glcorearb.h b/include/GL/glcorearb.h index a78bbb6e182..3cf945c8b20 100644 --- a/include/GL/glcorearb.h +++ b/include/GL/glcorearb.h @@ -1,12 +1,12 @@ -#ifndef __glcorearb_h_ -#define __glcorearb_h_ 1 +#ifndef __gl_glcorearb_h_ +#define __gl_glcorearb_h_ 1 #ifdef __cplusplus extern "C" { #endif /* -** Copyright (c) 2013-2017 The Khronos Group Inc. +** Copyright (c) 2013-2018 The Khronos Group Inc. ** ** Permission is hereby granted, free of charge, to any person obtaining a ** copy of this software and/or associated documentation files (the @@ -306,7 +306,7 @@ typedef void (APIENTRYP PFNGLGETTEXPARAMETERIVPROC) (GLenum target, GLenum pname typedef void (APIENTRYP PFNGLGETTEXLEVELPARAMETERFVPROC) (GLenum target, GLint level, GLenum pname, GLfloat *params); typedef void (APIENTRYP PFNGLGETTEXLEVELPARAMETERIVPROC) (GLenum target, GLint level, GLenum pname, GLint *params); typedef GLboolean (APIENTRYP PFNGLISENABLEDPROC) (GLenum cap); -typedef void (APIENTRYP PFNGLDEPTHRANGEPROC) (GLdouble near, GLdouble far); +typedef void (APIENTRYP PFNGLDEPTHRANGEPROC) (GLdouble n, GLdouble f); typedef void (APIENTRYP PFNGLVIEWPORTPROC) (GLint x, GLint y, GLsizei width, GLsizei height); #ifdef GL_GLEXT_PROTOTYPES GLAPI void APIENTRY glCullFace (GLenum mode); @@ -355,7 +355,7 @@ GLAPI void APIENTRY glGetTexParameteriv (GLenum target, GLenum pname, GLint *par GLAPI void APIENTRY glGetTexLevelParameterfv (GLenum target, GLint level, GLenum pname, GLfloat *params); GLAPI void APIENTRY glGetTexLevelParameteriv (GLenum target, GLint level, GLenum pname, GLint *params); GLAPI GLboolean APIENTRY glIsEnabled (GLenum cap); -GLAPI void APIENTRY glDepthRange (GLdouble near, GLdouble far); +GLAPI void APIENTRY glDepthRange (GLdouble n, GLdouble f); GLAPI void APIENTRY glViewport (GLint x, GLint y, GLsizei width, GLsizei height); #endif #endif /* GL_VERSION_1_0 */ @@ -613,9 +613,9 @@ GLAPI void APIENTRY glBlendEquation (GLenum mode); #ifndef GL_VERSION_1_5 #define 
GL_VERSION_1_5 1 -#include -typedef ptrdiff_t GLsizeiptr; -typedef ptrdiff_t GLintptr; +#include +typedef khronos_ssize_t GLsizeiptr; +typedef khronos_intptr_t GLintptr; #define GL_BUFFER_SIZE 0x8764 #define GL_BUFFER_USAGE 0x8765 #define GL_QUERY_COUNTER_BITS 0x8864 @@ -3958,6 +3958,22 @@ GLAPI void APIENTRY glMaxShaderCompilerThreadsKHR (GLuint count); #define GL_KHR_texture_compression_astc_sliced_3d 1 #endif /* GL_KHR_texture_compression_astc_sliced_3d */ +#ifndef GL_AMD_framebuffer_multisample_advanced +#define GL_AMD_framebuffer_multisample_advanced 1 +#define GL_RENDERBUFFER_STORAGE_SAMPLES_AMD 0x91B2 +#define GL_MAX_COLOR_FRAMEBUFFER_SAMPLES_AMD 0x91B3 +#define GL_MAX_COLOR_FRAMEBUFFER_STORAGE_SAMPLES_AMD 0x91B4 +#define GL_MAX_DEPTH_STENCIL_FRAMEBUFFER_SAMPLES_AMD 0x91B5 +#define GL_NUM_SUPPORTED_MULTISAMPLE_MODES_AMD 0x91B6 +#define GL_SUPPORTED_MULTISAMPLE_MODES_AMD 0x91B7 +typedef void (APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC) (GLenum target, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC) (GLuint renderbuffer, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glRenderbufferStorageMultisampleAdvancedAMD (GLenum target, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height); +GLAPI void APIENTRY glNamedRenderbufferStorageMultisampleAdvancedAMD (GLuint renderbuffer, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height); +#endif +#endif /* GL_AMD_framebuffer_multisample_advanced */ + #ifndef GL_AMD_performance_monitor #define GL_AMD_performance_monitor 1 #define GL_COUNTER_TYPE_AMD 0x8BC0 @@ -4001,6 +4017,17 @@ GLAPI void APIENTRY glGetPerfMonitorCounterDataAMD (GLuint monitor, GLenum pname #define GL_RGB_RAW_422_APPLE 0x8A51 #endif /* 
GL_APPLE_rgb_422 */ +#ifndef GL_EXT_EGL_image_storage +#define GL_EXT_EGL_image_storage 1 +typedef void *GLeglImageOES; +typedef void (APIENTRYP PFNGLEGLIMAGETARGETTEXSTORAGEEXTPROC) (GLenum target, GLeglImageOES image, const GLint* attrib_list); +typedef void (APIENTRYP PFNGLEGLIMAGETARGETTEXTURESTORAGEEXTPROC) (GLuint texture, GLeglImageOES image, const GLint* attrib_list); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glEGLImageTargetTexStorageEXT (GLenum target, GLeglImageOES image, const GLint* attrib_list); +GLAPI void APIENTRY glEGLImageTargetTextureStorageEXT (GLuint texture, GLeglImageOES image, const GLint* attrib_list); +#endif +#endif /* GL_EXT_EGL_image_storage */ + #ifndef GL_EXT_debug_label #define GL_EXT_debug_label 1 #define GL_PROGRAM_PIPELINE_OBJECT_EXT 0x8A4F @@ -4598,6 +4625,19 @@ GLAPI GLuint APIENTRY glCreateShaderProgramEXT (GLenum type, const GLchar *strin #endif #endif /* GL_EXT_separate_shader_objects */ +#ifndef GL_EXT_shader_framebuffer_fetch +#define GL_EXT_shader_framebuffer_fetch 1 +#define GL_FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT 0x8A52 +#endif /* GL_EXT_shader_framebuffer_fetch */ + +#ifndef GL_EXT_shader_framebuffer_fetch_non_coherent +#define GL_EXT_shader_framebuffer_fetch_non_coherent 1 +typedef void (APIENTRYP PFNGLFRAMEBUFFERFETCHBARRIEREXTPROC) (void); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glFramebufferFetchBarrierEXT (void); +#endif +#endif /* GL_EXT_shader_framebuffer_fetch_non_coherent */ + #ifndef GL_EXT_shader_integer_mix #define GL_EXT_shader_integer_mix 1 #endif /* GL_EXT_shader_integer_mix */ @@ -4612,6 +4652,8 @@ GLAPI GLuint APIENTRY glCreateShaderProgramEXT (GLenum type, const GLchar *strin #ifndef GL_EXT_texture_filter_minmax #define GL_EXT_texture_filter_minmax 1 +#define GL_TEXTURE_REDUCTION_MODE_EXT 0x9366 +#define GL_WEIGHTED_AVERAGE_EXT 0x9367 #endif /* GL_EXT_texture_filter_minmax */ #ifndef GL_EXT_texture_sRGB_decode @@ -4635,6 +4677,11 @@ GLAPI void APIENTRY glWindowRectanglesEXT (GLenum 
mode, GLsizei count, const GLi #endif #endif /* GL_EXT_window_rectangles */ +#ifndef GL_INTEL_blackhole_render +#define GL_INTEL_blackhole_render 1 +#define GL_BLACKHOLE_RENDER_INTEL 0x83FC +#endif /* GL_INTEL_blackhole_render */ + #ifndef GL_INTEL_conservative_rasterization #define GL_INTEL_conservative_rasterization 1 #define GL_CONSERVATIVE_RASTERIZATION_INTEL 0x83FE @@ -4677,7 +4724,7 @@ typedef void (APIENTRYP PFNGLENDPERFQUERYINTELPROC) (GLuint queryHandle); typedef void (APIENTRYP PFNGLGETFIRSTPERFQUERYIDINTELPROC) (GLuint *queryId); typedef void (APIENTRYP PFNGLGETNEXTPERFQUERYIDINTELPROC) (GLuint queryId, GLuint *nextQueryId); typedef void (APIENTRYP PFNGLGETPERFCOUNTERINFOINTELPROC) (GLuint queryId, GLuint counterId, GLuint counterNameLength, GLchar *counterName, GLuint counterDescLength, GLchar *counterDesc, GLuint *counterOffset, GLuint *counterDataSize, GLuint *counterTypeEnum, GLuint *counterDataTypeEnum, GLuint64 *rawCounterMaxValue); -typedef void (APIENTRYP PFNGLGETPERFQUERYDATAINTELPROC) (GLuint queryHandle, GLuint flags, GLsizei dataSize, GLvoid *data, GLuint *bytesWritten); +typedef void (APIENTRYP PFNGLGETPERFQUERYDATAINTELPROC) (GLuint queryHandle, GLuint flags, GLsizei dataSize, void *data, GLuint *bytesWritten); typedef void (APIENTRYP PFNGLGETPERFQUERYIDBYNAMEINTELPROC) (GLchar *queryName, GLuint *queryId); typedef void (APIENTRYP PFNGLGETPERFQUERYINFOINTELPROC) (GLuint queryId, GLuint queryNameLength, GLchar *queryName, GLuint *dataSize, GLuint *noCounters, GLuint *noInstances, GLuint *capsMask); #ifdef GL_GLEXT_PROTOTYPES @@ -4688,7 +4735,7 @@ GLAPI void APIENTRY glEndPerfQueryINTEL (GLuint queryHandle); GLAPI void APIENTRY glGetFirstPerfQueryIdINTEL (GLuint *queryId); GLAPI void APIENTRY glGetNextPerfQueryIdINTEL (GLuint queryId, GLuint *nextQueryId); GLAPI void APIENTRY glGetPerfCounterInfoINTEL (GLuint queryId, GLuint counterId, GLuint counterNameLength, GLchar *counterName, GLuint counterDescLength, GLchar *counterDesc, GLuint 
*counterOffset, GLuint *counterDataSize, GLuint *counterTypeEnum, GLuint *counterDataTypeEnum, GLuint64 *rawCounterMaxValue); -GLAPI void APIENTRY glGetPerfQueryDataINTEL (GLuint queryHandle, GLuint flags, GLsizei dataSize, GLvoid *data, GLuint *bytesWritten); +GLAPI void APIENTRY glGetPerfQueryDataINTEL (GLuint queryHandle, GLuint flags, GLsizei dataSize, void *data, GLuint *bytesWritten); GLAPI void APIENTRY glGetPerfQueryIdByNameINTEL (GLchar *queryName, GLuint *queryId); GLAPI void APIENTRY glGetPerfQueryInfoINTEL (GLuint queryId, GLuint queryNameLength, GLchar *queryName, GLuint *dataSize, GLuint *noCounters, GLuint *noInstances, GLuint *capsMask); #endif @@ -4923,6 +4970,11 @@ GLAPI void APIENTRY glConservativeRasterParameterfNV (GLenum pname, GLfloat valu #endif #endif /* GL_NV_conservative_raster_dilate */ +#ifndef GL_NV_conservative_raster_pre_snap +#define GL_NV_conservative_raster_pre_snap 1 +#define GL_CONSERVATIVE_RASTER_MODE_PRE_SNAP_NV 0x9550 +#endif /* GL_NV_conservative_raster_pre_snap */ + #ifndef GL_NV_conservative_raster_pre_snap_triangles #define GL_NV_conservative_raster_pre_snap_triangles 1 #define GL_CONSERVATIVE_RASTER_MODE_NV 0x954D @@ -4934,6 +4986,10 @@ GLAPI void APIENTRY glConservativeRasterParameteriNV (GLenum pname, GLint param) #endif #endif /* GL_NV_conservative_raster_pre_snap_triangles */ +#ifndef GL_NV_conservative_raster_underestimation +#define GL_NV_conservative_raster_underestimation 1 +#endif /* GL_NV_conservative_raster_underestimation */ + #ifndef GL_NV_draw_vulkan_image #define GL_NV_draw_vulkan_image 1 typedef void (APIENTRY *GLVULKANPROCNV)(void); diff --git a/include/GL/glext.h b/include/GL/glext.h index 75fd1f61185..181df28d3bb 100644 --- a/include/GL/glext.h +++ b/include/GL/glext.h @@ -1,12 +1,12 @@ -#ifndef __glext_h_ -#define __glext_h_ 1 +#ifndef __gl_glext_h_ +#define __gl_glext_h_ 1 #ifdef __cplusplus extern "C" { #endif /* -** Copyright (c) 2013-2017 The Khronos Group Inc. 
+** Copyright (c) 2013-2018 The Khronos Group Inc. ** ** Permission is hereby granted, free of charge, to any person obtaining a ** copy of this software and/or associated documentation files (the @@ -51,7 +51,7 @@ extern "C" { #define GLAPI extern #endif -#define GL_GLEXT_VERSION 20171010 +#define GL_GLEXT_VERSION 20180725 /* Generated C header for: * API: gl @@ -464,9 +464,9 @@ GLAPI void APIENTRY glBlendEquation (GLenum mode); #ifndef GL_VERSION_1_5 #define GL_VERSION_1_5 1 -#include -typedef ptrdiff_t GLsizeiptr; -typedef ptrdiff_t GLintptr; +#include +typedef khronos_ssize_t GLsizeiptr; +typedef khronos_intptr_t GLintptr; #define GL_BUFFER_SIZE 0x8764 #define GL_BUFFER_USAGE 0x8765 #define GL_QUERY_COUNTER_BITS 0x8864 @@ -4718,6 +4718,7 @@ GLAPI void APIENTRY glVertexBlendARB (GLint count); #ifndef GL_ARB_vertex_buffer_object #define GL_ARB_vertex_buffer_object 1 +#include typedef ptrdiff_t GLsizeiptrARB; typedef ptrdiff_t GLintptrARB; #define GL_BUFFER_SIZE_ARB 0x8764 @@ -5445,6 +5446,22 @@ GLAPI void APIENTRY glBlendEquationSeparateIndexedAMD (GLuint buf, GLenum modeRG #endif #endif /* GL_AMD_draw_buffers_blend */ +#ifndef GL_AMD_framebuffer_multisample_advanced +#define GL_AMD_framebuffer_multisample_advanced 1 +#define GL_RENDERBUFFER_STORAGE_SAMPLES_AMD 0x91B2 +#define GL_MAX_COLOR_FRAMEBUFFER_SAMPLES_AMD 0x91B3 +#define GL_MAX_COLOR_FRAMEBUFFER_STORAGE_SAMPLES_AMD 0x91B4 +#define GL_MAX_DEPTH_STENCIL_FRAMEBUFFER_SAMPLES_AMD 0x91B5 +#define GL_NUM_SUPPORTED_MULTISAMPLE_MODES_AMD 0x91B6 +#define GL_SUPPORTED_MULTISAMPLE_MODES_AMD 0x91B7 +typedef void (APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC) (GLenum target, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC) (GLuint renderbuffer, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI 
void APIENTRY glRenderbufferStorageMultisampleAdvancedAMD (GLenum target, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height); +GLAPI void APIENTRY glNamedRenderbufferStorageMultisampleAdvancedAMD (GLuint renderbuffer, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height); +#endif +#endif /* GL_AMD_framebuffer_multisample_advanced */ + #ifndef GL_AMD_framebuffer_sample_positions #define GL_AMD_framebuffer_sample_positions 1 #define GL_SUBSAMPLE_DISTANCE_AMD 0x883F @@ -5709,6 +5726,10 @@ GLAPI void APIENTRY glSetMultisamplefvAMD (GLenum pname, GLuint index, const GLf #define GL_AMD_shader_explicit_vertex_parameter 1 #endif /* GL_AMD_shader_explicit_vertex_parameter */ +#ifndef GL_AMD_shader_gpu_shader_half_float_fetch +#define GL_AMD_shader_gpu_shader_half_float_fetch 1 +#endif /* GL_AMD_shader_gpu_shader_half_float_fetch */ + #ifndef GL_AMD_shader_image_load_store_lod #define GL_AMD_shader_image_load_store_lod 1 #endif /* GL_AMD_shader_image_load_store_lod */ @@ -6456,6 +6477,17 @@ GLAPI void APIENTRY glVertexBlendEnvfATI (GLenum pname, GLfloat param); #define GL_422_REV_AVERAGE_EXT 0x80CF #endif /* GL_EXT_422_pixels */ +#ifndef GL_EXT_EGL_image_storage +#define GL_EXT_EGL_image_storage 1 +typedef void *GLeglImageOES; +typedef void (APIENTRYP PFNGLEGLIMAGETARGETTEXSTORAGEEXTPROC) (GLenum target, GLeglImageOES image, const GLint* attrib_list); +typedef void (APIENTRYP PFNGLEGLIMAGETARGETTEXTURESTORAGEEXTPROC) (GLuint texture, GLeglImageOES image, const GLint* attrib_list); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glEGLImageTargetTexStorageEXT (GLenum target, GLeglImageOES image, const GLint* attrib_list); +GLAPI void APIENTRY glEGLImageTargetTextureStorageEXT (GLuint texture, GLeglImageOES image, const GLint* attrib_list); +#endif +#endif /* GL_EXT_EGL_image_storage */ + #ifndef GL_EXT_abgr #define GL_EXT_abgr 1 #define GL_ABGR_EXT 0x8000 @@ -7994,6 +8026,8 @@ GLAPI void 
APIENTRY glSecondaryColorPointerEXT (GLint size, GLenum type, GLsizei #define GL_LAYOUT_SHADER_READ_ONLY_EXT 0x9591 #define GL_LAYOUT_TRANSFER_SRC_EXT 0x9592 #define GL_LAYOUT_TRANSFER_DST_EXT 0x9593 +#define GL_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_EXT 0x9530 +#define GL_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_EXT 0x9531 typedef void (APIENTRYP PFNGLGENSEMAPHORESEXTPROC) (GLsizei n, GLuint *semaphores); typedef void (APIENTRYP PFNGLDELETESEMAPHORESEXTPROC) (GLsizei n, const GLuint *semaphores); typedef GLboolean (APIENTRYP PFNGLISSEMAPHOREEXTPROC) (GLuint semaphore); @@ -8052,6 +8086,19 @@ GLAPI GLuint APIENTRY glCreateShaderProgramEXT (GLenum type, const GLchar *strin #define GL_SEPARATE_SPECULAR_COLOR_EXT 0x81FA #endif /* GL_EXT_separate_specular_color */ +#ifndef GL_EXT_shader_framebuffer_fetch +#define GL_EXT_shader_framebuffer_fetch 1 +#define GL_FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT 0x8A52 +#endif /* GL_EXT_shader_framebuffer_fetch */ + +#ifndef GL_EXT_shader_framebuffer_fetch_non_coherent +#define GL_EXT_shader_framebuffer_fetch_non_coherent 1 +typedef void (APIENTRYP PFNGLFRAMEBUFFERFETCHBARRIEREXTPROC) (void); +#ifdef GL_GLEXT_PROTOTYPES +GLAPI void APIENTRY glFramebufferFetchBarrierEXT (void); +#endif +#endif /* GL_EXT_shader_framebuffer_fetch_non_coherent */ + #ifndef GL_EXT_shader_image_load_formatted #define GL_EXT_shader_image_load_formatted 1 #endif /* GL_EXT_shader_image_load_formatted */ @@ -8352,6 +8399,8 @@ GLAPI void APIENTRY glTexBufferEXT (GLenum target, GLenum internalformat, GLuint #ifndef GL_EXT_texture_filter_minmax #define GL_EXT_texture_filter_minmax 1 +#define GL_TEXTURE_REDUCTION_MODE_EXT 0x9366 +#define GL_WEIGHTED_AVERAGE_EXT 0x9367 #endif /* GL_EXT_texture_filter_minmax */ #ifndef GL_EXT_texture_integer @@ -9099,6 +9148,11 @@ GLAPI void APIENTRY glBlendFuncSeparateINGR (GLenum sfactorRGB, GLenum dfactorRG #define GL_INTERLACE_READ_INGR 0x8568 #endif /* GL_INGR_interlace_read */ +#ifndef GL_INTEL_blackhole_render +#define 
GL_INTEL_blackhole_render 1 +#define GL_BLACKHOLE_RENDER_INTEL 0x83FC +#endif /* GL_INTEL_blackhole_render */ + #ifndef GL_INTEL_conservative_rasterization #define GL_INTEL_conservative_rasterization 1 #define GL_CONSERVATIVE_RASTERIZATION_INTEL 0x83FE @@ -9180,7 +9234,7 @@ typedef void (APIENTRYP PFNGLENDPERFQUERYINTELPROC) (GLuint queryHandle); typedef void (APIENTRYP PFNGLGETFIRSTPERFQUERYIDINTELPROC) (GLuint *queryId); typedef void (APIENTRYP PFNGLGETNEXTPERFQUERYIDINTELPROC) (GLuint queryId, GLuint *nextQueryId); typedef void (APIENTRYP PFNGLGETPERFCOUNTERINFOINTELPROC) (GLuint queryId, GLuint counterId, GLuint counterNameLength, GLchar *counterName, GLuint counterDescLength, GLchar *counterDesc, GLuint *counterOffset, GLuint *counterDataSize, GLuint *counterTypeEnum, GLuint *counterDataTypeEnum, GLuint64 *rawCounterMaxValue); -typedef void (APIENTRYP PFNGLGETPERFQUERYDATAINTELPROC) (GLuint queryHandle, GLuint flags, GLsizei dataSize, GLvoid *data, GLuint *bytesWritten); +typedef void (APIENTRYP PFNGLGETPERFQUERYDATAINTELPROC) (GLuint queryHandle, GLuint flags, GLsizei dataSize, void *data, GLuint *bytesWritten); typedef void (APIENTRYP PFNGLGETPERFQUERYIDBYNAMEINTELPROC) (GLchar *queryName, GLuint *queryId); typedef void (APIENTRYP PFNGLGETPERFQUERYINFOINTELPROC) (GLuint queryId, GLuint queryNameLength, GLchar *queryName, GLuint *dataSize, GLuint *noCounters, GLuint *noInstances, GLuint *capsMask); #ifdef GL_GLEXT_PROTOTYPES @@ -9191,7 +9245,7 @@ GLAPI void APIENTRY glEndPerfQueryINTEL (GLuint queryHandle); GLAPI void APIENTRY glGetFirstPerfQueryIdINTEL (GLuint *queryId); GLAPI void APIENTRY glGetNextPerfQueryIdINTEL (GLuint queryId, GLuint *nextQueryId); GLAPI void APIENTRY glGetPerfCounterInfoINTEL (GLuint queryId, GLuint counterId, GLuint counterNameLength, GLchar *counterName, GLuint counterDescLength, GLchar *counterDesc, GLuint *counterOffset, GLuint *counterDataSize, GLuint *counterTypeEnum, GLuint *counterDataTypeEnum, GLuint64 *rawCounterMaxValue); 
-GLAPI void APIENTRY glGetPerfQueryDataINTEL (GLuint queryHandle, GLuint flags, GLsizei dataSize, GLvoid *data, GLuint *bytesWritten); +GLAPI void APIENTRY glGetPerfQueryDataINTEL (GLuint queryHandle, GLuint flags, GLsizei dataSize, void *data, GLuint *bytesWritten); GLAPI void APIENTRY glGetPerfQueryIdByNameINTEL (GLchar *queryName, GLuint *queryId); GLAPI void APIENTRY glGetPerfQueryInfoINTEL (GLuint queryId, GLuint queryNameLength, GLchar *queryName, GLuint *dataSize, GLuint *noCounters, GLuint *noInstances, GLuint *capsMask); #endif @@ -9583,6 +9637,11 @@ GLAPI void APIENTRY glConservativeRasterParameterfNV (GLenum pname, GLfloat valu #endif #endif /* GL_NV_conservative_raster_dilate */ +#ifndef GL_NV_conservative_raster_pre_snap +#define GL_NV_conservative_raster_pre_snap 1 +#define GL_CONSERVATIVE_RASTER_MODE_PRE_SNAP_NV 0x9550 +#endif /* GL_NV_conservative_raster_pre_snap */ + #ifndef GL_NV_conservative_raster_pre_snap_triangles #define GL_NV_conservative_raster_pre_snap_triangles 1 #define GL_CONSERVATIVE_RASTER_MODE_NV 0x954D @@ -9594,6 +9653,10 @@ GLAPI void APIENTRY glConservativeRasterParameteriNV (GLenum pname, GLint param) #endif #endif /* GL_NV_conservative_raster_pre_snap_triangles */ +#ifndef GL_NV_conservative_raster_underestimation +#define GL_NV_conservative_raster_underestimation 1 +#endif /* GL_NV_conservative_raster_underestimation */ + #ifndef GL_NV_copy_depth_to_color #define GL_NV_copy_depth_to_color 1 #define GL_DEPTH_STENCIL_TO_RGBA_NV 0x886E @@ -9902,7 +9965,7 @@ GLAPI void APIENTRY glFramebufferTextureFaceEXT (GLenum target, GLenum attachmen #define GL_PER_GPU_STORAGE_NV 0x9548 #define GL_MULTICAST_PROGRAMMABLE_SAMPLE_LOCATION_NV 0x9549 typedef void (APIENTRYP PFNGLRENDERGPUMASKNVPROC) (GLbitfield mask); -typedef void (APIENTRYP PFNGLMULTICASTBUFFERSUBDATANVPROC) (GLbitfield gpuMask, GLuint buffer, GLintptr offset, GLsizeiptr size, const GLvoid *data); +typedef void (APIENTRYP PFNGLMULTICASTBUFFERSUBDATANVPROC) (GLbitfield gpuMask, 
GLuint buffer, GLintptr offset, GLsizeiptr size, const void *data); typedef void (APIENTRYP PFNGLMULTICASTCOPYBUFFERSUBDATANVPROC) (GLuint readGpu, GLbitfield writeGpuMask, GLuint readBuffer, GLuint writeBuffer, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size); typedef void (APIENTRYP PFNGLMULTICASTCOPYIMAGESUBDATANVPROC) (GLuint srcGpu, GLbitfield dstGpuMask, GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, GLuint dstName, GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei srcWidth, GLsizei srcHeight, GLsizei srcDepth); typedef void (APIENTRYP PFNGLMULTICASTBLITFRAMEBUFFERNVPROC) (GLuint srcGpu, GLuint dstGpu, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); @@ -9915,7 +9978,7 @@ typedef void (APIENTRYP PFNGLMULTICASTGETQUERYOBJECTI64VNVPROC) (GLuint gpu, GLu typedef void (APIENTRYP PFNGLMULTICASTGETQUERYOBJECTUI64VNVPROC) (GLuint gpu, GLuint id, GLenum pname, GLuint64 *params); #ifdef GL_GLEXT_PROTOTYPES GLAPI void APIENTRY glRenderGpuMaskNV (GLbitfield mask); -GLAPI void APIENTRY glMulticastBufferSubDataNV (GLbitfield gpuMask, GLuint buffer, GLintptr offset, GLsizeiptr size, const GLvoid *data); +GLAPI void APIENTRY glMulticastBufferSubDataNV (GLbitfield gpuMask, GLuint buffer, GLintptr offset, GLsizeiptr size, const void *data); GLAPI void APIENTRY glMulticastCopyBufferSubDataNV (GLuint readGpu, GLbitfield writeGpuMask, GLuint readBuffer, GLuint writeBuffer, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size); GLAPI void APIENTRY glMulticastCopyImageSubDataNV (GLuint srcGpu, GLbitfield dstGpuMask, GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, GLuint dstName, GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei srcWidth, GLsizei srcHeight, GLsizei srcDepth); GLAPI void APIENTRY glMulticastBlitFramebufferNV (GLuint srcGpu, GLuint 
dstGpu, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); diff --git a/include/GL/glxext.h b/include/GL/glxext.h index 0f60a380c21..4c984ef4b89 100644 --- a/include/GL/glxext.h +++ b/include/GL/glxext.h @@ -1,12 +1,12 @@ -#ifndef __glxext_h_ -#define __glxext_h_ 1 +#ifndef __glx_glxext_h_ +#define __glx_glxext_h_ 1 #ifdef __cplusplus extern "C" { #endif /* -** Copyright (c) 2013-2017 The Khronos Group Inc. +** Copyright (c) 2013-2018 The Khronos Group Inc. ** ** Permission is hereby granted, free of charge, to any person obtaining a ** copy of this software and/or associated documentation files (the @@ -34,7 +34,7 @@ extern "C" { ** https://github.com/KhronosGroup/OpenGL-Registry */ -#define GLX_GLXEXT_VERSION 20170728 +#define GLX_GLXEXT_VERSION 20180525 /* Generated C header for: * API: glx @@ -325,6 +325,10 @@ void glXFreeContextEXT (Display *dpy, GLXContext context); #define GLX_VENDOR_NAMES_EXT 0x20F6 #endif /* GLX_EXT_libglvnd */ +#ifndef GLX_EXT_no_config_context +#define GLX_EXT_no_config_context 1 +#endif /* GLX_EXT_no_config_context */ + #ifndef GLX_EXT_stereo_tree #define GLX_EXT_stereo_tree 1 typedef struct { @@ -503,6 +507,16 @@ Bool glXSet3DfxModeMESA (int mode); #endif #endif /* GLX_MESA_set_3dfx_mode */ +#ifndef GLX_MESA_swap_control +#define GLX_MESA_swap_control 1 +typedef int ( *PFNGLXGETSWAPINTERVALMESAPROC) (void); +typedef int ( *PFNGLXSWAPINTERVALMESAPROC) (unsigned int interval); +#ifdef GLX_GLXEXT_PROTOTYPES +int glXGetSwapIntervalMESA (void); +int glXSwapIntervalMESA (unsigned int interval); +#endif +#endif /* GLX_MESA_swap_control */ + #ifndef GLX_NV_copy_buffer #define GLX_NV_copy_buffer 1 typedef void ( *PFNGLXCOPYBUFFERSUBDATANVPROC) (Display *dpy, GLXContext readCtx, GLXContext writeCtx, GLenum readTarget, GLenum writeTarget, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size); diff --git a/include/GL/internal/dri_interface.h 
b/include/GL/internal/dri_interface.h index c32cdd3767a..08d63184d1d 100644 --- a/include/GL/internal/dri_interface.h +++ b/include/GL/internal/dri_interface.h @@ -1333,6 +1333,10 @@ struct __DRIdri2ExtensionRec { #define __DRI_IMAGE_FOURCC_YVU422 0x36315659 #define __DRI_IMAGE_FOURCC_YVU444 0x34325659 +#define __DRI_IMAGE_FOURCC_P010 0x30313050 +#define __DRI_IMAGE_FOURCC_P012 0x32313050 +#define __DRI_IMAGE_FOURCC_P016 0x36313050 + /** * Queryable on images created by createImageFromNames. * diff --git a/include/GLES2/gl2.h b/include/GLES2/gl2.h index 8ba907c892c..b4051e5a7c5 100644 --- a/include/GLES2/gl2.h +++ b/include/GLES2/gl2.h @@ -1,12 +1,12 @@ -#ifndef __gl2_h_ -#define __gl2_h_ 1 +#ifndef __gles2_gl2_h_ +#define __gles2_gl2_h_ 1 #ifdef __cplusplus extern "C" { #endif /* -** Copyright (c) 2013-2017 The Khronos Group Inc. +** Copyright (c) 2013-2018 The Khronos Group Inc. ** ** Permission is hereby granted, free of charge, to any person obtaining a ** copy of this software and/or associated documentation files (the @@ -44,7 +44,7 @@ extern "C" { #define GL_GLES_PROTOTYPES 1 #endif -/* Generated on date 20170606 */ +/* Generated on date 20180725 */ /* Generated C header for: * API: gles2 diff --git a/include/GLES2/gl2ext.h b/include/GLES2/gl2ext.h index 0a93bfb8652..559173dee45 100644 --- a/include/GLES2/gl2ext.h +++ b/include/GLES2/gl2ext.h @@ -1,12 +1,12 @@ -#ifndef __gl2ext_h_ -#define __gl2ext_h_ 1 +#ifndef __gles2_gl2ext_h_ +#define __gles2_gl2ext_h_ 1 #ifdef __cplusplus extern "C" { #endif /* -** Copyright (c) 2013-2017 The Khronos Group Inc. +** Copyright (c) 2013-2018 The Khronos Group Inc. 
** ** Permission is hereby granted, free of charge, to any person obtaining a ** copy of this software and/or associated documentation files (the @@ -38,7 +38,7 @@ extern "C" { #define GL_APIENTRYP GL_APIENTRY* #endif -/* Generated on date 20170804 */ +/* Generated on date 20180725 */ /* Generated C header for: * API: gles2 @@ -159,6 +159,16 @@ GL_APICALL void GL_APIENTRY glGetPointervKHR (GLenum pname, void **params); #define GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR 0x00000008 #endif /* GL_KHR_no_error */ +#ifndef GL_KHR_parallel_shader_compile +#define GL_KHR_parallel_shader_compile 1 +#define GL_MAX_SHADER_COMPILER_THREADS_KHR 0x91B0 +#define GL_COMPLETION_STATUS_KHR 0x91B1 +typedef void (GL_APIENTRYP PFNGLMAXSHADERCOMPILERTHREADSKHRPROC) (GLuint count); +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glMaxShaderCompilerThreadsKHR (GLuint count); +#endif +#endif /* GL_KHR_parallel_shader_compile */ + #ifndef GL_KHR_robust_buffer_access_behavior #define GL_KHR_robust_buffer_access_behavior 1 #endif /* GL_KHR_robust_buffer_access_behavior */ @@ -791,6 +801,22 @@ GL_APICALL void GL_APIENTRY glGetFloati_vOES (GLenum target, GLuint index, GLflo #define GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD 0x87EE #endif /* GL_AMD_compressed_ATC_texture */ +#ifndef GL_AMD_framebuffer_multisample_advanced +#define GL_AMD_framebuffer_multisample_advanced 1 +#define GL_RENDERBUFFER_STORAGE_SAMPLES_AMD 0x91B2 +#define GL_MAX_COLOR_FRAMEBUFFER_SAMPLES_AMD 0x91B3 +#define GL_MAX_COLOR_FRAMEBUFFER_STORAGE_SAMPLES_AMD 0x91B4 +#define GL_MAX_DEPTH_STENCIL_FRAMEBUFFER_SAMPLES_AMD 0x91B5 +#define GL_NUM_SUPPORTED_MULTISAMPLE_MODES_AMD 0x91B6 +#define GL_SUPPORTED_MULTISAMPLE_MODES_AMD 0x91B7 +typedef void (GL_APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC) (GLenum target, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC) (GLuint renderbuffer, GLsizei 
samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height); +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glRenderbufferStorageMultisampleAdvancedAMD (GLenum target, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glNamedRenderbufferStorageMultisampleAdvancedAMD (GLuint renderbuffer, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height); +#endif +#endif /* GL_AMD_framebuffer_multisample_advanced */ + #ifndef GL_AMD_performance_monitor #define GL_AMD_performance_monitor 1 #define GL_COUNTER_TYPE_AMD 0x8BC0 @@ -1055,6 +1081,16 @@ GL_APICALL void GL_APIENTRY glGetSyncivAPPLE (GLsync sync, GLenum pname, GLsizei #define GL_EXT_EGL_image_array 1 #endif /* GL_EXT_EGL_image_array */ +#ifndef GL_EXT_EGL_image_storage +#define GL_EXT_EGL_image_storage 1 +typedef void (GL_APIENTRYP PFNGLEGLIMAGETARGETTEXSTORAGEEXTPROC) (GLenum target, GLeglImageOES image, const GLint* attrib_list); +typedef void (GL_APIENTRYP PFNGLEGLIMAGETARGETTEXTURESTORAGEEXTPROC) (GLuint texture, GLeglImageOES image, const GLint* attrib_list); +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glEGLImageTargetTexStorageEXT (GLenum target, GLeglImageOES image, const GLint* attrib_list); +GL_APICALL void GL_APIENTRY glEGLImageTargetTextureStorageEXT (GLuint texture, GLeglImageOES image, const GLint* attrib_list); +#endif +#endif /* GL_EXT_EGL_image_storage */ + #ifndef GL_EXT_YUV_target #define GL_EXT_YUV_target 1 #define GL_SAMPLER_EXTERNAL_2D_Y2Y_EXT 0x8BE7 @@ -1126,6 +1162,20 @@ GL_APICALL void GL_APIENTRY glClearTexSubImageEXT (GLuint texture, GLint level, #endif #endif /* GL_EXT_clear_texture */ +#ifndef GL_EXT_clip_control +#define GL_EXT_clip_control 1 +#define GL_LOWER_LEFT_EXT 0x8CA1 +#define GL_UPPER_LEFT_EXT 0x8CA2 +#define GL_NEGATIVE_ONE_TO_ONE_EXT 0x935E +#define GL_ZERO_TO_ONE_EXT 0x935F +#define GL_CLIP_ORIGIN_EXT 0x935C +#define 
GL_CLIP_DEPTH_MODE_EXT 0x935D +typedef void (GL_APIENTRYP PFNGLCLIPCONTROLEXTPROC) (GLenum origin, GLenum depth); +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glClipControlEXT (GLenum origin, GLenum depth); +#endif +#endif /* GL_EXT_clip_control */ + #ifndef GL_EXT_clip_cull_distance #define GL_EXT_clip_cull_distance 1 #define GL_MAX_CLIP_DISTANCES_EXT 0x0D32 @@ -1680,6 +1730,8 @@ GL_APICALL void GL_APIENTRY glGetnUniformivEXT (GLuint program, GLint location, #define GL_LAYOUT_SHADER_READ_ONLY_EXT 0x9591 #define GL_LAYOUT_TRANSFER_SRC_EXT 0x9592 #define GL_LAYOUT_TRANSFER_DST_EXT 0x9593 +#define GL_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_EXT 0x9530 +#define GL_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_EXT 0x9531 typedef void (GL_APIENTRYP PFNGLGENSEMAPHORESEXTPROC) (GLsizei n, GLuint *semaphores); typedef void (GL_APIENTRYP PFNGLDELETESEMAPHORESEXTPROC) (GLsizei n, const GLuint *semaphores); typedef GLboolean (GL_APIENTRYP PFNGLISSEMAPHOREEXTPROC) (GLuint semaphore); @@ -1823,6 +1875,14 @@ GL_APICALL void GL_APIENTRY glProgramUniformMatrix4x3fvEXT (GLuint program, GLin #define GL_FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT 0x8A52 #endif /* GL_EXT_shader_framebuffer_fetch */ +#ifndef GL_EXT_shader_framebuffer_fetch_non_coherent +#define GL_EXT_shader_framebuffer_fetch_non_coherent 1 +typedef void (GL_APIENTRYP PFNGLFRAMEBUFFERFETCHBARRIEREXTPROC) (void); +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glFramebufferFetchBarrierEXT (void); +#endif +#endif /* GL_EXT_shader_framebuffer_fetch_non_coherent */ + #ifndef GL_EXT_shader_group_vote #define GL_EXT_shader_group_vote 1 #endif /* GL_EXT_shader_group_vote */ @@ -2067,12 +2127,24 @@ GL_APICALL void GL_APIENTRY glTexBufferRangeEXT (GLenum target, GLenum internalf #ifndef GL_EXT_texture_filter_minmax #define GL_EXT_texture_filter_minmax 1 +#define GL_TEXTURE_REDUCTION_MODE_EXT 0x9366 +#define GL_WEIGHTED_AVERAGE_EXT 0x9367 #endif /* GL_EXT_texture_filter_minmax */ #ifndef GL_EXT_texture_format_BGRA8888 
#define GL_EXT_texture_format_BGRA8888 1 #endif /* GL_EXT_texture_format_BGRA8888 */ +#ifndef GL_EXT_texture_format_sRGB_override +#define GL_EXT_texture_format_sRGB_override 1 +#define GL_TEXTURE_FORMAT_SRGB_OVERRIDE_EXT 0x8FBF +#endif /* GL_EXT_texture_format_sRGB_override */ + +#ifndef GL_EXT_texture_mirror_clamp_to_edge +#define GL_EXT_texture_mirror_clamp_to_edge 1 +#define GL_MIRROR_CLAMP_TO_EDGE_EXT 0x8743 +#endif /* GL_EXT_texture_mirror_clamp_to_edge */ + #ifndef GL_EXT_texture_norm16 #define GL_EXT_texture_norm16 1 #define GL_R16_EXT 0x822A @@ -2275,6 +2347,11 @@ GL_APICALL void GL_APIENTRY glFramebufferTexture2DMultisampleIMG (GLenum target, #define GL_CUBIC_MIPMAP_LINEAR_IMG 0x913B #endif /* GL_IMG_texture_filter_cubic */ +#ifndef GL_INTEL_blackhole_render +#define GL_INTEL_blackhole_render 1 +#define GL_BLACKHOLE_RENDER_INTEL 0x83FC +#endif /* GL_INTEL_blackhole_render */ + #ifndef GL_INTEL_conservative_rasterization #define GL_INTEL_conservative_rasterization 1 #define GL_CONSERVATIVE_RASTERIZATION_INTEL 0x83FE @@ -2317,7 +2394,7 @@ typedef void (GL_APIENTRYP PFNGLENDPERFQUERYINTELPROC) (GLuint queryHandle); typedef void (GL_APIENTRYP PFNGLGETFIRSTPERFQUERYIDINTELPROC) (GLuint *queryId); typedef void (GL_APIENTRYP PFNGLGETNEXTPERFQUERYIDINTELPROC) (GLuint queryId, GLuint *nextQueryId); typedef void (GL_APIENTRYP PFNGLGETPERFCOUNTERINFOINTELPROC) (GLuint queryId, GLuint counterId, GLuint counterNameLength, GLchar *counterName, GLuint counterDescLength, GLchar *counterDesc, GLuint *counterOffset, GLuint *counterDataSize, GLuint *counterTypeEnum, GLuint *counterDataTypeEnum, GLuint64 *rawCounterMaxValue); -typedef void (GL_APIENTRYP PFNGLGETPERFQUERYDATAINTELPROC) (GLuint queryHandle, GLuint flags, GLsizei dataSize, GLvoid *data, GLuint *bytesWritten); +typedef void (GL_APIENTRYP PFNGLGETPERFQUERYDATAINTELPROC) (GLuint queryHandle, GLuint flags, GLsizei dataSize, void *data, GLuint *bytesWritten); typedef void (GL_APIENTRYP 
PFNGLGETPERFQUERYIDBYNAMEINTELPROC) (GLchar *queryName, GLuint *queryId); typedef void (GL_APIENTRYP PFNGLGETPERFQUERYINFOINTELPROC) (GLuint queryId, GLuint queryNameLength, GLchar *queryName, GLuint *dataSize, GLuint *noCounters, GLuint *noInstances, GLuint *capsMask); #ifdef GL_GLEXT_PROTOTYPES @@ -2328,7 +2405,7 @@ GL_APICALL void GL_APIENTRY glEndPerfQueryINTEL (GLuint queryHandle); GL_APICALL void GL_APIENTRY glGetFirstPerfQueryIdINTEL (GLuint *queryId); GL_APICALL void GL_APIENTRY glGetNextPerfQueryIdINTEL (GLuint queryId, GLuint *nextQueryId); GL_APICALL void GL_APIENTRY glGetPerfCounterInfoINTEL (GLuint queryId, GLuint counterId, GLuint counterNameLength, GLchar *counterName, GLuint counterDescLength, GLchar *counterDesc, GLuint *counterOffset, GLuint *counterDataSize, GLuint *counterTypeEnum, GLuint *counterDataTypeEnum, GLuint64 *rawCounterMaxValue); -GL_APICALL void GL_APIENTRY glGetPerfQueryDataINTEL (GLuint queryHandle, GLuint flags, GLsizei dataSize, GLvoid *data, GLuint *bytesWritten); +GL_APICALL void GL_APIENTRY glGetPerfQueryDataINTEL (GLuint queryHandle, GLuint flags, GLsizei dataSize, void *data, GLuint *bytesWritten); GL_APICALL void GL_APIENTRY glGetPerfQueryIdByNameINTEL (GLchar *queryName, GLuint *queryId); GL_APICALL void GL_APIENTRY glGetPerfQueryInfoINTEL (GLuint queryId, GLuint queryNameLength, GLchar *queryName, GLuint *dataSize, GLuint *noCounters, GLuint *noInstances, GLuint *capsMask); #endif @@ -2454,6 +2531,17 @@ GL_APICALL void GL_APIENTRY glBlendBarrierNV (void); #define GL_FACTOR_MAX_AMD 0x901D #endif /* GL_NV_blend_minmax_factor */ +#ifndef GL_NV_clip_space_w_scaling +#define GL_NV_clip_space_w_scaling 1 +#define GL_VIEWPORT_POSITION_W_SCALE_NV 0x937C +#define GL_VIEWPORT_POSITION_W_SCALE_X_COEFF_NV 0x937D +#define GL_VIEWPORT_POSITION_W_SCALE_Y_COEFF_NV 0x937E +typedef void (GL_APIENTRYP PFNGLVIEWPORTPOSITIONWSCALENVPROC) (GLuint index, GLfloat xcoeff, GLfloat ycoeff); +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY 
glViewportPositionWScaleNV (GLuint index, GLfloat xcoeff, GLfloat ycoeff); +#endif +#endif /* GL_NV_clip_space_w_scaling */ + #ifndef GL_NV_conditional_render #define GL_NV_conditional_render 1 #define GL_QUERY_WAIT_NV 0x8E13 @@ -2480,6 +2568,11 @@ GL_APICALL void GL_APIENTRY glSubpixelPrecisionBiasNV (GLuint xbits, GLuint ybit #endif #endif /* GL_NV_conservative_raster */ +#ifndef GL_NV_conservative_raster_pre_snap +#define GL_NV_conservative_raster_pre_snap 1 +#define GL_CONSERVATIVE_RASTER_MODE_PRE_SNAP_NV 0x9550 +#endif /* GL_NV_conservative_raster_pre_snap */ + #ifndef GL_NV_conservative_raster_pre_snap_triangles #define GL_NV_conservative_raster_pre_snap_triangles 1 #define GL_CONSERVATIVE_RASTER_MODE_NV 0x954D @@ -2851,6 +2944,7 @@ GL_APICALL void GL_APIENTRY glUniformMatrix4x3fvNV (GLint location, GLsizei coun #ifndef GL_NV_path_rendering #define GL_NV_path_rendering 1 +typedef double GLdouble; #define GL_PATH_FORMAT_SVG_NV 0x9070 #define GL_PATH_FORMAT_PS_NV 0x9071 #define GL_STANDARD_FONT_NAME_NV 0x9072 @@ -3061,6 +3155,25 @@ typedef GLenum (GL_APIENTRYP PFNGLPATHGLYPHINDEXARRAYNVPROC) (GLuint firstPathNa typedef GLenum (GL_APIENTRYP PFNGLPATHMEMORYGLYPHINDEXARRAYNVPROC) (GLuint firstPathName, GLenum fontTarget, GLsizeiptr fontSize, const void *fontData, GLsizei faceIndex, GLuint firstGlyphIndex, GLsizei numGlyphs, GLuint pathParameterTemplate, GLfloat emScale); typedef void (GL_APIENTRYP PFNGLPROGRAMPATHFRAGMENTINPUTGENNVPROC) (GLuint program, GLint location, GLenum genMode, GLint components, const GLfloat *coeffs); typedef void (GL_APIENTRYP PFNGLGETPROGRAMRESOURCEFVNVPROC) (GLuint program, GLenum programInterface, GLuint index, GLsizei propCount, const GLenum *props, GLsizei bufSize, GLsizei *length, GLfloat *params); +typedef void (GL_APIENTRYP PFNGLMATRIXFRUSTUMEXTPROC) (GLenum mode, GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar); +typedef void (GL_APIENTRYP PFNGLMATRIXLOADIDENTITYEXTPROC) (GLenum mode); 
+typedef void (GL_APIENTRYP PFNGLMATRIXLOADTRANSPOSEFEXTPROC) (GLenum mode, const GLfloat *m); +typedef void (GL_APIENTRYP PFNGLMATRIXLOADTRANSPOSEDEXTPROC) (GLenum mode, const GLdouble *m); +typedef void (GL_APIENTRYP PFNGLMATRIXLOADFEXTPROC) (GLenum mode, const GLfloat *m); +typedef void (GL_APIENTRYP PFNGLMATRIXLOADDEXTPROC) (GLenum mode, const GLdouble *m); +typedef void (GL_APIENTRYP PFNGLMATRIXMULTTRANSPOSEFEXTPROC) (GLenum mode, const GLfloat *m); +typedef void (GL_APIENTRYP PFNGLMATRIXMULTTRANSPOSEDEXTPROC) (GLenum mode, const GLdouble *m); +typedef void (GL_APIENTRYP PFNGLMATRIXMULTFEXTPROC) (GLenum mode, const GLfloat *m); +typedef void (GL_APIENTRYP PFNGLMATRIXMULTDEXTPROC) (GLenum mode, const GLdouble *m); +typedef void (GL_APIENTRYP PFNGLMATRIXORTHOEXTPROC) (GLenum mode, GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar); +typedef void (GL_APIENTRYP PFNGLMATRIXPOPEXTPROC) (GLenum mode); +typedef void (GL_APIENTRYP PFNGLMATRIXPUSHEXTPROC) (GLenum mode); +typedef void (GL_APIENTRYP PFNGLMATRIXROTATEFEXTPROC) (GLenum mode, GLfloat angle, GLfloat x, GLfloat y, GLfloat z); +typedef void (GL_APIENTRYP PFNGLMATRIXROTATEDEXTPROC) (GLenum mode, GLdouble angle, GLdouble x, GLdouble y, GLdouble z); +typedef void (GL_APIENTRYP PFNGLMATRIXSCALEFEXTPROC) (GLenum mode, GLfloat x, GLfloat y, GLfloat z); +typedef void (GL_APIENTRYP PFNGLMATRIXSCALEDEXTPROC) (GLenum mode, GLdouble x, GLdouble y, GLdouble z); +typedef void (GL_APIENTRYP PFNGLMATRIXTRANSLATEFEXTPROC) (GLenum mode, GLfloat x, GLfloat y, GLfloat z); +typedef void (GL_APIENTRYP PFNGLMATRIXTRANSLATEDEXTPROC) (GLenum mode, GLdouble x, GLdouble y, GLdouble z); #ifdef GL_GLEXT_PROTOTYPES GL_APICALL GLuint GL_APIENTRY glGenPathsNV (GLsizei range); GL_APICALL void GL_APIENTRY glDeletePathsNV (GLuint path, GLsizei range); @@ -3119,6 +3232,25 @@ GL_APICALL GLenum GL_APIENTRY glPathGlyphIndexArrayNV (GLuint firstPathName, GLe GL_APICALL GLenum GL_APIENTRY 
glPathMemoryGlyphIndexArrayNV (GLuint firstPathName, GLenum fontTarget, GLsizeiptr fontSize, const void *fontData, GLsizei faceIndex, GLuint firstGlyphIndex, GLsizei numGlyphs, GLuint pathParameterTemplate, GLfloat emScale); GL_APICALL void GL_APIENTRY glProgramPathFragmentInputGenNV (GLuint program, GLint location, GLenum genMode, GLint components, const GLfloat *coeffs); GL_APICALL void GL_APIENTRY glGetProgramResourcefvNV (GLuint program, GLenum programInterface, GLuint index, GLsizei propCount, const GLenum *props, GLsizei bufSize, GLsizei *length, GLfloat *params); +GL_APICALL void GL_APIENTRY glMatrixFrustumEXT (GLenum mode, GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar); +GL_APICALL void GL_APIENTRY glMatrixLoadIdentityEXT (GLenum mode); +GL_APICALL void GL_APIENTRY glMatrixLoadTransposefEXT (GLenum mode, const GLfloat *m); +GL_APICALL void GL_APIENTRY glMatrixLoadTransposedEXT (GLenum mode, const GLdouble *m); +GL_APICALL void GL_APIENTRY glMatrixLoadfEXT (GLenum mode, const GLfloat *m); +GL_APICALL void GL_APIENTRY glMatrixLoaddEXT (GLenum mode, const GLdouble *m); +GL_APICALL void GL_APIENTRY glMatrixMultTransposefEXT (GLenum mode, const GLfloat *m); +GL_APICALL void GL_APIENTRY glMatrixMultTransposedEXT (GLenum mode, const GLdouble *m); +GL_APICALL void GL_APIENTRY glMatrixMultfEXT (GLenum mode, const GLfloat *m); +GL_APICALL void GL_APIENTRY glMatrixMultdEXT (GLenum mode, const GLdouble *m); +GL_APICALL void GL_APIENTRY glMatrixOrthoEXT (GLenum mode, GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar); +GL_APICALL void GL_APIENTRY glMatrixPopEXT (GLenum mode); +GL_APICALL void GL_APIENTRY glMatrixPushEXT (GLenum mode); +GL_APICALL void GL_APIENTRY glMatrixRotatefEXT (GLenum mode, GLfloat angle, GLfloat x, GLfloat y, GLfloat z); +GL_APICALL void GL_APIENTRY glMatrixRotatedEXT (GLenum mode, GLdouble angle, GLdouble x, GLdouble y, GLdouble z); +GL_APICALL void 
GL_APIENTRY glMatrixScalefEXT (GLenum mode, GLfloat x, GLfloat y, GLfloat z); +GL_APICALL void GL_APIENTRY glMatrixScaledEXT (GLenum mode, GLdouble x, GLdouble y, GLdouble z); +GL_APICALL void GL_APIENTRY glMatrixTranslatefEXT (GLenum mode, GLfloat x, GLfloat y, GLfloat z); +GL_APICALL void GL_APIENTRY glMatrixTranslatedEXT (GLenum mode, GLdouble x, GLdouble y, GLdouble z); #endif #endif /* GL_NV_path_rendering */ @@ -3230,6 +3362,10 @@ GL_APICALL void GL_APIENTRY glResolveDepthValuesNV (void); #define GL_SAMPLER_CUBE_SHADOW_NV 0x8DC5 #endif /* GL_NV_shadow_samplers_cube */ +#ifndef GL_NV_stereo_view_rendering +#define GL_NV_stereo_view_rendering 1 +#endif /* GL_NV_stereo_view_rendering */ + #ifndef GL_NV_texture_border_clamp #define GL_NV_texture_border_clamp 1 #define GL_TEXTURE_BORDER_COLOR_NV 0x1004 @@ -3432,6 +3568,19 @@ GL_APICALL void GL_APIENTRY glFramebufferFetchBarrierQCOM (void); #endif #endif /* GL_QCOM_shader_framebuffer_fetch_noncoherent */ +#ifndef GL_QCOM_texture_foveated +#define GL_QCOM_texture_foveated 1 +#define GL_TEXTURE_FOVEATED_FEATURE_BITS_QCOM 0x8BFB +#define GL_TEXTURE_FOVEATED_MIN_PIXEL_DENSITY_QCOM 0x8BFC +#define GL_TEXTURE_FOVEATED_FEATURE_QUERY_QCOM 0x8BFD +#define GL_TEXTURE_FOVEATED_NUM_FOCAL_POINTS_QUERY_QCOM 0x8BFE +#define GL_FRAMEBUFFER_INCOMPLETE_FOVEATION_QCOM 0x8BFF +typedef void (GL_APIENTRYP PFNGLTEXTUREFOVEATIONPARAMETERSQCOMPROC) (GLuint texture, GLuint layer, GLuint focalPoint, GLfloat focalX, GLfloat focalY, GLfloat gainX, GLfloat gainY, GLfloat foveaArea); +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glTextureFoveationParametersQCOM (GLuint texture, GLuint layer, GLuint focalPoint, GLfloat focalX, GLfloat focalY, GLfloat gainX, GLfloat gainY, GLfloat foveaArea); +#endif +#endif /* GL_QCOM_texture_foveated */ + #ifndef GL_QCOM_tiled_rendering #define GL_QCOM_tiled_rendering 1 #define GL_COLOR_BUFFER_BIT0_QCOM 0x00000001 diff --git a/include/GLES3/gl3.h b/include/GLES3/gl3.h index 71e72b403ee..532bbbd3e2e 
100644 --- a/include/GLES3/gl3.h +++ b/include/GLES3/gl3.h @@ -1,12 +1,12 @@ -#ifndef __gl3_h_ -#define __gl3_h_ 1 +#ifndef __gles2_gl3_h_ +#define __gles2_gl3_h_ 1 #ifdef __cplusplus extern "C" { #endif /* -** Copyright (c) 2013-2017 The Khronos Group Inc. +** Copyright (c) 2013-2018 The Khronos Group Inc. ** ** Permission is hereby granted, free of charge, to any person obtaining a ** copy of this software and/or associated documentation files (the @@ -44,7 +44,7 @@ extern "C" { #define GL_GLES_PROTOTYPES 1 #endif -/* Generated on date 20170606 */ +/* Generated on date 20180725 */ /* Generated C header for: * API: gles2 diff --git a/include/meson.build b/include/meson.build index b4555eabbfc..081c1bc0008 100644 --- a/include/meson.build +++ b/include/meson.build @@ -43,7 +43,7 @@ if with_gles2 ) endif -if with_gles1 or with_gles2 or with_egl +if with_gles1 or with_gles2 or with_opengl or with_egl install_headers('KHR/khrplatform.h', subdir : 'KHR') endif diff --git a/include/pci_ids/radeonsi_pci_ids.h b/include/pci_ids/radeonsi_pci_ids.h index c8d30597230..35ea3559b02 100644 --- a/include/pci_ids/radeonsi_pci_ids.h +++ b/include/pci_ids/radeonsi_pci_ids.h @@ -204,6 +204,7 @@ CHIPSET(0x67CC, POLARIS10) CHIPSET(0x67CF, POLARIS10) CHIPSET(0x67D0, POLARIS10) CHIPSET(0x67DF, POLARIS10) +CHIPSET(0x6FDF, POLARIS10) CHIPSET(0x98E4, STONEY) @@ -243,3 +244,4 @@ CHIPSET(0x66A7, VEGA20) CHIPSET(0x66AF, VEGA20) CHIPSET(0x15DD, RAVEN) +CHIPSET(0x15D8, RAVEN) diff --git a/include/vulkan/vulkan_core.h b/include/vulkan/vulkan_core.h index 06c860707b8..fe450142503 100644 --- a/include/vulkan/vulkan_core.h +++ b/include/vulkan/vulkan_core.h @@ -43,7 +43,7 @@ extern "C" { #define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff) #define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff) // Version of this file -#define VK_HEADER_VERSION 80 +#define VK_HEADER_VERSION 84 #define VK_NULL_HANDLE 0 @@ -305,6 +305,8 @@ typedef enum VkStructureType { 
VK_STRUCTURE_TYPE_WIN32_KEYED_MUTEX_ACQUIRE_RELEASE_INFO_NV = 1000058000, VK_STRUCTURE_TYPE_VALIDATION_FLAGS_EXT = 1000061000, VK_STRUCTURE_TYPE_VI_SURFACE_CREATE_INFO_NN = 1000062000, + VK_STRUCTURE_TYPE_IMAGE_VIEW_ASTC_DECODE_MODE_EXT = 1000067000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ASTC_DECODE_FEATURES_EXT = 1000067001, VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR = 1000073000, VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR = 1000073001, VK_STRUCTURE_TYPE_MEMORY_WIN32_HANDLE_PROPERTIES_KHR = 1000073002, @@ -380,6 +382,10 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_EXTERNAL_FORMAT_ANDROID = 1000129005, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT = 1000130000, VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT = 1000130001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT = 1000138000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT = 1000138001, + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT = 1000138002, + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT = 1000138003, VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT = 1000143000, VK_STRUCTURE_TYPE_RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT = 1000143001, VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT = 1000143002, @@ -406,6 +412,11 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD = 1000185000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT = 1000190000, VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT = 1000190001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT = 1000190002, + VK_STRUCTURE_TYPE_CHECKPOINT_DATA_NV = 1000206000, + VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_NV = 1000206001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR = 1000211000, + VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT = 
VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT, VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES, @@ -440,6 +451,7 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES, VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, + VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES2_EXT = VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_EXT, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_FENCE_INFO_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_FENCE_INFO, VK_STRUCTURE_TYPE_EXTERNAL_FENCE_PROPERTIES_KHR = VK_STRUCTURE_TYPE_EXTERNAL_FENCE_PROPERTIES, VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO, @@ -1118,6 +1130,7 @@ typedef enum VkDescriptorType { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC = 8, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC = 9, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT = 10, + VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT = 1000138000, VK_DESCRIPTOR_TYPE_BEGIN_RANGE = VK_DESCRIPTOR_TYPE_SAMPLER, VK_DESCRIPTOR_TYPE_END_RANGE = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, VK_DESCRIPTOR_TYPE_RANGE_SIZE = (VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT - VK_DESCRIPTOR_TYPE_SAMPLER + 1), @@ -4573,7 +4586,6 @@ VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSurfaceKHR) #define VK_KHR_SURFACE_SPEC_VERSION 25 #define VK_KHR_SURFACE_EXTENSION_NAME "VK_KHR_surface" -#define VK_COLORSPACE_SRGB_NONLINEAR_KHR VK_COLOR_SPACE_SRGB_NONLINEAR_KHR typedef enum VkColorSpaceKHR { @@ -4592,6 +4604,7 @@ typedef enum VkColorSpaceKHR { 
VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT = 1000104012, VK_COLOR_SPACE_PASS_THROUGH_EXT = 1000104013, VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT = 1000104014, + VK_COLORSPACE_SRGB_NONLINEAR_KHR = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR, VK_COLOR_SPACE_BEGIN_RANGE_KHR = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR, VK_COLOR_SPACE_END_RANGE_KHR = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR, VK_COLOR_SPACE_RANGE_SIZE_KHR = (VK_COLOR_SPACE_SRGB_NONLINEAR_KHR - VK_COLOR_SPACE_SRGB_NONLINEAR_KHR + 1), @@ -5979,13 +5992,24 @@ typedef struct VkPhysicalDevice8BitStorageFeaturesKHR { +#define VK_KHR_vulkan_memory_model 1 +#define VK_KHR_VULKAN_MEMORY_MODEL_SPEC_VERSION 2 +#define VK_KHR_VULKAN_MEMORY_MODEL_EXTENSION_NAME "VK_KHR_vulkan_memory_model" + +typedef struct VkPhysicalDeviceVulkanMemoryModelFeaturesKHR { + VkStructureType sType; + void* pNext; + VkBool32 vulkanMemoryModel; + VkBool32 vulkanMemoryModelDeviceScope; +} VkPhysicalDeviceVulkanMemoryModelFeaturesKHR; + + + #define VK_EXT_debug_report 1 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDebugReportCallbackEXT) #define VK_EXT_DEBUG_REPORT_SPEC_VERSION 9 #define VK_EXT_DEBUG_REPORT_EXTENSION_NAME "VK_EXT_debug_report" -#define VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT -#define VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT typedef enum VkDebugReportObjectTypeEXT { @@ -6025,6 +6049,8 @@ typedef enum VkDebugReportObjectTypeEXT { VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT = 33, VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_EXT = 1000156000, VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_EXT = 1000085000, + VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT, + VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT, VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_KHR_EXT = 
VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_EXT, VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_KHR_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_EXT, VK_DEBUG_REPORT_OBJECT_TYPE_BEGIN_RANGE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT, @@ -6411,10 +6437,10 @@ typedef enum VkValidationCheckEXT { } VkValidationCheckEXT; typedef struct VkValidationFlagsEXT { - VkStructureType sType; - const void* pNext; - uint32_t disabledValidationCheckCount; - VkValidationCheckEXT* pDisabledValidationChecks; + VkStructureType sType; + const void* pNext; + uint32_t disabledValidationCheckCount; + const VkValidationCheckEXT* pDisabledValidationChecks; } VkValidationFlagsEXT; @@ -6429,6 +6455,24 @@ typedef struct VkValidationFlagsEXT { #define VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME "VK_EXT_shader_subgroup_vote" +#define VK_EXT_astc_decode_mode 1 +#define VK_EXT_ASTC_DECODE_MODE_SPEC_VERSION 1 +#define VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME "VK_EXT_astc_decode_mode" + +typedef struct VkImageViewASTCDecodeModeEXT { + VkStructureType sType; + const void* pNext; + VkFormat decodeMode; +} VkImageViewASTCDecodeModeEXT; + +typedef struct VkPhysicalDeviceASTCDecodeFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 decodeModeSharedExponent; +} VkPhysicalDeviceASTCDecodeFeaturesEXT; + + + #define VK_EXT_conditional_rendering 1 #define VK_EXT_CONDITIONAL_RENDERING_SPEC_VERSION 1 #define VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME "VK_EXT_conditional_rendering" @@ -6744,7 +6788,6 @@ VKAPI_ATTR VkResult VKAPI_CALL vkReleaseDisplayEXT( #define VK_EXT_display_surface_counter 1 #define VK_EXT_DISPLAY_SURFACE_COUNTER_SPEC_VERSION 1 #define VK_EXT_DISPLAY_SURFACE_COUNTER_EXTENSION_NAME "VK_EXT_display_surface_counter" -#define VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES2_EXT VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_EXT typedef enum VkSurfaceCounterFlagBitsEXT { @@ -7298,6 +7341,42 @@ typedef struct VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT { #define 
VK_AMD_SHADER_FRAGMENT_MASK_EXTENSION_NAME "VK_AMD_shader_fragment_mask" +#define VK_EXT_inline_uniform_block 1 +#define VK_EXT_INLINE_UNIFORM_BLOCK_SPEC_VERSION 1 +#define VK_EXT_INLINE_UNIFORM_BLOCK_EXTENSION_NAME "VK_EXT_inline_uniform_block" + +typedef struct VkPhysicalDeviceInlineUniformBlockFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 inlineUniformBlock; + VkBool32 descriptorBindingInlineUniformBlockUpdateAfterBind; +} VkPhysicalDeviceInlineUniformBlockFeaturesEXT; + +typedef struct VkPhysicalDeviceInlineUniformBlockPropertiesEXT { + VkStructureType sType; + void* pNext; + uint32_t maxInlineUniformBlockSize; + uint32_t maxPerStageDescriptorInlineUniformBlocks; + uint32_t maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks; + uint32_t maxDescriptorSetInlineUniformBlocks; + uint32_t maxDescriptorSetUpdateAfterBindInlineUniformBlocks; +} VkPhysicalDeviceInlineUniformBlockPropertiesEXT; + +typedef struct VkWriteDescriptorSetInlineUniformBlockEXT { + VkStructureType sType; + const void* pNext; + uint32_t dataSize; + const void* pData; +} VkWriteDescriptorSetInlineUniformBlockEXT; + +typedef struct VkDescriptorPoolInlineUniformBlockCreateInfoEXT { + VkStructureType sType; + const void* pNext; + uint32_t maxInlineUniformBlockBindings; +} VkDescriptorPoolInlineUniformBlockCreateInfoEXT; + + + #define VK_EXT_shader_stencil_export 1 #define VK_EXT_SHADER_STENCIL_EXPORT_SPEC_VERSION 1 #define VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME "VK_EXT_shader_stencil_export" @@ -7481,7 +7560,6 @@ VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkValidationCacheEXT) #define VK_EXT_VALIDATION_CACHE_SPEC_VERSION 1 #define VK_EXT_VALIDATION_CACHE_EXTENSION_NAME "VK_EXT_validation_cache" -#define VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT typedef enum VkValidationCacheHeaderVersionEXT { @@ -7732,7 +7810,7 @@ typedef struct VkPhysicalDeviceShaderCorePropertiesAMD { #define VK_EXT_vertex_attribute_divisor 1 -#define 
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_SPEC_VERSION 1 +#define VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_SPEC_VERSION 3 #define VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME "VK_EXT_vertex_attribute_divisor" typedef struct VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT { @@ -7753,6 +7831,13 @@ typedef struct VkPipelineVertexInputDivisorStateCreateInfoEXT { const VkVertexInputBindingDivisorDescriptionEXT* pVertexBindingDivisors; } VkPipelineVertexInputDivisorStateCreateInfoEXT; +typedef struct VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 vertexAttributeInstanceRateDivisor; + VkBool32 vertexAttributeInstanceRateZeroDivisor; +} VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT; + #define VK_NV_shader_subgroup_partitioned 1 @@ -7760,6 +7845,38 @@ typedef struct VkPipelineVertexInputDivisorStateCreateInfoEXT { #define VK_NV_SHADER_SUBGROUP_PARTITIONED_EXTENSION_NAME "VK_NV_shader_subgroup_partitioned" +#define VK_NV_device_diagnostic_checkpoints 1 +#define VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_SPEC_VERSION 2 +#define VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME "VK_NV_device_diagnostic_checkpoints" + +typedef struct VkQueueFamilyCheckpointPropertiesNV { + VkStructureType sType; + void* pNext; + VkPipelineStageFlags checkpointExecutionStageMask; +} VkQueueFamilyCheckpointPropertiesNV; + +typedef struct VkCheckpointDataNV { + VkStructureType sType; + void* pNext; + VkPipelineStageFlagBits stage; + void* pCheckpointMarker; +} VkCheckpointDataNV; + + +typedef void (VKAPI_PTR *PFN_vkCmdSetCheckpointNV)(VkCommandBuffer commandBuffer, const void* pCheckpointMarker); +typedef void (VKAPI_PTR *PFN_vkGetQueueCheckpointDataNV)(VkQueue queue, uint32_t* pCheckpointDataCount, VkCheckpointDataNV* pCheckpointData); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdSetCheckpointNV( + VkCommandBuffer commandBuffer, + const void* pCheckpointMarker); + +VKAPI_ATTR void VKAPI_CALL vkGetQueueCheckpointDataNV( + VkQueue queue, + 
uint32_t* pCheckpointDataCount, + VkCheckpointDataNV* pCheckpointData); +#endif + #ifdef __cplusplus } #endif diff --git a/m4/ax_check_compile_flag.m4 b/m4/ax_check_compile_flag.m4 index 51df0c09a7a..0fdca907e8b 100644 --- a/m4/ax_check_compile_flag.m4 +++ b/m4/ax_check_compile_flag.m4 @@ -55,6 +55,11 @@ # modified version of the Autoconf Macro, you may extend this special # exception to the GPL to apply to your modified version as well. +# Emil: +# Toggle Werror since at some point clang started treating unknown -W +# flags as warnings, succeeding with the build, yet issuing an annoying +# warning. + #serial 3 AC_DEFUN([AX_CHECK_COMPILE_FLAG], @@ -62,7 +67,7 @@ AC_DEFUN([AX_CHECK_COMPILE_FLAG], AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS - _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" + _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1 -Werror" AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], [AS_VAR_SET(CACHEVAR,[yes])], [AS_VAR_SET(CACHEVAR,[no])]) diff --git a/meson.build b/meson.build index 86a4a4ce6da..ca5538c80e7 100644 --- a/meson.build +++ b/meson.build @@ -297,7 +297,10 @@ endif _egl = get_option('egl') if _egl == 'auto' - with_egl = with_dri and with_shared_glapi and with_platforms + with_egl = ( + not ['darwin', 'windows'].contains(host_machine.system()) and + with_dri and with_shared_glapi and with_platforms + ) elif _egl == 'true' if not with_dri error('EGL requires dri') @@ -307,6 +310,8 @@ elif _egl == 'true' error('No platforms specified, consider -Dplatforms=drm,x11 at least') elif not ['disabled', 'dri'].contains(with_glx) error('EGL requires dri, but a GLX is being built without dri') + elif ['darwin', 'windows'].contains(host_machine.system()) + error('EGL is not available on Windows or MacOS') endif with_egl = true else @@ -882,8 +887,9 @@ if not cc.links('''#include <stdint.h> int
main() { return __sync_add_and_fetch(&v, (uint64_t)1); }''', + dependencies : dep_atomic, name : 'GCC 64bit atomics') - pre_args += '-DMISSING_64_BIT_ATOMICS' + pre_args += '-DMISSING_64BIT_ATOMICS' endif # TODO: shared/static? Is this even worth doing? @@ -989,7 +995,7 @@ if cc.links(''' freelocale(loc); return 0; }''', - extra_args : pre_args, + args : pre_args, name : 'strtod has locale support') pre_args += '-DHAVE_STRTOD_L' endif @@ -1056,14 +1062,6 @@ dep_thread = dependency('threads') if dep_thread.found() and host_machine.system() != 'windows' pre_args += '-DHAVE_PTHREAD' endif -if with_amd_vk or with_gallium_radeonsi or with_gallium_r600 or with_gallium_opencl - dep_elf = dependency('libelf', required : false) - if not dep_elf.found() - dep_elf = cc.find_library('elf') - endif -else - dep_elf = null_dep -endif dep_expat = dependency('expat') # this only exists on linux so either this is linux and it will be found, or # its not linux and and wont @@ -1106,12 +1104,17 @@ endif # Loop over the enables versions and get the highest libdrm requirement for all # active drivers. +_drm_blame = '' foreach d : _libdrm_checks ver = get_variable('_drm_@0@_ver'.format(d[0])) if d[1] and ver.version_compare('>' + _drm_ver) _drm_ver = ver + _drm_blame = d[0] endif endforeach +if _drm_blame != '' + message('libdrm @0@ needed because @1@ has the highest requirement'.format(_drm_ver, _drm_blame)) +endif # Then get each libdrm module foreach d : _libdrm_checks @@ -1215,6 +1218,16 @@ elif with_amd_vk or with_gallium_radeonsi or with_gallium_swr error('The following drivers require LLVM: Radv, RadeonSI, SWR. 
One of these is enabled, but LLVM is disabled.') endif +if (with_amd_vk or with_gallium_radeonsi or with_gallium_opencl or + (with_gallium_r600 and with_llvm)) + dep_elf = dependency('libelf', required : false) + if not dep_elf.found() + dep_elf = cc.find_library('elf') + endif +else + dep_elf = null_dep +endif + dep_glvnd = null_dep if with_glvnd dep_glvnd = dependency('libglvnd', version : '>= 0.2.0') diff --git a/src/amd/Android.mk b/src/amd/Android.mk index 6129e360cbf..e40e7da01bd 100644 --- a/src/amd/Android.mk +++ b/src/amd/Android.mk @@ -27,4 +27,6 @@ include $(LOCAL_PATH)/Makefile.sources include $(LOCAL_PATH)/Android.addrlib.mk include $(LOCAL_PATH)/Android.common.mk +ifneq ($(filter radeonsi,$(BOARD_GPU_DRIVERS)),) include $(LOCAL_PATH)/vulkan/Android.mk +endif diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 54b7e987015..c85d2816ba9 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -515,39 +515,51 @@ ac_build_gather_values(struct ac_llvm_context *ctx, return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false); } -/* Expand a scalar or vector to <4 x type> by filling the remaining channels - * with undef. Extract at most num_channels components from the input. +/* Expand a scalar or vector to <dst_channels x type> by filling the remaining + * channels with undef. Extract at most src_channels components from the input. 
*/ -LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx, - LLVMValueRef value, - unsigned num_channels) +LLVMValueRef ac_build_expand(struct ac_llvm_context *ctx, + LLVMValueRef value, + unsigned src_channels, + unsigned dst_channels) { LLVMTypeRef elemtype; - LLVMValueRef chan[4]; + LLVMValueRef chan[dst_channels]; if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) { unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value)); - num_channels = MIN2(num_channels, vec_size); - if (num_channels >= 4) + if (src_channels == dst_channels && vec_size == dst_channels) return value; - for (unsigned i = 0; i < num_channels; i++) + src_channels = MIN2(src_channels, vec_size); + + for (unsigned i = 0; i < src_channels; i++) chan[i] = ac_llvm_extract_elem(ctx, value, i); elemtype = LLVMGetElementType(LLVMTypeOf(value)); } else { - if (num_channels) { - assert(num_channels == 1); + if (src_channels) { + assert(src_channels == 1); chan[0] = value; } elemtype = LLVMTypeOf(value); } - while (num_channels < 4) - chan[num_channels++] = LLVMGetUndef(elemtype); + for (unsigned i = src_channels; i < dst_channels; i++) + chan[i] = LLVMGetUndef(elemtype); + + return ac_build_gather_values(ctx, chan, dst_channels); +} - return ac_build_gather_values(ctx, chan, 4); +/* Expand a scalar or vector to <4 x type> by filling the remaining channels + * with undef. Extract at most num_channels components from the input. + */ +LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx, + LLVMValueRef value, + unsigned num_channels) +{ + return ac_build_expand(ctx, value, num_channels, 4); } LLVMValueRef @@ -555,7 +567,15 @@ ac_build_fdiv(struct ac_llvm_context *ctx, LLVMValueRef num, LLVMValueRef den) { - LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, ""); + /* If we do (num / den), LLVM >= 7.0 does: + * return num * v_rcp_f32(den * (fabs(den) > 0x1.0p+96f ? 
0x1.0p-32f : 1.0f)); + * + * If we do (num * (1 / den)), LLVM does: + * return num * v_rcp_f32(den); + */ + LLVMValueRef one = LLVMTypeOf(num) == ctx->f64 ? ctx->f64_1 : ctx->f32_1; + LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, one, den, ""); + LLVMValueRef ret = LLVMBuildFMul(ctx->builder, num, rcp, ""); /* Use v_rcp_f32 instead of precise division. */ if (!LLVMIsConstant(ret)) diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index c5753037e7b..92d72ae4764 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -161,6 +161,9 @@ LLVMValueRef ac_build_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values, unsigned value_count); +LLVMValueRef ac_build_expand(struct ac_llvm_context *ctx, + LLVMValueRef value, + unsigned src_channels, unsigned dst_channels); LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned num_channels); diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index 10e1ca99d41..cd3525187a0 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -149,15 +149,13 @@ static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, char features[256]; const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--"; LLVMTargetRef target = ac_get_llvm_target(triple); - bool barrier_does_waitcnt = family != CHIP_VEGA20; snprintf(features, sizeof(features), - "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s%s", + "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s", tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "", tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "", tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "", - tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "", - barrier_does_waitcnt ? ",+auto-waitcnt-before-barrier" : ""); + tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? 
",-promote-alloca" : ""); LLVMTargetMachineRef tm = LLVMCreateTargetMachine( target, diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index cffc980e51f..2cb08be2b3f 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1171,7 +1171,8 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, struct ac_image_args *args, const nir_tex_instr *instr) { - enum glsl_base_type stype = glsl_get_sampler_result_type(var->type); + const struct glsl_type *type = glsl_without_array(var->type); + enum glsl_base_type stype = glsl_get_sampler_result_type(type); LLVMValueRef half_texel[2]; LLVMValueRef compare_cube_wa = NULL; LLVMValueRef result; @@ -1356,7 +1357,8 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= VI) { nir_deref_instr *texture_deref_instr = get_tex_texture_deref(instr); nir_variable *var = nir_deref_instr_get_variable(texture_deref_instr); - enum glsl_base_type stype = glsl_get_sampler_result_type(var->type); + const struct glsl_type *type = glsl_without_array(var->type); + enum glsl_base_type stype = glsl_get_sampler_result_type(type); if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) { return lower_gather4_integer(&ctx->ac, var, args, instr); } @@ -1398,7 +1400,7 @@ static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, if (instr->dest.ssa.bit_size == 16) { unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1; - LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16Type(), 2 * load_dwords); + LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16TypeInContext(ctx->ac.context), 2 * load_dwords); ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type); LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, ""); res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, ""); @@ -1671,7 +1673,7 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, }; results[idx] = ac_build_intrinsic(&ctx->ac, 
load_name, data_type, params, 5, 0); unsigned num_elems = ac_get_type_size(data_type) / elem_size_bytes; - LLVMTypeRef resTy = LLVMVectorType(LLVMIntType(instr->dest.ssa.bit_size), num_elems); + LLVMTypeRef resTy = LLVMVectorType(LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size), num_elems); results[idx] = LLVMBuildBitCast(ctx->ac.builder, results[idx], resTy, ""); } } @@ -1685,8 +1687,8 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, }; if (num_bytes > 16 && num_components == 3) { - /* we end up with a v4f32 and v2f32 but shuffle fails on that */ - results[1] = ac_build_expand_to_vec4(&ctx->ac, results[1], 2); + /* we end up with a v2i64 and i64 but shuffle fails on that */ + results[1] = ac_build_expand(&ctx->ac, results[1], 1, 2); } LLVMValueRef swizzle = LLVMConstVector(masks, num_components); diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index 2f4f0f8884f..94723dc9c09 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -588,8 +588,8 @@ void ac_compute_cmask(const struct radeon_info *info, unsigned base_align = num_pipes * pipe_interleave_bytes; - unsigned width = align(config->info.width, cl_width*8); - unsigned height = align(config->info.height, cl_height*8); + unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width*8); + unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height*8); unsigned slice_elements = (width * height) / (8*8); /* Each element of CMASK is a nibble. 
*/ diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h index 3c3bc541b4f..303c036fab4 100644 --- a/src/amd/common/sid.h +++ b/src/amd/common/sid.h @@ -9123,7 +9123,9 @@ #define CIK_SDMA_PACKET_SEMAPHORE 0x7 #define CIK_SDMA_PACKET_CONSTANT_FILL 0xb #define CIK_SDMA_PACKET_SRBM_WRITE 0xe -#define CIK_SDMA_COPY_MAX_SIZE 0x3fffe0 +/* There is apparently an undocumented HW "feature" that + prevents the HW from copying past 256 bytes of (1 << 22) */ +#define CIK_SDMA_COPY_MAX_SIZE 0x3fff00 enum amd_cmp_class_flags { S_NAN = 1 << 0, // Signaling NaN diff --git a/src/amd/vulkan/Android.mk b/src/amd/vulkan/Android.mk index cee3744f40b..51b03561fa7 100644 --- a/src/amd/vulkan/Android.mk +++ b/src/amd/vulkan/Android.mk @@ -62,6 +62,7 @@ LOCAL_SRC_FILES := \ $(VULKAN_FILES) LOCAL_CFLAGS += -DFORCE_BUILD_AMDGPU # instructs LLVM to declare LLVMInitializeAMDGPU* functions +LOCAL_CFLAGS += -DVK_USE_PLATFORM_ANDROID_KHR $(call mesa-build-with-llvm) @@ -140,6 +141,7 @@ LOCAL_SRC_FILES := \ $(VULKAN_ANDROID_FILES) LOCAL_CFLAGS += -DFORCE_BUILD_AMDGPU # instructs LLVM to declare LLVMInitializeAMDGPU* functions +LOCAL_CFLAGS += -DVK_USE_PLATFORM_ANDROID_KHR $(call mesa-build-with-llvm) diff --git a/src/amd/vulkan/Makefile.am b/src/amd/vulkan/Makefile.am index e7ccc58a28e..e28f032cbee 100644 --- a/src/amd/vulkan/Makefile.am +++ b/src/amd/vulkan/Makefile.am @@ -124,7 +124,7 @@ VULKAN_LIB_DEPS += \ endif if HAVE_PLATFORM_ANDROID -AM_CPPFLAGS += $(ANDROID_CPPFLAGS) +AM_CPPFLAGS += $(ANDROID_CPPFLAGS) -DVK_USE_PLATFORM_ANDROID_KHR AM_CFLAGS += $(ANDROID_CFLAGS) VULKAN_LIB_DEPS += $(ANDROID_LIBS) VULKAN_SOURCES += $(VULKAN_ANDROID_FILES) diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build index 9f2842182e7..7998ba8cf91 100644 --- a/src/amd/vulkan/meson.build +++ b/src/amd/vulkan/meson.build @@ -144,6 +144,7 @@ libvulkan_radeon = shared_library( idep_nir, ], c_args : [c_vis_args, no_override_init_args, radv_flags], + cpp_args : [cpp_vis_args, radv_flags], link_args : 
[ld_args_bsymbolic, ld_args_gc_sections], install : true, ) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 7785ece8ce6..dae64406896 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1657,7 +1657,8 @@ radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer, { struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); - uint32_t size = MAX_SETS * 2 * 4; + uint8_t ptr_size = HAVE_32BIT_POINTERS ? 1 : 2; + uint32_t size = MAX_SETS * 4 * ptr_size; uint32_t offset; void *ptr; @@ -1666,13 +1667,14 @@ radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer, return; for (unsigned i = 0; i < MAX_SETS; i++) { - uint32_t *uptr = ((uint32_t *)ptr) + i * 2; + uint32_t *uptr = ((uint32_t *)ptr) + i * ptr_size; uint64_t set_va = 0; struct radv_descriptor_set *set = descriptors_state->sets[i]; if (descriptors_state->valid & (1u << i)) set_va = set->va; uptr[0] = set_va & 0xffffffff; - uptr[1] = set_va >> 32; + if (ptr_size == 2) + uptr[1] = set_va >> 32; } uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); @@ -1714,6 +1716,8 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS; struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); + struct radv_cmd_state *state = &cmd_buffer->state; + bool flush_indirect_descriptors; if (!descriptors_state->dirty) return; @@ -1721,10 +1725,14 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, if (descriptors_state->push_dirty) radv_flush_push_descriptors(cmd_buffer, bind_point); - if ((cmd_buffer->state.pipeline && cmd_buffer->state.pipeline->need_indirect_descriptor_sets) || - (cmd_buffer->state.compute_pipeline && cmd_buffer->state.compute_pipeline->need_indirect_descriptor_sets)) { + flush_indirect_descriptors = + (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS && + state->pipeline && 
state->pipeline->need_indirect_descriptor_sets) || + (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE && + state->compute_pipeline && state->compute_pipeline->need_indirect_descriptor_sets); + + if (flush_indirect_descriptors) radv_flush_indirect_descriptor_sets(cmd_buffer, bind_point); - } MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, @@ -2307,6 +2315,7 @@ VkResult radv_BeginCommandBuffer( cmd_buffer->state.last_num_instances = -1; cmd_buffer->state.last_vertex_offset = -1; cmd_buffer->state.last_first_instance = -1; + cmd_buffer->state.predication_type = -1; cmd_buffer->usage_flags = pBeginInfo->flags; /* setup initial configuration into command buffer */ @@ -4126,15 +4135,18 @@ static void radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer, if (radv_image_has_dcc(image)) { uint32_t value = 0xffffffffu; /* Fully expanded mode. */ + bool need_decompress_pass = false; if (radv_layout_dcc_compressed(image, dst_layout, dst_queue_mask)) { value = 0x20202020u; + need_decompress_pass = true; } radv_initialize_dcc(cmd_buffer, image, value); - radv_set_dcc_need_cmask_elim_pred(cmd_buffer, image, false); + radv_set_dcc_need_cmask_elim_pred(cmd_buffer, image, + need_decompress_pass); } if (radv_image_has_cmask(image) || radv_image_has_dcc(image)) { @@ -4335,6 +4347,8 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer, struct radeon_cmdbuf *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(event->bo); + si_emit_cache_flush(cmd_buffer); + radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo); MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 18); @@ -4439,29 +4453,37 @@ void radv_CmdSetDeviceMask(VkCommandBuffer commandBuffer, } /* VK_EXT_conditional_rendering */ -void vkCmdBeginConditionalRenderingEXT( +void radv_CmdBeginConditionalRenderingEXT( VkCommandBuffer commandBuffer, const VkConditionalRenderingBeginInfoEXT* pConditionalRenderingBegin) { 
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_buffer, buffer, pConditionalRenderingBegin->buffer); - bool inverted; + bool draw_visible = true; uint64_t va; va = radv_buffer_get_va(buffer->bo) + pConditionalRenderingBegin->offset; - inverted = pConditionalRenderingBegin->flags & VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT; + /* By default, if the 32-bit value at offset in buffer memory is zero, + * then the rendering commands are discarded, otherwise they are + * executed as normal. If the inverted flag is set, all commands are + * discarded if the value is non zero. + */ + if (pConditionalRenderingBegin->flags & + VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT) { + draw_visible = false; + } /* Enable predication for this command buffer. */ - si_emit_set_predication_state(cmd_buffer, inverted, va); + si_emit_set_predication_state(cmd_buffer, draw_visible, va); cmd_buffer->state.predicating = true; /* Store conditional rendering user info. */ - cmd_buffer->state.predication_type = inverted; + cmd_buffer->state.predication_type = draw_visible; cmd_buffer->state.predication_va = va; } -void vkCmdEndConditionalRenderingEXT( +void radv_CmdEndConditionalRenderingEXT( VkCommandBuffer commandBuffer) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 33f24b9d302..a72cf261f66 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -45,22 +45,29 @@ #include "sid.h" #include "gfx9d.h" #include "addrlib/gfx9/chip/gfx9_enum.h" +#include "util/build_id.h" #include "util/debug.h" +#include "util/mesa-sha1.h" static int radv_device_get_cache_uuid(enum radeon_family family, void *uuid) { - uint32_t mesa_timestamp, llvm_timestamp; - uint16_t f = family; + struct mesa_sha1 ctx; + unsigned char sha1[20]; + unsigned ptr_size = sizeof(void*); + memset(uuid, 0, VK_UUID_SIZE); - if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, 
&mesa_timestamp) || - !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp)) + _mesa_sha1_init(&ctx); + + if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx) || + !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx)) return -1; - memcpy(uuid, &mesa_timestamp, 4); - memcpy((char*)uuid + 4, &llvm_timestamp, 4); - memcpy((char*)uuid + 8, &f, 2); - snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv"); + _mesa_sha1_update(&ctx, &family, sizeof(family)); + _mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size)); + _mesa_sha1_final(&ctx, sha1); + + memcpy(uuid, sha1, VK_UUID_SIZE); return 0; } @@ -459,7 +466,7 @@ static const struct debug_control radv_perftest_options[] = { const char * radv_get_perftest_option_name(int id) { - assert(id < ARRAY_SIZE(radv_debug_options) - 1); + assert(id < ARRAY_SIZE(radv_perftest_options) - 1); return radv_perftest_options[id].string; } @@ -480,6 +487,9 @@ radv_handle_per_app_options(struct radv_instance *instance, */ instance->perftest_flags |= RADV_PERFTEST_SISCHED; } + } else if (!strcmp(name, "DOOM_VFR")) { + /* Work around a Doom VFR game bug */ + instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS; } } @@ -818,6 +828,13 @@ void radv_GetPhysicalDeviceFeatures2( features->inheritedConditionalRendering = false; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: { + VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features = + (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext; + features->vertexAttributeInstanceRateDivisor = VK_TRUE; + features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE; + break; + } default: break; } @@ -1030,6 +1047,7 @@ void radv_GetPhysicalDeviceProperties2( VK_SUBGROUP_FEATURE_VOTE_BIT; if (pdevice->rad_info.chip_class >= VI) { properties->supportedOperations |= + VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT; } @@ -1892,10 
+1910,30 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff device->physical_device->rad_info.family != CHIP_CARRIZO && device->physical_device->rad_info.family != CHIP_STONEY; unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64; - unsigned max_offchip_buffers = max_offchip_buffers_per_se * - device->physical_device->rad_info.max_se; + unsigned max_offchip_buffers; unsigned offchip_granularity; unsigned hs_offchip_param; + + /* + * Per RadeonSI: + * This must be one less than the maximum number due to a hw limitation. + * Various hardware bugs in SI, CIK, and GFX9 need this. + * + * Per AMDVLK: + * Vega10 should limit max_offchip_buffers to 508 (4 * 127). + * Gfx7 should limit max_offchip_buffers to 508 + * Gfx6 should limit max_offchip_buffers to 126 (2 * 63) + * + * Follow AMDVLK here. + */ + if (device->physical_device->rad_info.family == CHIP_VEGA10 || + device->physical_device->rad_info.chip_class == CIK || + device->physical_device->rad_info.chip_class == SI) + --max_offchip_buffers_per_se; + + max_offchip_buffers = max_offchip_buffers_per_se * + device->physical_device->rad_info.max_se; + switch (device->tess_offchip_block_dw_size) { default: assert(0); diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py index 15d29becfd4..028d10f5fae 100644 --- a/src/amd/vulkan/radv_extensions.py +++ b/src/amd/vulkan/radv_extensions.py @@ -105,7 +105,7 @@ def __init__(self, name, ext_version, enable): Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'), Extension('VK_EXT_shader_viewport_index_layer', 1, True), Extension('VK_EXT_shader_stencil_export', 1, True), - Extension('VK_EXT_vertex_attribute_divisor', 1, True), + Extension('VK_EXT_vertex_attribute_divisor', 3, True), Extension('VK_AMD_draw_indirect_count', 1, True), Extension('VK_AMD_gcn_shader', 1, True), Extension('VK_AMD_rasterization_order', 1, 'device->has_out_of_order_rast'), diff --git 
a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c index f23ebfb2ad7..6253c27b95d 100644 --- a/src/amd/vulkan/radv_formats.c +++ b/src/amd/vulkan/radv_formats.c @@ -612,7 +612,8 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical } if (desc->layout == VK_FORMAT_LAYOUT_ETC && - physical_device->rad_info.chip_class < GFX9 && + physical_device->rad_info.family != CHIP_VEGA10 && + physical_device->rad_info.family != CHIP_RAVEN && physical_device->rad_info.family != CHIP_STONEY) { out_properties->linearTilingFeatures = linear; out_properties->optimalTilingFeatures = tiled; @@ -1111,6 +1112,25 @@ static VkResult radv_get_image_format_properties(struct radv_physical_device *ph } } + if (info->usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_TRANSFER_SRC_BIT)) { + goto unsupported; + } + } + + if (info->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_TRANSFER_DST_BIT)) { + goto unsupported; + } + } + + if (info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) { + if (!(format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT))) { + goto unsupported; + } + } + *pImageFormatProperties = (VkImageFormatProperties) { .maxExtent = maxExtent, .maxMipLevels = maxMipLevels, diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index f1c78e8115d..b316242dc5a 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -801,8 +801,8 @@ radv_image_get_cmask_info(struct radv_device *device, unsigned base_align = num_pipes * pipe_interleave_bytes; - unsigned width = align(image->info.width, cl_width*8); - unsigned height = align(image->info.height, cl_height*8); + unsigned width = align(image->surface.u.legacy.level[0].nblk_x, cl_width*8); + unsigned height = align(image->surface.u.legacy.level[0].nblk_y, cl_height*8); unsigned slice_elements = (width * height) / (8*8); 
/* Each element of CMASK is a nibble. */ diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c index b42a6783fd2..74868d5a2bb 100644 --- a/src/amd/vulkan/radv_meta_fast_clear.c +++ b/src/amd/vulkan/radv_meta_fast_clear.c @@ -603,7 +603,7 @@ radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer, pipeline = cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline; } - if (radv_image_has_dcc(image)) { + if (!decompress_dcc && radv_image_has_dcc(image)) { old_predicating = cmd_buffer->state.predicating; radv_emit_set_predication_state_from_image(cmd_buffer, image, true); @@ -671,7 +671,7 @@ radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer, &cmd_buffer->pool->alloc); } - if (radv_image_has_dcc(image)) { + if (!decompress_dcc && radv_image_has_dcc(image)) { cmd_buffer->state.predicating = old_predicating; radv_emit_set_predication_state_from_image(cmd_buffer, image, false); diff --git a/src/amd/vulkan/radv_meta_resolve.c b/src/amd/vulkan/radv_meta_resolve.c index b049237ba65..2c8ba5306c0 100644 --- a/src/amd/vulkan/radv_meta_resolve.c +++ b/src/amd/vulkan/radv_meta_resolve.c @@ -358,7 +358,8 @@ static void radv_pick_resolve_method_images(struct radv_image *src_image, *method = RESOLVE_COMPUTE; else if (vk_format_is_int(src_image->vk_format)) *method = RESOLVE_COMPUTE; - else if (src_image->info.array_size > 1) + else if (src_image->info.array_size > 1 || + dest_image->info.array_size > 1) *method = RESOLVE_COMPUTE; if (radv_layout_dcc_compressed(dest_image, dest_image_layout, queue_mask)) { diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 64b6522cd93..ac3d80618c2 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -422,7 +422,7 @@ get_tcs_out_current_patch_data_offset(struct radv_shader_context *ctx) ""); } -#define MAX_ARGS 23 +#define MAX_ARGS 64 struct arg_info { LLVMTypeRef types[MAX_ARGS]; LLVMValueRef 
*assign[MAX_ARGS]; @@ -545,13 +545,12 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, static void -set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs, - uint32_t indirect_offset) +set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, + uint8_t num_sgprs, bool indirect) { ud_info->sgpr_idx = *sgpr_idx; ud_info->num_sgprs = num_sgprs; - ud_info->indirect = indirect_offset > 0; - ud_info->indirect_offset = indirect_offset; + ud_info->indirect = indirect; *sgpr_idx += num_sgprs; } @@ -563,7 +562,7 @@ set_loc_shader(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx, &ctx->shader_info->user_sgprs_locs.shader_data[idx]; assert(ud_info); - set_loc(ud_info, sgpr_idx, num_sgprs, 0); + set_loc(ud_info, sgpr_idx, num_sgprs, false); } static void @@ -577,15 +576,16 @@ set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx) static void set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx, - uint32_t indirect_offset) + bool indirect) { struct radv_userdata_locations *locs = &ctx->shader_info->user_sgprs_locs; struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx]; assert(ud_info); - set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 1 : 2, indirect_offset); - if (indirect_offset == 0) + set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 1 : 2, indirect); + + if (!indirect) locs->descriptor_sets_enabled |= 1 << idx; } @@ -695,7 +695,7 @@ static void allocate_user_sgprs(struct radv_shader_context *ctx, if (ctx->shader_info->info.loads_push_constants) user_sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2; - uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 : 16; + uint32_t available_sgprs = ctx->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 
32 : 16; uint32_t remaining_sgprs = available_sgprs - user_sgpr_count; uint32_t num_desc_set = util_bitcount(ctx->shader_info->info.desc_set_used_mask); @@ -806,7 +806,7 @@ set_global_input_locs(struct radv_shader_context *ctx, gl_shader_stage stage, for (unsigned i = 0; i < num_sets; ++i) { if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && ctx->options->layout->set[i].layout->shader_stages & stage_mask) { - set_loc_desc(ctx, i, user_sgpr_idx, 0); + set_loc_desc(ctx, i, user_sgpr_idx, false); } else ctx->descriptor_sets[i] = NULL; } @@ -817,7 +817,6 @@ set_global_input_locs(struct radv_shader_context *ctx, gl_shader_stage stage, for (unsigned i = 0; i < num_sets; ++i) { if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && ctx->options->layout->set[i].layout->shader_stages & stage_mask) { - set_loc_desc(ctx, i, user_sgpr_idx, i * 8); ctx->descriptor_sets[i] = ac_build_load_to_sgpr(&ctx->ac, desc_sets, @@ -1991,8 +1990,7 @@ handle_vs_input_decl(struct radv_shader_context *ctx, uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[attrib_index]; if (divisor) { - buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id, - ctx->abi.start_instance, ""); + buffer_index = ctx->abi.instance_id; if (divisor != 1) { buffer_index = LLVMBuildUDiv(ctx->ac.builder, buffer_index, @@ -2009,6 +2007,8 @@ handle_vs_input_decl(struct radv_shader_context *ctx, } else { buffer_index = ctx->ac.i32_0; } + + buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.start_instance, buffer_index, ""); } else buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id, ctx->abi.base_vertex, ""); @@ -2105,9 +2105,10 @@ handle_fs_input_decl(struct radv_shader_context *ctx, int idx = variable->data.location; unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); LLVMValueRef interp = NULL; + uint64_t mask; variable->data.driver_location = idx * 4; - ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location; + mask = 
((1ull << attrib_count) - 1) << variable->data.location; if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) { unsigned interp_type; @@ -2128,6 +2129,15 @@ handle_fs_input_decl(struct radv_shader_context *ctx, for (unsigned i = 0; i < attrib_count; ++i) ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp; + if (idx == VARYING_SLOT_CLIP_DIST0) { + /* Do not account for the number of components inside the array + * of clip/cull distances because this might wrongly set other + * bits like primitive ID or layer. + */ + mask = 1ull << VARYING_SLOT_CLIP_DIST0; + } + + ctx->input_mask |= mask; } static void @@ -2194,6 +2204,17 @@ handle_fs_inputs(struct radv_shader_context *ctx, if (LLVMIsUndef(interp_param)) ctx->shader_info->fs.flat_shaded_mask |= 1u << index; ++index; + } else if (i == VARYING_SLOT_CLIP_DIST0) { + int length = ctx->shader_info->info.ps.num_input_clips_culls; + + for (unsigned j = 0; j < length; j += 4) { + inputs = ctx->inputs + ac_llvm_reg_index_soa(i, j); + + interp_param = *inputs; + interp_fs_input(ctx, index, interp_param, + ctx->abi.prim_mask, inputs); + ++index; + } } else if (i == VARYING_SLOT_POS) { for(int i = 0; i < 3; ++i) inputs[i] = ctx->abi.frag_pos[i]; @@ -2489,6 +2510,13 @@ handle_vs_outputs_post(struct radv_shader_context *ctx, memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], &args, sizeof(args)); + /* Export the clip/cull distances values to the next stage. 
*/ + radv_export_param(ctx, param_count, &slots[0], 0xf); + outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = param_count++; + if (ctx->num_output_clips + ctx->num_output_culls > 4) { + radv_export_param(ctx, param_count, &slots[4], 0xf); + outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++; + } } LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_1}; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index eb58e8a1c0a..6a51efa2980 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -2514,6 +2514,7 @@ radv_pipeline_generate_binning_state(struct radeon_cmdbuf *cs, switch (pipeline->device->physical_device->rad_info.family) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: context_states_per_bin = 1; persistent_states_per_bin = 1; fpovs_per_batch = 63; @@ -3027,6 +3028,23 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *cs, ps_offset++; } + if (ps->info.info.ps.num_input_clips_culls) { + unsigned vs_offset; + + vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0]; + if (vs_offset != AC_EXP_PARAM_UNDEFINED) { + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true); + ++ps_offset; + } + + vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1]; + if (vs_offset != AC_EXP_PARAM_UNDEFINED && + ps->info.info.ps.num_input_clips_culls > 4) { + ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true); + ++ps_offset; + } + } + for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) { unsigned vs_offset; bool flat_shade; @@ -3319,6 +3337,17 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline, } } + /* Workaround for a VGT hang when strip primitive types are used with + * primitive restart. 
+ */ + if (pipeline->graphics.prim_restart_enable && + (prim == V_008958_DI_PT_LINESTRIP || + prim == V_008958_DI_PT_TRISTRIP || + prim == V_008958_DI_PT_LINESTRIP_ADJ || + prim == V_008958_DI_PT_TRISTRIP_ADJ)) { + ia_multi_vgt_param.partial_vs_wave = true; + } + ia_multi_vgt_param.base = S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) | /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */ diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index e3229ab59bb..427e677cc5f 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -648,12 +648,19 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, { struct radv_device *device = cmd_buffer->device; struct radv_meta_saved_state saved_state; + bool old_predicating; radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS); + /* VK_EXT_conditional_rendering says that copy commands should not be + * affected by conditional rendering. + */ + old_predicating = cmd_buffer->state.predicating; + cmd_buffer->state.predicating = false; + struct radv_buffer dst_buffer = { .bo = dst_bo, .offset = dst_offset, @@ -736,6 +743,9 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_CS_PARTIAL_FLUSH; + /* Restore conditional rendering. 
*/ + cmd_buffer->state.predicating = old_predicating; + radv_meta_restore(&saved_state, cmd_buffer); } diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 7d4265cfdad..4093d36c4de 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -259,9 +259,6 @@ radv_shader_compile_to_nir(struct radv_device *device, */ NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_shader_out); - NIR_PASS_V(nir, nir_remove_dead_variables, - nir_var_shader_in | nir_var_shader_out | nir_var_system_value); - /* Now that we've deleted all but the main function, we can go ahead and * lower the rest of the constant initializers. */ @@ -273,6 +270,9 @@ radv_shader_compile_to_nir(struct radv_device *device, NIR_PASS_V(nir, nir_split_var_copies); NIR_PASS_V(nir, nir_split_per_member_structs); + NIR_PASS_V(nir, nir_remove_dead_variables, + nir_var_shader_in | nir_var_shader_out | nir_var_system_value); + NIR_PASS_V(nir, nir_lower_system_values); NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays); } @@ -409,6 +409,7 @@ radv_fill_shader_variant(struct radv_device *device, variant->code_size = radv_get_shader_binary_size(binary); variant->rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) | + S_00B12C_USER_SGPR_MSB(variant->info.num_user_sgprs >> 5) | S_00B12C_SCRATCH_EN(scratch_enabled); variant->rsrc1 = S_00B848_VGPRS((variant->config.num_vgprs - 1) / 4) | diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 03760b689c3..c490b69f52b 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -174,6 +174,7 @@ struct radv_shader_info { bool has_pcoord; bool prim_id_input; bool layer_input; + uint8_t num_input_clips_culls; } ps; struct { bool uses_grid_size; @@ -191,7 +192,6 @@ struct radv_userdata_info { int8_t sgpr_idx; uint8_t num_sgprs; bool indirect; - uint32_t indirect_offset; }; struct radv_userdata_locations { diff --git a/src/amd/vulkan/radv_shader_info.c 
b/src/amd/vulkan/radv_shader_info.c index 8026cca46c8..a45c847c46c 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -341,6 +341,7 @@ static void gather_info_input_decl_ps(const nir_shader *nir, const nir_variable *var, struct radv_shader_info *info) { + unsigned attrib_count = glsl_count_attribute_slots(var->type, false); const struct glsl_type *type = glsl_without_array(var->type); int idx = var->data.location; @@ -354,6 +355,9 @@ gather_info_input_decl_ps(const nir_shader *nir, const nir_variable *var, case VARYING_SLOT_LAYER: info->ps.layer_input = true; break; + case VARYING_SLOT_CLIP_DIST0: + info->ps.num_input_clips_culls = attrib_count; + break; default: break; } diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c index 20484177135..6479bea070b 100644 --- a/src/amd/vulkan/radv_wsi.c +++ b/src/amd/vulkan/radv_wsi.c @@ -206,23 +206,38 @@ VkResult radv_GetSwapchainImagesKHR( } VkResult radv_AcquireNextImageKHR( - VkDevice _device, + VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, VkSemaphore semaphore, - VkFence _fence, + VkFence fence, + uint32_t* pImageIndex) +{ + VkAcquireNextImageInfoKHR acquire_info = { + .sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR, + .swapchain = swapchain, + .timeout = timeout, + .semaphore = semaphore, + .fence = fence, + .deviceMask = 0, + }; + + return radv_AcquireNextImage2KHR(device, &acquire_info, pImageIndex); +} + +VkResult radv_AcquireNextImage2KHR( + VkDevice _device, + const VkAcquireNextImageInfoKHR* pAcquireInfo, uint32_t* pImageIndex) { RADV_FROM_HANDLE(radv_device, device, _device); struct radv_physical_device *pdevice = device->physical_device; - RADV_FROM_HANDLE(radv_fence, fence, _fence); - - VkResult result = wsi_common_acquire_next_image(&pdevice->wsi_device, - _device, - swapchain, - timeout, - semaphore, - pImageIndex); + RADV_FROM_HANDLE(radv_fence, fence, pAcquireInfo->fence); + + VkResult result = 
wsi_common_acquire_next_image2(&pdevice->wsi_device, + _device, + pAcquireInfo, + pImageIndex); if (fence && (result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR)) { fence->submitted = true; diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index 2337036c67a..63e07e457c1 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -342,6 +342,7 @@ si_emit_config(struct radv_physical_device *physical_device, switch (physical_device->rad_info.family) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: pc_lines = 4096; break; case CHIP_RAVEN: @@ -531,16 +532,16 @@ si_write_scissors(struct radeon_cmdbuf *cs, int first, VkRect2D scissor = si_intersect_scissor(&scissors[i], &viewport_scissor); get_viewport_xform(viewports + i, scale, translate); - scale[0] = abs(scale[0]); - scale[1] = abs(scale[1]); + scale[0] = fabsf(scale[0]); + scale[1] = fabsf(scale[1]); if (scale[0] < 0.5) scale[0] = 0.5; if (scale[1] < 0.5) scale[1] = 0.5; - guardband_x = MIN2(guardband_x, (max_range - abs(translate[0])) / scale[0]); - guardband_y = MIN2(guardband_y, (max_range - abs(translate[1])) / scale[1]); + guardband_x = MIN2(guardband_x, (max_range - fabsf(translate[0])) / scale[0]); + guardband_y = MIN2(guardband_y, (max_range - fabsf(translate[1])) / scale[1]); radeon_emit(cs, S_028250_TL_X(scissor.offset.x) | S_028250_TL_Y(scissor.offset.y) | @@ -698,7 +699,7 @@ void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, * counters) must immediately precede every timestamp event to * prevent a GPU hang on GFX9. 
*/ - if (chip_class == GFX9) { + if (chip_class == GFX9 && !is_mec) { radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); radeon_emit(cs, gfx9_eop_bug_va); diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index c94c0f339fd..149c2562187 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -673,7 +673,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, if (!cs->num_buffers) continue; - if (unique_bo_count == 0) { + if (unique_bo_count == 0 && !cs->num_virtual_buffers) { memcpy(handles, cs->handles, cs->num_buffers * sizeof(amdgpu_bo_handle)); unique_bo_count = cs->num_buffers; continue; diff --git a/src/broadcom/Android.cle.mk b/src/broadcom/Android.cle.mk index 9b728424fa1..5634a8d4ad3 100644 --- a/src/broadcom/Android.cle.mk +++ b/src/broadcom/Android.cle.mk @@ -29,6 +29,10 @@ LOCAL_SRC_FILES := $(BROADCOM_DECODER_FILES) LOCAL_STATIC_LIBRARIES := libmesa_broadcom_genxml +LOCAL_C_INCLUDES += $(MESA_TOP)/src/gallium/include + +LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH) + LOCAL_SHARED_LIBRARIES := libexpat libz include $(MESA_COMMON_MK) diff --git a/src/broadcom/Android.genxml.mk b/src/broadcom/Android.genxml.mk index eb5d142fe09..91e0de05d98 100644 --- a/src/broadcom/Android.genxml.mk +++ b/src/broadcom/Android.genxml.mk @@ -39,7 +39,7 @@ $(intermediates)/dummy.c: # This is the list of auto-generated files headers LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/broadcom/, $(BROADCOM_GENXML_GENERATED_FILES)) -define header-gen +define pack-header-gen @mkdir -p $(dir $@) @echo "Gen Header: $(PRIVATE_MODULE) <= $(notdir $(@))" $(hide) $(PRIVATE_SCRIPT) $(PRIVATE_SCRIPT_FLAGS) $(PRIVATE_XML) $(PRIVATE_VER) > $@ @@ -49,25 +49,25 @@ $(intermediates)/broadcom/cle/v3d_packet_v21_pack.h: PRIVATE_SCRIPT := $(MESA_PY 
$(intermediates)/broadcom/cle/v3d_packet_v21_pack.h: PRIVATE_XML := $(LOCAL_PATH)/cle/v3d_packet_v21.xml $(intermediates)/broadcom/cle/v3d_packet_v21_pack.h: PRIVATE_VER := 21 $(intermediates)/broadcom/cle/v3d_packet_v21_pack.h: $(LOCAL_PATH)/cle/v3d_packet_v21.xml $(LOCAL_PATH)/cle/gen_pack_header.py - $(call header-gen) + $(call pack-header-gen) $(intermediates)/broadcom/cle/v3d_packet_v33_pack.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/cle/gen_pack_header.py $(intermediates)/broadcom/cle/v3d_packet_v33_pack.h: PRIVATE_XML := $(LOCAL_PATH)/cle/v3d_packet_v33.xml $(intermediates)/broadcom/cle/v3d_packet_v33_pack.h: PRIVATE_VER := 33 $(intermediates)/broadcom/cle/v3d_packet_v33_pack.h: $(LOCAL_PATH)/cle/v3d_packet_v33.xml $(LOCAL_PATH)/cle/gen_pack_header.py - $(call header-gen) + $(call pack-header-gen) $(intermediates)/broadcom/cle/v3d_packet_v41_pack.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/cle/gen_pack_header.py $(intermediates)/broadcom/cle/v3d_packet_v41_pack.h: PRIVATE_XML := $(LOCAL_PATH)/cle/v3d_packet_v33.xml $(intermediates)/broadcom/cle/v3d_packet_v41_pack.h: PRIVATE_VER := 41 $(intermediates)/broadcom/cle/v3d_packet_v41_pack.h: $(LOCAL_PATH)/cle/v3d_packet_v33.xml $(LOCAL_PATH)/cle/gen_pack_header.py - $(call header-gen) + $(call pack-header-gen) $(intermediates)/broadcom/cle/v3d_packet_v42_pack.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/cle/gen_pack_header.py $(intermediates)/broadcom/cle/v3d_packet_v42_pack.h: PRIVATE_XML := $(LOCAL_PATH)/cle/v3d_packet_v33.xml $(intermediates)/broadcom/cle/v3d_packet_v42_pack.h: PRIVATE_VER := 42 $(intermediates)/broadcom/cle/v3d_packet_v42_pack.h: $(LOCAL_PATH)/cle/v3d_packet_v33.xml $(LOCAL_PATH)/cle/gen_pack_header.py - $(call header-gen) + $(call pack-header-gen) $(intermediates)/broadcom/cle/v3d_xml.h: $(addprefix $(MESA_TOP)/src/broadcom/,$(BROADCOM_GENXML_XML_FILES)) $(MESA_TOP)/src/intel/genxml/gen_zipped_file.py @mkdir -p $(dir $@) diff --git a/src/broadcom/cle/v3d_packet_v33.xml 
b/src/broadcom/cle/v3d_packet_v33.xml index 6ce8299e26b..f471d542c56 100644 --- a/src/broadcom/cle/v3d_packet_v33.xml +++ b/src/broadcom/cle/v3d_packet_v33.xml @@ -528,6 +528,16 @@ + + + + + + + + + + diff --git a/src/broadcom/common/v3d_device_info.h b/src/broadcom/common/v3d_device_info.h index 5685c7a2161..b0a2a02154c 100644 --- a/src/broadcom/common/v3d_device_info.h +++ b/src/broadcom/common/v3d_device_info.h @@ -27,13 +27,14 @@ #include /** - * Struct for tracking features of the V3D chip. This is where we'll store - * boolean flags for features in a specific version, but for now it's just the - * version + * Struct for tracking features of the V3D chip across driver and compiler. */ struct v3d_device_info { /** Simple V3D version: major * 10 + minor */ uint8_t ver; + + /** Size of the VPM, in bytes. */ + int vpm_size; }; #endif diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index fb5ecd6410c..4f3b621fd29 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -462,6 +462,7 @@ struct choose_scoreboard { int last_magic_sfu_write_tick; int last_ldvary_tick; int last_uniforms_reset_tick; + int last_thrsw_tick; bool tlb_locked; }; @@ -1095,10 +1096,16 @@ qpu_instruction_valid_in_thrend_slot(struct v3d_compile *c, } static bool -valid_thrsw_sequence(struct v3d_compile *c, +valid_thrsw_sequence(struct v3d_compile *c, struct choose_scoreboard *scoreboard, struct qinst *qinst, int instructions_in_sequence, bool is_thrend) { + /* No emitting our thrsw while the previous thrsw hasn't happened yet. */ + if (scoreboard->last_thrsw_tick + 3 > + scoreboard->tick - instructions_in_sequence) { + return false; + } + for (int slot = 0; slot < instructions_in_sequence; slot++) { /* No scheduling SFU when the result would land in the other * thread. 
The simulator complains for safety, though it @@ -1159,7 +1166,8 @@ emit_thrsw(struct v3d_compile *c, if (!v3d_qpu_sig_pack(c->devinfo, &sig, &packed_sig)) break; - if (!valid_thrsw_sequence(c, prev_inst, slots_filled + 1, + if (!valid_thrsw_sequence(c, scoreboard, + prev_inst, slots_filled + 1, is_thrend)) { break; } @@ -1173,7 +1181,9 @@ emit_thrsw(struct v3d_compile *c, if (merge_inst) { merge_inst->qpu.sig.thrsw = true; needs_free = true; + scoreboard->last_thrsw_tick = scoreboard->tick - slots_filled; } else { + scoreboard->last_thrsw_tick = scoreboard->tick; insert_scheduled_instruction(c, block, scoreboard, inst); time++; slots_filled++; @@ -1475,6 +1485,7 @@ v3d_qpu_schedule_instructions(struct v3d_compile *c) scoreboard.last_ldvary_tick = -10; scoreboard.last_magic_sfu_write_tick = -10; scoreboard.last_uniforms_reset_tick = -10; + scoreboard.last_thrsw_tick = -10; if (debug) { fprintf(stderr, "Pre-schedule instructions\n"); diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 33a9942734d..070e6a3aa59 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -648,6 +648,9 @@ struct v3d_vs_prog_data { /* Total number of components written, for the shader state record. */ uint32_t vpm_output_size; + + /* Value to be programmed in VCM_CACHE_SIZE. */ + uint8_t vcm_cache_size; }; struct v3d_fs_prog_data { @@ -928,7 +931,7 @@ VIR_A_ALU2(OR) VIR_A_ALU2(XOR) VIR_A_ALU2(VADD) VIR_A_ALU2(VSUB) -VIR_A_ALU2(STVPMV) +VIR_A_NODST_2(STVPMV) VIR_A_ALU1(NOT) VIR_A_ALU1(NEG) VIR_A_ALU1(FLAPUSH) diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 86379faa5bb..6b55b0e03bc 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -452,6 +452,16 @@ vir_emit_def(struct v3d_compile *c, struct qinst *inst) { assert(inst->dst.file == QFILE_NULL); + /* If we're emitting an instruction that's a def, it had better be + * writing a register. 
+ */ + if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) { + assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP || + v3d_qpu_add_op_has_dst(inst->qpu.alu.add.op)); + assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP || + v3d_qpu_mul_op_has_dst(inst->qpu.alu.mul.op)); + } + inst->dst = vir_get_temp(c); if (inst->dst.file == QFILE_TEMP) @@ -746,10 +756,29 @@ uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler, if (prog_data->uses_iid) prog_data->vpm_input_size++; - /* Input/output segment size are in 8x32-bit multiples. */ + /* Input/output segment size are in sectors (8 rows of 32 bits per + * channel). + */ prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8; prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8; + /* Compute VCM cache size. We set up our program to take up less than + * half of the VPM, so that any set of bin and render programs won't + * run out of space. We need space for at least one input segment, + * and then allocate the rest to output segments (one for the current + * program, the rest to VCM). The valid range of the VCM cache size + * field is 1-4 16-vertex batches, but GFXH-1744 limits us to 2-4 + * batches. 
+ */ + assert(c->devinfo->vpm_size); + int sector_size = 16 * sizeof(uint32_t) * 8; + int vpm_size_in_sectors = c->devinfo->vpm_size / sector_size; + int half_vpm = vpm_size_in_sectors / 2; + int vpm_output_sectors = half_vpm - prog_data->vpm_input_size; + int vpm_output_batches = vpm_output_sectors / prog_data->vpm_output_size; + assert(vpm_output_batches >= 2); + prog_data->vcm_cache_size = CLAMP(vpm_output_batches - 1, 2, 4); + return v3d_return_qpu_insts(c, final_assembly_size); } diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c index 5a856acd7ed..61d273575d9 100644 --- a/src/broadcom/compiler/vir_register_allocate.c +++ b/src/broadcom/compiler/vir_register_allocate.c @@ -94,6 +94,15 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g, } } + /* Refuse to spill a ldvary's dst, because that means + * that ldvary's r5 would end up being used across a + * thrsw. + */ + if (inst->qpu.sig.ldvary) { + assert(inst->dst.file == QFILE_TEMP); + BITSET_CLEAR(c->spillable, inst->dst.index); + } + if (inst->is_last_thrsw) started_last_seg = true; @@ -102,7 +111,7 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g, started_last_seg = true; /* Track when we're in between a TMU setup and the - * final LDTMU from that TMU setup. We can't + * final LDTMU or TMUWT from that TMU setup. We can't * spill/fill any temps during that time, because that * involves inserting a new TMU setup/LDTMU sequence. 
*/ @@ -110,6 +119,10 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g, is_last_ldtmu(inst, block)) in_tmu_operation = false; + if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU && + inst->qpu.alu.add.op == V3D_QPU_A_TMUWT) + in_tmu_operation = false; + if (v3d_qpu_writes_tmu(&inst->qpu)) in_tmu_operation = true; } @@ -206,6 +219,7 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp) inst->dst); v3d_emit_spill_tmua(c, spill_offset); vir_emit_thrsw(c); + vir_TMUWT(c); c->spills++; } diff --git a/src/compiler/Android.glsl.mk b/src/compiler/Android.glsl.mk index 0aabafa2673..37b3cb80251 100644 --- a/src/compiler/Android.glsl.mk +++ b/src/compiler/Android.glsl.mk @@ -48,7 +48,7 @@ LOCAL_STATIC_LIBRARIES := \ libmesa_nir LOCAL_MODULE := libmesa_glsl - +LOCAL_CFLAGS += -Wno-error include $(LOCAL_PATH)/Android.glsl.gen.mk include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) diff --git a/src/compiler/Android.nir.mk b/src/compiler/Android.nir.mk index 75a247a245d..59da5dbdc1c 100644 --- a/src/compiler/Android.nir.mk +++ b/src/compiler/Android.nir.mk @@ -41,6 +41,9 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/gallium/include \ $(MESA_TOP)/src/gallium/auxiliary +LOCAL_CFLAGS := \ + -Wno-missing-braces + LOCAL_STATIC_LIBRARIES := libmesa_compiler LOCAL_MODULE := libmesa_nir diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index 74160ec142b..d3f7a0fbdd6 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -1683,6 +1683,12 @@ ast_expression::do_hir(exec_list *instructions, op[1] = this->subexpressions[1]->hir(instructions, state); orig_type = op[0]->type; + + /* Break out if operand types were not parsed successfully. 
*/ + if ((op[0]->type == glsl_type::error_type || + op[1]->type == glsl_type::error_type)) + break; + type = arithmetic_result_type(op[0], op[1], (this->oper == ast_mul_assign), state, & loc); @@ -1928,6 +1934,11 @@ ast_expression::do_hir(exec_list *instructions, error_emitted = op[0]->type->is_error() || op[1]->type->is_error(); + if (error_emitted) { + result = ir_rvalue::error_value(ctx); + break; + } + type = arithmetic_result_type(op[0], op[1], false, state, & loc); ir_rvalue *temp_rhs; diff --git a/src/compiler/glsl/glcpp/glcpp-lex.l b/src/compiler/glsl/glcpp/glcpp-lex.l index 9cfcc120222..f7003da0cc8 100644 --- a/src/compiler/glsl/glcpp/glcpp-lex.l +++ b/src/compiler/glsl/glcpp/glcpp-lex.l @@ -289,6 +289,7 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? * token. */ if (parser->first_non_space_token_this_line) { BEGIN HASH; + yyextra->in_define = false; } RETURN_TOKEN_NEVER_SKIP (HASH_TOKEN); @@ -336,43 +337,55 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? /* For the pre-processor directives, we return these tokens * even when we are otherwise skipping. 
*/ ifdef { - BEGIN INITIAL; - yyextra->lexing_directive = 1; - yyextra->space_tokens = 0; - RETURN_TOKEN_NEVER_SKIP (IFDEF); + if (!yyextra->in_define) { + BEGIN INITIAL; + yyextra->lexing_directive = 1; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (IFDEF); + } } ifndef { - BEGIN INITIAL; - yyextra->lexing_directive = 1; - yyextra->space_tokens = 0; - RETURN_TOKEN_NEVER_SKIP (IFNDEF); + if (!yyextra->in_define) { + BEGIN INITIAL; + yyextra->lexing_directive = 1; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (IFNDEF); + } } if/[^_a-zA-Z0-9] { - BEGIN INITIAL; - yyextra->lexing_directive = 1; - yyextra->space_tokens = 0; - RETURN_TOKEN_NEVER_SKIP (IF); + if (!yyextra->in_define) { + BEGIN INITIAL; + yyextra->lexing_directive = 1; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (IF); + } } elif/[^_a-zA-Z0-9] { - BEGIN INITIAL; - yyextra->lexing_directive = 1; - yyextra->space_tokens = 0; - RETURN_TOKEN_NEVER_SKIP (ELIF); + if (!yyextra->in_define) { + BEGIN INITIAL; + yyextra->lexing_directive = 1; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (ELIF); + } } else { - BEGIN INITIAL; - yyextra->space_tokens = 0; - RETURN_TOKEN_NEVER_SKIP (ELSE); + if (!yyextra->in_define) { + BEGIN INITIAL; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (ELSE); + } } endif { - BEGIN INITIAL; - yyextra->space_tokens = 0; - RETURN_TOKEN_NEVER_SKIP (ENDIF); + if (!yyextra->in_define) { + BEGIN INITIAL; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (ENDIF); + } } error[^\r\n]* { @@ -399,7 +412,8 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? * and not whitespace). This will generate an error. */ define{HSPACE}* { - if (! parser->skipping) { + yyextra->in_define = true; + if (!parser->skipping) { BEGIN DEFINE; yyextra->space_tokens = 0; RETURN_TOKEN (DEFINE_TOKEN); @@ -420,8 +434,10 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? 
/* This will catch any non-directive garbage after a HASH */ {NONSPACE} { - BEGIN INITIAL; - RETURN_TOKEN (GARBAGE); + if (!parser->skipping) { + BEGIN INITIAL; + RETURN_TOKEN (GARBAGE); + } } /* An identifier immediately followed by '(' */ diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index 4be5cfa3d54..c951d9526ac 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -224,10 +224,12 @@ expanded_line: glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro); _glcpp_parser_skip_stack_change_if (parser, & @1, "elif", $2.value); } -| LINE_EXPANDED integer_constant NEWLINE { +| LINE_EXPANDED expression NEWLINE { + if (parser->is_gles && $2.undefined_macro) + glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro); parser->has_new_line_number = 1; - parser->new_line_number = $2; - _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", $2); + parser->new_line_number = $2.value; + _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", $2.value); } | LINE_EXPANDED integer_constant integer_constant NEWLINE { parser->has_new_line_number = 1; @@ -238,6 +240,17 @@ expanded_line: "#line %" PRIiMAX " %" PRIiMAX "\n", $2, $3); } +| LINE_EXPANDED '(' expression ')' '(' expression ')' NEWLINE { + if (parser->is_gles && $3.undefined_macro) + glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $3.undefined_macro); + if (parser->is_gles && $6.undefined_macro) + glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $6.undefined_macro); + parser->has_new_line_number = 1; + parser->new_line_number = $3.value; + parser->has_new_source_number = 1; + parser->new_source_number = $6.value; + _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX " %" PRIiMAX "\n", $3.value, $6.value); + } ; define: @@ -1074,6 +1087,20 @@ 
_token_list_equal_ignoring_space(token_list_t *a, token_list_t *b) while (1) { + if (node_a == NULL && node_b == NULL) + break; + + /* Ignore trailing whitespace */ + if (node_a == NULL && node_b->token->type == SPACE) { + while (node_b && node_b->token->type == SPACE) + node_b = node_b->next; + } + + if (node_b == NULL && node_a->token->type == SPACE) { + while (node_a && node_a->token->type == SPACE) + node_a = node_a->next; + } + if (node_a == NULL && node_b == NULL) break; diff --git a/src/compiler/glsl/glcpp/glcpp.h b/src/compiler/glsl/glcpp/glcpp.h index c7e382ed30c..e786b24b132 100644 --- a/src/compiler/glsl/glcpp/glcpp.h +++ b/src/compiler/glsl/glcpp/glcpp.h @@ -197,6 +197,7 @@ struct glcpp_parser { int first_non_space_token_this_line; int newline_as_space; int in_control_line; + bool in_define; int paren_count; int commented_newlines; skip_node_t *skip_stack; diff --git a/src/compiler/glsl/glcpp/meson.build b/src/compiler/glsl/glcpp/meson.build index 09d44ddd687..287da35006d 100644 --- a/src/compiler/glsl/glcpp/meson.build +++ b/src/compiler/glsl/glcpp/meson.build @@ -64,8 +64,9 @@ if with_tests foreach m : modes test( 'glcpp test (@0@)'.format(m), - find_program('tests/glcpp_test.py'), + prog_python2, args : [ + join_paths(meson.current_source_dir(), 'tests/glcpp_test.py'), glcpp, join_paths(meson.current_source_dir(), 'tests'), '--@0@'.format(m), ], diff --git a/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c b/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c index ae7ea09f67e..2b084e0960a 100644 --- a/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c +++ b/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c @@ -2,6 +2,7 @@ #define TWO ( 1+1 ) #define FOUR (2 + 2) #define SIX (3 + 3) +#define EIGHT (8 + 8) /* Redefinitions with whitespace in same places, but different amounts, (so no * error). 
*/ @@ -9,6 +10,9 @@ #define FOUR (2 + 2) #define SIX (3/*comment is whitespace*/+ /* collapsed */ /* to */ /* one */ /* space */ 3) +/* Trailing whitespace (no error) */ +#define EIGHT (8 + 8) + /* Redefinitions with whitespace in different places. Each of these should * trigger an error. */ #define TWO (1 + 1) diff --git a/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c.expected b/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c.expected index 602bdef94c2..766849e34a9 100644 --- a/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c.expected +++ b/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c.expected @@ -1,14 +1,15 @@ -0:14(9): preprocessor error: Redefinition of macro TWO +0:18(9): preprocessor error: Redefinition of macro TWO -0:15(9): preprocessor error: Redefinition of macro FOUR +0:19(9): preprocessor error: Redefinition of macro FOUR -0:16(9): preprocessor error: Redefinition of macro SIX +0:20(9): preprocessor error: Redefinition of macro SIX + @@ -18,5 +19,8 @@ + + + diff --git a/src/compiler/glsl/glcpp/tests/glcpp_test.py b/src/compiler/glsl/glcpp/tests/glcpp_test.py old mode 100755 new mode 100644 index 8ac5d7cb0a1..8c7552124a6 --- a/src/compiler/glsl/glcpp/tests/glcpp_test.py +++ b/src/compiler/glsl/glcpp/tests/glcpp_test.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python2 # encoding=utf-8 # Copyright © 2018 Intel Corporation diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h index 67b38f48eff..d05d1998a50 100644 --- a/src/compiler/glsl/ir.h +++ b/src/compiler/glsl/ir.h @@ -667,8 +667,8 @@ class ir_variable : public ir_instruction { * variable has been used. For example, it is an error to redeclare a * variable as invariant after it has been used. * - * This is only maintained in the ast_to_hir.cpp path, not in - * Mesa's fixed function or ARB program paths. + * This is maintained in the ast_to_hir.cpp path and during linking, + * but not in Mesa's fixed function or ARB program paths. 
*/ unsigned used:1; diff --git a/src/compiler/glsl/ir_constant_expression.cpp b/src/compiler/glsl/ir_constant_expression.cpp index 4a0aff72c6f..c9788c70535 100644 --- a/src/compiler/glsl/ir_constant_expression.cpp +++ b/src/compiler/glsl/ir_constant_expression.cpp @@ -826,7 +826,7 @@ ir_dereference_array::constant_expression_value(void *mem_ctx, const unsigned component = idx->value.u[0]; return new(mem_ctx) ir_constant(array, component); - } else { + } else if (array->type->is_array()) { const unsigned index = idx->value.u[0]; return array->get_array_element(index)->clone(mem_ctx, NULL); } diff --git a/src/compiler/glsl/link_interface_blocks.cpp b/src/compiler/glsl/link_interface_blocks.cpp index e5eca9460e3..801fbcd5d9f 100644 --- a/src/compiler/glsl/link_interface_blocks.cpp +++ b/src/compiler/glsl/link_interface_blocks.cpp @@ -417,9 +417,15 @@ validate_interstage_inout_blocks(struct gl_shader_program *prog, * write to any of the pre-defined outputs (e.g. if the vertex shader * does not write to gl_Position, etc), which is allowed and results in * undefined behavior. + * + * From Section 4.3.4 (Inputs) of the GLSL 1.50 spec: + * + * "Only the input variables that are actually read need to be written + * by the previous stage; it is allowed to have superfluous + * declarations of input variables." */ if (producer_def == NULL && - !is_builtin_gl_in_block(var, consumer->Stage)) { + !is_builtin_gl_in_block(var, consumer->Stage) && var->data.used) { linker_error(prog, "Input block `%s' is not an output of " "the previous stage\n", var->get_interface_type()->name); return; diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index 3ce78fe6428..b7260aca4ec 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -2187,6 +2187,41 @@ link_cs_input_layout_qualifiers(struct gl_shader_program *prog, } } +/** + * Link all out variables on a single stage which are not + * directly used in a shader with the main function. 
+ */ +static void +link_output_variables(struct gl_linked_shader *linked_shader, + struct gl_shader **shader_list, + unsigned num_shaders) +{ + struct glsl_symbol_table *symbols = linked_shader->symbols; + + for (unsigned i = 0; i < num_shaders; i++) { + + /* Skip shader object with main function */ + if (shader_list[i]->symbols->get_function("main")) + continue; + + foreach_in_list(ir_instruction, ir, shader_list[i]->ir) { + if (ir->ir_type != ir_type_variable) + continue; + + ir_variable *var = (ir_variable *) ir; + + if (var->data.mode == ir_var_shader_out && + !symbols->get_variable(var->name)) { + var = var->clone(linked_shader, NULL); + symbols->add_variable(var); + linked_shader->ir->push_head(var); + } + } + } + + return; +} + /** * Combine a group of shaders for a single stage to generate a linked shader @@ -2352,6 +2387,9 @@ link_intrastage_shaders(void *mem_ctx, return NULL; } + if (linked->Stage != MESA_SHADER_FRAGMENT) + link_output_variables(linked, shader_list, num_shaders); + /* Make a pass over all variable declarations to ensure that arrays with * unspecified sizes have a size specified. The size is inferred from the * max_array_access field. 
diff --git a/src/compiler/glsl/lower_vector_derefs.cpp b/src/compiler/glsl/lower_vector_derefs.cpp index 7583d1fdd3e..6cd9a2d819a 100644 --- a/src/compiler/glsl/lower_vector_derefs.cpp +++ b/src/compiler/glsl/lower_vector_derefs.cpp @@ -59,8 +59,7 @@ vector_deref_visitor::visit_enter(ir_assignment *ir) if (!deref->array->type->is_vector()) return ir_rvalue_enter_visitor::visit_enter(ir); - ir_dereference *const new_lhs = (ir_dereference *) deref->array; - ir->set_lhs(new_lhs); + ir_rvalue *const new_lhs = deref->array; void *mem_ctx = ralloc_parent(ir); ir_constant *old_index_constant = @@ -72,8 +71,16 @@ vector_deref_visitor::visit_enter(ir_assignment *ir) ir->rhs, deref->array_index); ir->write_mask = (1 << new_lhs->type->vector_elements) - 1; + ir->set_lhs(new_lhs); + } else if (new_lhs->ir_type != ir_type_swizzle) { + ir->set_lhs(new_lhs); + ir->write_mask = 1 << old_index_constant->get_uint_component(0); } else { - ir->write_mask = 1 << old_index_constant->get_int_component(0); + /* If the "new" LHS is a swizzle, use the set_lhs helper to instead + * swizzle the RHS. 
+ */ + unsigned component[1] = { old_index_constant->get_uint_component(0) }; + ir->set_lhs(new(mem_ctx) ir_swizzle(new_lhs, component, 1)); } return ir_rvalue_enter_visitor::visit_enter(ir); diff --git a/src/compiler/glsl/tests/meson.build b/src/compiler/glsl/tests/meson.build index fc7b863a278..821760e962d 100644 --- a/src/compiler/glsl/tests/meson.build +++ b/src/compiler/glsl/tests/meson.build @@ -84,8 +84,10 @@ test( ) test( - 'glsl compiler warnings', find_program('warnings_test.py'), + 'glsl compiler warnings', + prog_python2, args : [ + join_paths(meson.current_source_dir(), 'warnings_test.py'), '--glsl-compiler', glsl_compiler, '--test-directory', join_paths( meson.source_root(), 'src', 'compiler', 'glsl', 'tests', 'warnings' @@ -94,6 +96,9 @@ test( ) test( 'glsl optimization', - find_program('optimization_test.py'), - args : ['--test-runner', glsl_test], + prog_python2, + args : [ + join_paths(meson.current_source_dir(), 'optimization_test.py'), + '--test-runner', glsl_test + ], ) diff --git a/src/compiler/glsl/tests/optimization_test.py b/src/compiler/glsl/tests/optimization_test.py old mode 100755 new mode 100644 index 577d2dfc20f..f8518a168e0 --- a/src/compiler/glsl/tests/optimization_test.py +++ b/src/compiler/glsl/tests/optimization_test.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python2 # encoding=utf-8 # Copyright © 2018 Intel Corporation diff --git a/src/compiler/glsl/tests/warnings_test.py b/src/compiler/glsl/tests/warnings_test.py old mode 100755 new mode 100644 index 2e0f23180f3..2c4fa5a0d5a --- a/src/compiler/glsl/tests/warnings_test.py +++ b/src/compiler/glsl/tests/warnings_test.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # encoding=utf-8 # Copyright © 2017 Intel Corporation diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index bca6a32c956..b7c8754b4cf 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2926,6 +2926,7 @@ bool nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only); bool 
nir_lower_phis_to_regs_block(nir_block *block); bool nir_lower_ssa_defs_to_regs_block(nir_block *block); +bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl); bool nir_opt_algebraic(nir_shader *shader); bool nir_opt_algebraic_before_ffma(nir_shader *shader); diff --git a/src/compiler/nir/nir_control_flow.c b/src/compiler/nir/nir_control_flow.c index 1622b35a6c9..0617c6a905d 100644 --- a/src/compiler/nir/nir_control_flow.c +++ b/src/compiler/nir/nir_control_flow.c @@ -444,6 +444,23 @@ nearest_loop(nir_cf_node *node) return nir_cf_node_as_loop(node); } +static void +remove_phi_src(nir_block *block, nir_block *pred) +{ + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_foreach_phi_src_safe(src, phi) { + if (src->pred == pred) { + list_del(&src->src.use_link); + exec_node_remove(&src->node); + } + } + } +} + /* * update the CFG after a jump instruction has been added to the end of a block */ @@ -454,6 +471,10 @@ nir_handle_add_jump(nir_block *block) nir_instr *instr = nir_block_last_instr(block); nir_jump_instr *jump_instr = nir_instr_as_jump(instr); + if (block->successors[0]) + remove_phi_src(block->successors[0], block); + if (block->successors[1]) + remove_phi_src(block->successors[1], block); unlink_block_successors(block); nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); @@ -477,23 +498,6 @@ nir_handle_add_jump(nir_block *block) } } -static void -remove_phi_src(nir_block *block, nir_block *pred) -{ - nir_foreach_instr(instr, block) { - if (instr->type != nir_instr_type_phi) - break; - - nir_phi_instr *phi = nir_instr_as_phi(instr); - nir_foreach_phi_src_safe(src, phi) { - if (src->pred == pred) { - list_del(&src->src.use_link); - exec_node_remove(&src->node); - } - } - } -} - /* Removes the successor of a block with a jump. Note that the jump to be * eliminated may be free-floating. 
*/ diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c index c03acf83597..6f788ad1aa5 100644 --- a/src/compiler/nir/nir_deref.c +++ b/src/compiler/nir/nir_deref.c @@ -24,6 +24,7 @@ #include "nir.h" #include "nir_builder.h" #include "nir_deref.h" +#include "util/hash_table.h" void nir_deref_path_init(nir_deref_path *path, @@ -270,3 +271,135 @@ nir_fixup_deref_modes(nir_shader *shader) } } } + +struct rematerialize_deref_state { + bool progress; + nir_builder builder; + nir_block *block; + struct hash_table *cache; +}; + +static nir_deref_instr * +rematerialize_deref_in_block(nir_deref_instr *deref, + struct rematerialize_deref_state *state) +{ + if (deref->instr.block == state->block) + return deref; + + if (!state->cache) { + state->cache = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + } + + struct hash_entry *cached = _mesa_hash_table_search(state->cache, deref); + if (cached) + return cached->data; + + nir_builder *b = &state->builder; + nir_deref_instr *new_deref = + nir_deref_instr_create(b->shader, deref->deref_type); + new_deref->mode = deref->mode; + new_deref->type = deref->type; + + if (deref->deref_type == nir_deref_type_var) { + new_deref->var = deref->var; + } else { + nir_deref_instr *parent = nir_src_as_deref(deref->parent); + if (parent) { + parent = rematerialize_deref_in_block(parent, state); + new_deref->parent = nir_src_for_ssa(&parent->dest.ssa); + } else { + nir_src_copy(&new_deref->parent, &deref->parent, new_deref); + } + } + + switch (deref->deref_type) { + case nir_deref_type_var: + case nir_deref_type_array_wildcard: + case nir_deref_type_cast: + /* Nothing more to do */ + break; + + case nir_deref_type_array: + assert(!nir_src_as_deref(deref->arr.index)); + nir_src_copy(&new_deref->arr.index, &deref->arr.index, new_deref); + break; + + case nir_deref_type_struct: + new_deref->strct.index = deref->strct.index; + break; + + default: + unreachable("Invalid deref instruction type"); + } + 
+ nir_ssa_dest_init(&new_deref->instr, &new_deref->dest, + deref->dest.ssa.num_components, + deref->dest.ssa.bit_size, + deref->dest.ssa.name); + nir_builder_instr_insert(b, &new_deref->instr); + + return new_deref; +} + +static bool +rematerialize_deref_src(nir_src *src, void *_state) +{ + struct rematerialize_deref_state *state = _state; + + nir_deref_instr *deref = nir_src_as_deref(*src); + if (!deref) + return true; + + nir_deref_instr *block_deref = rematerialize_deref_in_block(deref, state); + if (block_deref != deref) { + nir_instr_rewrite_src(src->parent_instr, src, + nir_src_for_ssa(&block_deref->dest.ssa)); + nir_deref_instr_remove_if_unused(deref); + state->progress = true; + } + + return true; +} + +/** Re-materialize derefs in every block + * + * This pass re-materializes deref instructions in every block in which it is + * used. After this pass has been run, every use of a deref will be of a + * deref in the same block as the use. Also, all unused derefs will be + * deleted as a side-effect. 
+ */ +bool +nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl) +{ + struct rematerialize_deref_state state = { 0 }; + nir_builder_init(&state.builder, impl); + + nir_foreach_block(block, impl) { + state.block = block; + + /* Start each block with a fresh cache */ + if (state.cache) + _mesa_hash_table_clear(state.cache, NULL); + + nir_foreach_instr_safe(instr, block) { + if (instr->type == nir_instr_type_deref) { + nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)); + continue; + } + + state.builder.cursor = nir_before_instr(instr); + nir_foreach_src(instr, rematerialize_deref_src, &state); + } + +#ifndef NDEBUG + nir_if *following_if = nir_block_get_following_if(block); + if (following_if) + assert(!nir_src_as_deref(following_if->condition)); +#endif + } + + _mesa_hash_table_destroy(state.cache, NULL); + + return state.progress; +} diff --git a/src/compiler/nir/nir_format_convert.h b/src/compiler/nir/nir_format_convert.h index 45532b74884..e5cc653c44e 100644 --- a/src/compiler/nir/nir_format_convert.h +++ b/src/compiler/nir/nir_format_convert.h @@ -191,7 +191,7 @@ nir_format_unpack_11f11f10f(nir_builder *b, nir_ssa_def *packed) { nir_ssa_def *chans[3]; chans[0] = nir_mask_shift(b, packed, 0x000007ff, 4); - chans[1] = nir_mask_shift(b, packed, 0x003ff100, -7); + chans[1] = nir_mask_shift(b, packed, 0x003ff800, -7); chans[2] = nir_mask_shift(b, packed, 0xffc00000, -17); for (unsigned i = 0; i < 3; i++) diff --git a/src/compiler/nir/nir_from_ssa.c b/src/compiler/nir/nir_from_ssa.c index 1aa35509b11..413807ff28d 100644 --- a/src/compiler/nir/nir_from_ssa.c +++ b/src/compiler/nir/nir_from_ssa.c @@ -974,6 +974,12 @@ nir_lower_ssa_defs_to_regs_block(nir_block *block) mov->dest.dest = nir_dest_for_reg(reg); mov->dest.write_mask = (1 << reg->num_components) - 1; nir_instr_insert(nir_after_instr(&load->instr), &mov->instr); + } else if (instr->type == nir_instr_type_deref) { + /* Derefs should always be SSA values, don't rewrite them. 
*/ + nir_deref_instr *deref = nir_instr_as_deref(instr); + nir_foreach_use_safe(use, &deref->dest.ssa) + assert(use->parent_instr->block == block); + assert(list_empty(&deref->dest.ssa.if_uses)); } else { nir_foreach_dest(instr, dest_replace_ssa_with_reg, &state); } diff --git a/src/compiler/nir/nir_linking_helpers.c b/src/compiler/nir/nir_linking_helpers.c index 85712a7cb1c..6e6655dfc9d 100644 --- a/src/compiler/nir/nir_linking_helpers.c +++ b/src/compiler/nir/nir_linking_helpers.c @@ -176,9 +176,12 @@ nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer) } static uint8_t -get_interp_type(nir_variable *var, bool default_to_smooth_interp) +get_interp_type(nir_variable *var, const struct glsl_type *type, + bool default_to_smooth_interp) { - if (var->data.interpolation != INTERP_MODE_NONE) + if (glsl_type_is_integer(type)) + return INTERP_MODE_FLAT; + else if (var->data.interpolation != INTERP_MODE_NONE) return var->data.interpolation; else if (default_to_smooth_interp) return INTERP_MODE_SMOOTH; @@ -233,7 +236,7 @@ get_slot_component_masks_and_interp_types(struct exec_list *var_list, unsigned comps_slot2 = 0; for (unsigned i = 0; i < slots; i++) { interp_type[location + i] = - get_interp_type(var, default_to_smooth_interp); + get_interp_type(var, type, default_to_smooth_interp); interp_loc[location + i] = get_interp_loc(var); if (dual_slot) { @@ -405,7 +408,7 @@ compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps, continue; bool found_new_offset = false; - uint8_t interp = get_interp_type(var, default_to_smooth_interp); + uint8_t interp = get_interp_type(var, type, default_to_smooth_interp); for (; cursor[interp] < 32; cursor[interp]++) { uint8_t cursor_used_comps = comps[cursor[interp]]; diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c index 742c8d8ee66..0be3aba9456 100644 --- a/src/compiler/nir/nir_lower_alu_to_scalar.c +++ b/src/compiler/nir/nir_lower_alu_to_scalar.c @@ 
-107,11 +107,11 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) if (!b->shader->options->lower_pack_half_2x16) return false; + nir_ssa_def *src_vec2 = nir_ssa_for_alu_src(b, instr, 0); + nir_ssa_def *val = - nir_pack_half_2x16_split(b, nir_channel(b, instr->src[0].src.ssa, - instr->src[0].swizzle[0]), - nir_channel(b, instr->src[0].src.ssa, - instr->src[0].swizzle[1])); + nir_pack_half_2x16_split(b, nir_channel(b, src_vec2, 0), + nir_channel(b, src_vec2, 1)); nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); nir_instr_remove(&instr->instr); @@ -130,9 +130,11 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) if (!b->shader->options->lower_unpack_half_2x16) return false; + nir_ssa_def *packed = nir_ssa_for_alu_src(b, instr, 0); + nir_ssa_def *comps[2]; - comps[0] = nir_unpack_half_2x16_split_x(b, instr->src[0].src.ssa); - comps[1] = nir_unpack_half_2x16_split_y(b, instr->src[0].src.ssa); + comps[0] = nir_unpack_half_2x16_split_x(b, packed); + comps[1] = nir_unpack_half_2x16_split_y(b, packed); nir_ssa_def *vec = nir_vec(b, comps, 2); nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec)); @@ -144,8 +146,8 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) assert(b->shader->options->lower_pack_snorm_2x16 || b->shader->options->lower_pack_unorm_2x16); - nir_ssa_def *word = - nir_extract_u16(b, instr->src[0].src.ssa, nir_imm_int(b, 0)); + nir_ssa_def *word = nir_extract_u16(b, nir_ssa_for_alu_src(b, instr, 0), + nir_imm_int(b, 0)); nir_ssa_def *val = nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)), nir_channel(b, word, 0)); @@ -159,8 +161,8 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) assert(b->shader->options->lower_pack_snorm_4x8 || b->shader->options->lower_pack_unorm_4x8); - nir_ssa_def *byte = - nir_extract_u8(b, instr->src[0].src.ssa, nir_imm_int(b, 0)); + nir_ssa_def *byte = nir_extract_u8(b, nir_ssa_for_alu_src(b, instr, 0), + nir_imm_int(b, 0)); 
nir_ssa_def *val = nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)), nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))), @@ -173,14 +175,15 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) } case nir_op_fdph: { + nir_ssa_def *src0_vec = nir_ssa_for_alu_src(b, instr, 0); + nir_ssa_def *src1_vec = nir_ssa_for_alu_src(b, instr, 1); + nir_ssa_def *sum[4]; for (unsigned i = 0; i < 3; i++) { - sum[i] = nir_fmul(b, nir_channel(b, instr->src[0].src.ssa, - instr->src[0].swizzle[i]), - nir_channel(b, instr->src[1].src.ssa, - instr->src[1].swizzle[i])); + sum[i] = nir_fmul(b, nir_channel(b, src0_vec, i), + nir_channel(b, src1_vec, i)); } - sum[3] = nir_channel(b, instr->src[1].src.ssa, instr->src[1].swizzle[3]); + sum[3] = nir_channel(b, src1_vec, 3); nir_ssa_def *val = nir_fadd(b, nir_fadd(b, sum[0], sum[1]), nir_fadd(b, sum[2], sum[3])); diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 7fc4ff1d407..19526d86d43 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -462,13 +462,13 @@ ('unpack_64_2x32_split_y', a)), a), # Byte extraction - (('ushr', a, 24), ('extract_u8', a, 3), '!options->lower_extract_byte'), + (('ushr', 'a@32', 24), ('extract_u8', a, 3), '!options->lower_extract_byte'), (('iand', 0xff, ('ushr', a, 16)), ('extract_u8', a, 2), '!options->lower_extract_byte'), (('iand', 0xff, ('ushr', a, 8)), ('extract_u8', a, 1), '!options->lower_extract_byte'), (('iand', 0xff, a), ('extract_u8', a, 0), '!options->lower_extract_byte'), # Word extraction - (('ushr', a, 16), ('extract_u16', a, 1), '!options->lower_extract_word'), + (('ushr', 'a@32', 16), ('extract_u16', a, 1), '!options->lower_extract_word'), (('iand', 0xffff, a), ('extract_u16', a, 0), '!options->lower_extract_word'), # Subtracts diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c index b3d0bf1decb..d8e03d6ccbb 100644 --- 
a/src/compiler/nir/nir_opt_if.c +++ b/src/compiler/nir/nir_opt_if.c @@ -180,6 +180,13 @@ opt_peel_loop_initial_if(nir_loop *loop) } } + /* We're about to re-arrange a bunch of blocks so make sure that we don't + * have deref uses which cross block boundaries. We don't want a deref + * accidentally ending up in a phi. + */ + nir_rematerialize_derefs_in_use_blocks_impl( + nir_cf_node_get_function(&loop->cf_node)); + /* Before we do anything, convert the loop to LCSSA. We're about to * replace a bunch of SSA defs with registers and this will prevent any of * it from leaking outside the loop. @@ -423,12 +430,6 @@ nir_opt_if(nir_shader *shader) */ nir_lower_regs_to_ssa_impl(function->impl); - /* Calling nir_convert_loop_to_lcssa() in opt_peel_loop_initial_if() - * adds extra phi nodes which may not be valid if they're used for - * something such as a deref. Remove any unneeded phis. - */ - nir_opt_remove_phis_impl(function->impl); - progress = true; } } diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c index 955dfede694..161c4ba04e9 100644 --- a/src/compiler/nir/nir_opt_loop_unroll.c +++ b/src/compiler/nir/nir_opt_loop_unroll.c @@ -49,6 +49,9 @@ static void loop_prepare_for_unroll(nir_loop *loop) { + nir_rematerialize_derefs_in_use_blocks_impl( + nir_cf_node_get_function(&loop->cf_node)); + nir_convert_loop_to_lcssa(loop); /* Lower phis at the top level of the loop body */ diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp index c8a29404969..e00273995db 100644 --- a/src/compiler/nir_types.cpp +++ b/src/compiler/nir_types.cpp @@ -279,6 +279,11 @@ glsl_type_is_boolean(const struct glsl_type *type) { return type->is_boolean(); } +bool +glsl_type_is_integer(const struct glsl_type *type) +{ + return type->is_integer(); +} const glsl_type * glsl_void_type(void) diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h index db3a4dee2d9..7db32e3e008 100644 --- a/src/compiler/nir_types.h +++ 
b/src/compiler/nir_types.h @@ -139,6 +139,7 @@ bool glsl_type_is_image(const struct glsl_type *type); bool glsl_type_is_dual_slot(const struct glsl_type *type); bool glsl_type_is_numeric(const struct glsl_type *type); bool glsl_type_is_boolean(const struct glsl_type *type); +bool glsl_type_is_integer(const struct glsl_type *type); bool glsl_sampler_type_is_shadow(const struct glsl_type *type); bool glsl_sampler_type_is_array(const struct glsl_type *type); bool glsl_contains_atomic(const struct glsl_type *type); diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 32ebdd78a1f..688b33e6607 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -1771,11 +1771,17 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, nir_const_value src[4]; for (unsigned i = 0; i < count - 4; i++) { - nir_constant *c = - vtn_value(b, w[4 + i], vtn_value_type_constant)->constant; + struct vtn_value *src_val = + vtn_value(b, w[4 + i], vtn_value_type_constant); + + /* If this is an unsized source, pull the bit size from the + * source; otherwise, we'll use the bit size from the destination. + */ + if (!nir_alu_type_get_type_size(nir_op_infos[op].input_types[i])) + bit_size = glsl_get_bit_size(src_val->type->type); unsigned j = swap ? 
1 - i : i; - src[j] = c->values[0]; + src[j] = src_val->constant->values[0]; } val->constant->values[0] = diff --git a/src/egl/Android.mk b/src/egl/Android.mk index 11128ded93c..e775b176082 100644 --- a/src/egl/Android.mk +++ b/src/egl/Android.mk @@ -46,7 +46,10 @@ LOCAL_CFLAGS := \ LOCAL_C_INCLUDES := \ $(MESA_TOP)/include/drm-uapi \ $(MESA_TOP)/src/egl/main \ - $(MESA_TOP)/src/egl/drivers/dri2 + $(MESA_TOP)/src/egl/drivers/dri2 \ + frameworks/native/libs/nativebase/include \ + frameworks/native/libs/nativewindow/include \ + frameworks/native/libs/arect/include LOCAL_STATIC_LIBRARIES := \ libmesa_util \ @@ -65,6 +68,10 @@ ifeq ($(BOARD_USES_DRM_GRALLOC),true) LOCAL_SHARED_LIBRARIES += libgralloc_drm endif +ifeq ($(strip $(BOARD_USES_GRALLOC1)),true) +LOCAL_CFLAGS += -DHAVE_GRALLOC1 +endif + ifeq ($(filter $(MESA_ANDROID_MAJOR_VERSION), 4 5 6 7),) LOCAL_SHARED_LIBRARIES += libnativewindow endif @@ -80,8 +87,12 @@ ifneq ($(MESA_BUILD_GALLIUM),) LOCAL_REQUIRED_MODULES += gallium_dri endif +ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0) +LOCAL_HEADER_LIBRARIES += libnativebase_headers +endif + LOCAL_MODULE := libGLES_mesa LOCAL_MODULE_RELATIVE_PATH := egl - +LOCAL_CFLAGS += -Wno-error include $(MESA_COMMON_MK) include $(BUILD_SHARED_LIBRARY) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index c3024795a10..81337d20920 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -65,6 +65,38 @@ #include "util/u_vector.h" #include "mapi/glapi/glapi.h" +/* The kernel header drm_fourcc.h defines the DRM formats below. We duplicate + * some of the definitions here so that building Mesa won't need bleeding-edge + * kernel headers.
+ */ +#ifndef DRM_FORMAT_R8 +#define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */ +#endif + +#ifndef DRM_FORMAT_RG88 +#define DRM_FORMAT_RG88 fourcc_code('R', 'G', '8', '8') /* [15:0] R:G 8:8 little endian */ +#endif + +#ifndef DRM_FORMAT_GR88 +#define DRM_FORMAT_GR88 fourcc_code('G', 'R', '8', '8') /* [15:0] G:R 8:8 little endian */ +#endif + +#ifndef DRM_FORMAT_R16 +#define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R 16 little endian */ +#endif + +#ifndef DRM_FORMAT_GR1616 +#define DRM_FORMAT_GR1616 fourcc_code('G', 'R', '3', '2') /* [31:0] R:G 16:16 little endian */ +#endif + +#ifndef DRM_FORMAT_P010 +#define DRM_FORMAT_P010 fourcc_code('P', '0', '1', '0') /* 2x2 subsampled Cb:Cr plane 10 bits per channel */ +#endif + +#ifndef DRM_FORMAT_MOD_INVALID +#define DRM_FORMAT_MOD_INVALID ((1ULL<<56) - 1) +#endif + #define NUM_ATTRIBS 12 static void @@ -669,7 +701,7 @@ dri2_setup_screen(_EGLDisplay *disp) dri2_renderer_query_integer(dri2_dpy, __DRI2_RENDERER_HAS_CONTEXT_PRIORITY); - disp->Extensions.EXT_pixel_format_float = EGL_TRUE; + disp->Extensions.EXT_pixel_format_float = EGL_FALSE; if (dri2_renderer_query_integer(dri2_dpy, __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB)) @@ -2195,13 +2227,13 @@ dri2_check_dma_buf_attribs(const _EGLImageAttribs *attrs) return EGL_TRUE; } -/* Returns the total number of file descriptors. Zero indicates an error. */ +/* Returns the total number of planes for the format or zero if it isn't a + * valid fourcc format. 
+ */ static unsigned -dri2_check_dma_buf_format(const _EGLImageAttribs *attrs) +dri2_num_fourcc_format_planes(EGLint format) { - unsigned plane_n; - - switch (attrs->DMABufFourCC.Value) { + switch (format) { case DRM_FORMAT_R8: case DRM_FORMAT_RG88: case DRM_FORMAT_GR88: @@ -2249,14 +2281,15 @@ dri2_check_dma_buf_format(const _EGLImageAttribs *attrs) case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: case DRM_FORMAT_VYUY: - plane_n = 1; - break; + return 1; + case DRM_FORMAT_NV12: case DRM_FORMAT_NV21: case DRM_FORMAT_NV16: case DRM_FORMAT_NV61: - plane_n = 2; - break; + case DRM_FORMAT_P010: + return 2; + case DRM_FORMAT_YUV410: case DRM_FORMAT_YVU410: case DRM_FORMAT_YUV411: @@ -2267,9 +2300,19 @@ dri2_check_dma_buf_format(const _EGLImageAttribs *attrs) case DRM_FORMAT_YVU422: case DRM_FORMAT_YUV444: case DRM_FORMAT_YVU444: - plane_n = 3; - break; + return 3; + default: + return 0; + } +} + +/* Returns the total number of file descriptors. Zero indicates an error. */ +static unsigned +dri2_check_dma_buf_format(const _EGLImageAttribs *attrs) +{ + unsigned plane_n = dri2_num_fourcc_format_planes(attrs->DMABufFourCC.Value); + if (plane_n == 0) { _eglError(EGL_BAD_ATTRIBUTE, "invalid format"); return 0; } @@ -2343,6 +2386,18 @@ dri2_query_dma_buf_formats(_EGLDriver *drv, _EGLDisplay *disp, formats, count)) return EGL_FALSE; + if (max > 0) { + /* Assert that all of the formats returned are actually fourcc formats. + * Some day, if we want the internal interface function to be able to + * return the fake fourcc formats defined in dri_interface.h, we'll have + * to do something more clever here to pare the list down to just real + * fourcc formats so that we don't leak the fake internal ones.
+ */ + for (int i = 0; i < *count; i++) { + assert(dri2_num_fourcc_format_planes(formats[i]) > 0); + } + } + return EGL_TRUE; } @@ -2353,6 +2408,9 @@ dri2_query_dma_buf_modifiers(_EGLDriver *drv, _EGLDisplay *disp, EGLint format, { struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); + if (dri2_num_fourcc_format_planes(format) == 0) + return _eglError(EGL_BAD_PARAMETER, "invalid fourcc format"); + if (max < 0) return _eglError(EGL_BAD_PARAMETER, "invalid value for max count of formats"); diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index f8001ec4b66..3ce4a3e04ea 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -69,6 +69,10 @@ struct zwp_linux_dmabuf_v1; #include #endif /* HAVE_ANDROID_PLATFORM */ +#ifdef HAVE_GRALLOC1 +#include +#endif + #include "eglconfig.h" #include "eglcontext.h" #include "egldisplay.h" @@ -229,7 +233,14 @@ struct dri2_egl_display #endif #ifdef HAVE_ANDROID_PLATFORM - const gralloc_module_t *gralloc; + const hw_module_t *gralloc; + uint16_t gralloc_version; +#ifdef HAVE_GRALLOC1 + gralloc1_device_t *gralloc1_dvc; + GRALLOC1_PFN_LOCK_FLEX pfn_lockflex; + GRALLOC1_PFN_GET_FORMAT pfn_getFormat; + GRALLOC1_PFN_UNLOCK pfn_unlock; +#endif #endif bool is_render_node; @@ -290,6 +301,7 @@ struct dri2_egl_surface struct { #ifdef HAVE_WAYLAND_PLATFORM struct wl_buffer *wl_buffer; + bool wl_release; __DRIimage *dri_image; /* for is_different_gpu case. 
NULL else */ __DRIimage *linear_copy; diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c index cc16fd8118f..f6499ce2bb0 100644 --- a/src/egl/drivers/dri2/platform_android.c +++ b/src/egl/drivers/dri2/platform_android.c @@ -49,6 +49,8 @@ #define ALIGN(val, align) (((val) + (align) - 1) & ~((align) - 1)) +#define GRALLOC_DRM_GET_FORMAT 1 + struct droid_yuv_format { /* Lookup keys */ int native; /* HAL_PIXEL_FORMAT_ */ @@ -59,14 +61,26 @@ struct droid_yuv_format { int fourcc; /* __DRI_IMAGE_FOURCC_ */ }; +/* This enumeration can be deleted if Android defined it in + * system/core/include/system/graphics.h + */ +enum { + HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL = 0x100, + HAL_PIXEL_FORMAT_NV12 = 0x10F, + HAL_PIXEL_FORMAT_P010_INTEL = 0x110 +}; + /* The following table is used to look up a DRI image FourCC based * on native format and information contained in android_ycbcr struct. */ static const struct droid_yuv_format droid_yuv_formats[] = { /* Native format, YCrCb, Chroma step, DRI image FourCC */ { HAL_PIXEL_FORMAT_YCbCr_420_888, 0, 2, __DRI_IMAGE_FOURCC_NV12 }, + { HAL_PIXEL_FORMAT_P010_INTEL, 0, 4, __DRI_IMAGE_FOURCC_P010 }, { HAL_PIXEL_FORMAT_YCbCr_420_888, 0, 1, __DRI_IMAGE_FOURCC_YUV420 }, { HAL_PIXEL_FORMAT_YCbCr_420_888, 1, 1, __DRI_IMAGE_FOURCC_YVU420 }, { HAL_PIXEL_FORMAT_YV12, 1, 1, __DRI_IMAGE_FOURCC_YVU420 }, + { HAL_PIXEL_FORMAT_NV12, 0, 2, __DRI_IMAGE_FOURCC_NV12 }, + { HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL, 0, 2, __DRI_IMAGE_FOURCC_NV12 }, /* HACK: See droid_create_image_from_prime_fd() and * https://issuetracker.google.com/32077885. 
*/ { HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED, 0, 2, __DRI_IMAGE_FOURCC_NV12 }, @@ -248,6 +262,51 @@ droid_window_dequeue_buffer(struct dri2_egl_surface *dri2_surf) return EGL_TRUE; } +static int +droid_resolve_format(struct dri2_egl_display *dri2_dpy, + struct ANativeWindowBuffer *buf) +{ + int format = -1; + int ret; + + if (buf->format != HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED) + return buf->format; +#ifdef HAVE_GRALLOC1 + if(dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) { + + if (!dri2_dpy->pfn_getFormat) { + _eglLog(_EGL_WARNING, "Gralloc does not support getFormat"); + return -1; + } + ret = dri2_dpy->pfn_getFormat(dri2_dpy->gralloc1_dvc, buf->handle, + &format); + if (ret) { + _eglLog(_EGL_WARNING, "gralloc->getFormat failed: %d", ret); + return -1; + } + } else { +#else + const gralloc_module_t *gralloc0; + gralloc0 = dri2_dpy->gralloc; + + if (!gralloc0->perform) { + _eglLog(_EGL_WARNING, "gralloc->perform not supported"); + return -1; + } + ret = gralloc0->perform(dri2_dpy->gralloc, + GRALLOC_DRM_GET_FORMAT, + buf->handle, &format); + if (ret){ + _eglLog(_EGL_WARNING, "gralloc->perform failed with error: %d", ret); + return -1; + } +#endif +#ifdef HAVE_GRALLOC1 + } +#endif + return format; +} + static EGLBoolean droid_window_enqueue_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_surf) { @@ -436,7 +495,7 @@ droid_swap_interval(_EGLDriver *drv, _EGLDisplay *dpy, struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf); struct ANativeWindow *window = dri2_surf->window; - if (window->setSwapInterval(window, interval)) + if (window && window->setSwapInterval(window, interval)) return EGL_FALSE; surf->SwapInterval = interval; @@ -622,11 +681,18 @@ droid_query_buffer_age(_EGLDriver *drv, { struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surface); + /* To avoid blocking other EGL calls, release the display mutex before + * we enter droid_window_dequeue_buffer() and re-acquire the mutex upon + * return. 
+ */ + mtx_unlock(&disp->Mutex); if (update_buffers(dri2_surf) < 0) { _eglError(EGL_BAD_ALLOC, "droid_query_buffer_age"); + mtx_lock(&disp->Mutex); return -1; } + mtx_lock(&disp->Mutex); return dri2_surf->back ? dri2_surf->back->age : 0; } @@ -666,6 +732,31 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw) return EGL_TRUE; } +static int get_ycbcr_from_flexlayout(struct android_flex_layout *outFlexLayout, struct android_ycbcr *ycbcr) +{ + + for( int i = 0; i < outFlexLayout->num_planes; i++) { + switch(outFlexLayout->planes[i].component){ + case FLEX_COMPONENT_Y: + ycbcr->y = outFlexLayout->planes[i].top_left; + ycbcr->ystride = outFlexLayout->planes[i].v_increment; + break; + case FLEX_COMPONENT_Cb: + ycbcr->cb = outFlexLayout->planes[i].top_left; + ycbcr->cstride = outFlexLayout->planes[i].v_increment; + break; + case FLEX_COMPONENT_Cr: + ycbcr->cr = outFlexLayout->planes[i].top_left; + ycbcr->chroma_step = outFlexLayout->planes[i].h_increment; + break; + default: /* any other plane component is only logged; ycbcr is left untouched */ + _eglLog(_EGL_WARNING,"%s: unknown component 0x%x", __func__, outFlexLayout->planes[i].component); + break; + } + } + return 0; +} + #if ANDROID_API_LEVEL >= 23 static EGLBoolean droid_set_damage_region(_EGLDriver *drv, @@ -709,30 +800,70 @@ droid_create_image_from_prime_fd_yuv(_EGLDisplay *disp, _EGLContext *ctx, { struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); struct android_ycbcr ycbcr; +#ifdef HAVE_GRALLOC1 + struct android_flex_layout outFlexLayout; + gralloc1_rect_t accessRegion; +#endif size_t offsets[3]; size_t pitches[3]; int is_ycrcb; int fourcc; int ret; - if (!dri2_dpy->gralloc->lock_ycbcr) { - _eglLog(_EGL_WARNING, "Gralloc does not support lock_ycbcr"); + int format = droid_resolve_format(dri2_dpy, buf); + if (format < 0) { + _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR"); return NULL; } memset(&ycbcr, 0, sizeof(ycbcr)); - ret = dri2_dpy->gralloc->lock_ycbcr(dri2_dpy->gralloc, buf->handle, - 0, 0, 0, 0, 0, &ycbcr); - if (ret) { - /* HACK: See
droid_create_image_from_prime_fd() and - * https://issuetracker.google.com/32077885.*/ - if (buf->format == HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED) - return NULL; - - _eglLog(_EGL_WARNING, "gralloc->lock_ycbcr failed: %d", ret); - return NULL; - } - dri2_dpy->gralloc->unlock(dri2_dpy->gralloc, buf->handle); +#ifdef HAVE_GRALLOC1 + if(dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) { + if (!dri2_dpy->pfn_lockflex) { + _eglLog(_EGL_WARNING, "Gralloc does not support lockflex"); + return NULL; + } + + ret = dri2_dpy->pfn_lockflex(dri2_dpy->gralloc1_dvc, buf->handle, + 0, 0, &accessRegion, &outFlexLayout, -1); + if (ret) { + _eglLog(_EGL_WARNING, "gralloc->lockflex failed: %d", ret); + return NULL; + } + ret = get_ycbcr_from_flexlayout(&outFlexLayout, &ycbcr); + if (ret) { + _eglLog(_EGL_WARNING, "gralloc->lockflex failed: %d", ret); + return NULL; + } + int outReleaseFence = 0; + dri2_dpy->pfn_unlock(dri2_dpy->gralloc1_dvc, buf->handle, &outReleaseFence); + } else { +#endif + const gralloc_module_t *gralloc0; + gralloc0 = dri2_dpy->gralloc; + + if (!gralloc0->lock_ycbcr) { + _eglLog(_EGL_WARNING, "Gralloc does not support lock_ycbcr"); + return NULL; + } + + ret = gralloc0->lock_ycbcr(gralloc0, buf->handle, + 0, 0, 0, 0, 0, &ycbcr); + + if (ret) { + /* HACK: See droid_create_image_from_prime_fd() and + * https://issuetracker.google.com/32077885.*/ + if (buf->format == HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED) + return NULL; + + _eglLog(_EGL_WARNING, "gralloc->lock_ycbcr failed: %d", ret); + return NULL; + } + + gralloc0->unlock(dri2_dpy->gralloc, buf->handle); +#ifdef HAVE_GRALLOC1 + } +#endif /* When lock_ycbcr's usage argument contains no SW_READ/WRITE flags * it will return the .y/.cb/.cr pointers based on a NULL pointer, @@ -757,14 +888,15 @@ droid_create_image_from_prime_fd_yuv(_EGLDisplay *disp, _EGLContext *ctx, /* .chroma_step is the byte distance between the same chroma channel * values of subsequent pixels, assumed to be the same for Cb and 
Cr. */ - fourcc = get_fourcc_yuv(buf->format, is_ycrcb, ycbcr.chroma_step); + fourcc = get_fourcc_yuv(format, is_ycrcb, ycbcr.chroma_step); if (fourcc == -1) { _eglLog(_EGL_WARNING, "unsupported YUV format, native = %x, is_ycrcb = %d, chroma_step = %d", - buf->format, is_ycrcb, ycbcr.chroma_step); + format, is_ycrcb, ycbcr.chroma_step); return NULL; } - if (ycbcr.chroma_step == 2) { + /* FIXME? we should not rely on chroma_step */ + if (ycbcr.chroma_step == 2 || ycbcr.chroma_step == 4) { /* Semi-planar Y + CbCr or Y + CrCb format. */ const EGLint attr_list_2plane[] = { EGL_WIDTH, buf->width, @@ -806,9 +938,16 @@ static _EGLImage * droid_create_image_from_prime_fd(_EGLDisplay *disp, _EGLContext *ctx, struct ANativeWindowBuffer *buf, int fd) { + struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); unsigned int pitch; - if (is_yuv(buf->format)) { + int format = droid_resolve_format(dri2_dpy, buf); + if (format < 0) { + _eglLog(_EGL_WARNING, "Could not resolve buffer format"); + return NULL; + } + + if (is_yuv(format)) { _EGLImage *image; image = droid_create_image_from_prime_fd_yuv(disp, ctx, buf, fd); @@ -823,13 +962,13 @@ droid_create_image_from_prime_fd(_EGLDisplay *disp, _EGLContext *ctx, return image; } - const int fourcc = get_fourcc(buf->format); + const int fourcc = get_fourcc(format); if (fourcc == -1) { _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR"); return NULL; } - pitch = buf->stride * get_format_bpp(buf->format); + pitch = buf->stride * get_format_bpp(format); if (pitch == 0) { _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR"); return NULL; @@ -1134,6 +1273,25 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy) return (config_count != 0); } +#ifdef HAVE_DRM_GRALLOC +static int +droid_open_device_drm_gralloc(struct dri2_egl_display *dri2_dpy) +{ + int fd = -1, err = -EINVAL; + + if (dri2_dpy->gralloc->perform) + err = dri2_dpy->gralloc->perform(dri2_dpy->gralloc, + GRALLOC_MODULE_PERFORM_GET_DRM_FD, + &fd); + if (err || fd 
< 0) { + _eglLog(_EGL_WARNING, "fail to get drm fd"); + fd = -1; + } + + return (fd >= 0) ? fcntl(fd, F_DUPFD_CLOEXEC, 3) : -1; +} +#endif /* HAVE_DRM_GRALLOC */ + static const struct dri2_egl_display_vtbl droid_display_vtbl = { .authenticate = NULL, .create_window_surface = droid_create_window_surface, @@ -1207,7 +1365,7 @@ droid_load_driver(_EGLDisplay *disp) dri2_dpy->is_render_node = drmGetNodeTypeFromFd(dri2_dpy->fd) == DRM_NODE_RENDER; if (!dri2_dpy->is_render_node) { - #ifdef HAVE_DRM_GRALLOC +#ifdef HAVE_DRM_GRALLOC /* Handle control nodes using __DRI_DRI2_LOADER extension and GEM names * for backwards compatibility with drm_gralloc. (Do not use on new * systems.) */ @@ -1216,10 +1374,10 @@ droid_load_driver(_EGLDisplay *disp) err = "DRI2: failed to load driver"; goto error; } - #else +#else err = "DRI2: handle is not for a render node"; goto error; - #endif +#endif } else { dri2_dpy->loader_extensions = droid_image_loader_extensions; if (!dri2_load_driver_dri3(disp)) { @@ -1362,6 +1520,7 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *disp) { struct dri2_egl_display *dri2_dpy; const char *err; + hw_device_t *device; int ret; /* Not supported yet */ @@ -1381,10 +1540,35 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *disp) err = "DRI2: failed to get gralloc module"; goto cleanup; } + dri2_dpy->gralloc_version = dri2_dpy->gralloc->module_api_version; +#ifdef HAVE_GRALLOC1 + if (dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) { + ret = dri2_dpy->gralloc->methods->open(dri2_dpy->gralloc, GRALLOC_HARDWARE_MODULE_ID, &device); + if (ret) { + err = "Failed to open hw_device device"; + goto cleanup; + } else { + dri2_dpy->gralloc1_dvc = (gralloc1_device_t *)device; + + dri2_dpy->pfn_lockflex = (GRALLOC1_PFN_LOCK_FLEX)\ + dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_LOCK_FLEX); + + dri2_dpy->pfn_getFormat = (GRALLOC1_PFN_GET_FORMAT)\ + dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, 
GRALLOC1_FUNCTION_GET_FORMAT); + + dri2_dpy->pfn_unlock = (GRALLOC1_PFN_UNLOCK)\ + dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_UNLOCK); + } + } +#endif disp->DriverData = (void *) dri2_dpy; +#ifdef HAVE_DRM_GRALLOC + dri2_dpy->fd = droid_open_device_drm_gralloc(dri2_dpy); +#else dri2_dpy->fd = droid_open_device(disp); +#endif if (dri2_dpy->fd < 0) { err = "DRI2: failed to open device"; goto cleanup; diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index dca099500a8..15eeee5d686 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -182,9 +182,12 @@ wl_buffer_release(void *data, struct wl_buffer *buffer) if (dri2_surf->color_buffers[i].wl_buffer == buffer) break; - if (i == ARRAY_SIZE(dri2_surf->color_buffers)) { + assert (i < ARRAY_SIZE(dri2_surf->color_buffers)); + + if (dri2_surf->color_buffers[i].wl_release) { wl_buffer_destroy(buffer); - return; + dri2_surf->color_buffers[i].wl_release = false; + dri2_surf->color_buffers[i].wl_buffer = NULL; } dri2_surf->color_buffers[i].locked = false; @@ -201,6 +204,17 @@ resize_callback(struct wl_egl_window *wl_win, void *data) struct dri2_egl_display *dri2_dpy = dri2_egl_display(dri2_surf->base.Resource.Display); + /* Update the surface size as soon as native window is resized; from user + * pov, this makes the effect that resize is done immediately after native + * window resize, without requiring to wait until the first draw.
+ * + * A more detailed and lengthy explanation can be found at + * https://lists.freedesktop.org/archives/mesa-dev/2018-June/196474.html + */ + if (!dri2_surf->back) { + dri2_surf->base.Width = wl_win->width; + dri2_surf->base.Height = wl_win->height; + } dri2_dpy->flush->invalidate(dri2_surf->dri_drawable); } @@ -258,6 +272,9 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, goto cleanup_surf; } + dri2_surf->base.Width = window->width; + dri2_surf->base.Height = window->height; + visual_idx = dri2_wl_visual_idx_from_config(dri2_dpy, config); assert(visual_idx != -1); @@ -411,9 +428,14 @@ dri2_wl_release_buffers(struct dri2_egl_surface *dri2_surf) dri2_egl_display(dri2_surf->base.Resource.Display); for (int i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) { - if (dri2_surf->color_buffers[i].wl_buffer && - !dri2_surf->color_buffers[i].locked) - wl_buffer_destroy(dri2_surf->color_buffers[i].wl_buffer); + if (dri2_surf->color_buffers[i].wl_buffer) { + if (dri2_surf->color_buffers[i].locked) { + dri2_surf->color_buffers[i].wl_release = true; + } else { + wl_buffer_destroy(dri2_surf->color_buffers[i].wl_buffer); + dri2_surf->color_buffers[i].wl_buffer = NULL; + } + } if (dri2_surf->color_buffers[i].dri_image) dri2_dpy->image->destroyImage(dri2_surf->color_buffers[i].dri_image); if (dri2_surf->color_buffers[i].linear_copy) @@ -422,11 +444,9 @@ dri2_wl_release_buffers(struct dri2_egl_surface *dri2_surf) munmap(dri2_surf->color_buffers[i].data, dri2_surf->color_buffers[i].data_size); - dri2_surf->color_buffers[i].wl_buffer = NULL; dri2_surf->color_buffers[i].dri_image = NULL; dri2_surf->color_buffers[i].linear_copy = NULL; dri2_surf->color_buffers[i].data = NULL; - dri2_surf->color_buffers[i].locked = false; } if (dri2_dpy->dri2) @@ -577,8 +597,8 @@ update_buffers(struct dri2_egl_surface *dri2_surf) struct dri2_egl_display *dri2_dpy = dri2_egl_display(dri2_surf->base.Resource.Display); - if (dri2_surf->base.Width != dri2_surf->wl_win->width || - 
dri2_surf->base.Height != dri2_surf->wl_win->height) { + if (dri2_surf->base.Width != dri2_surf->wl_win->attached_width || + dri2_surf->base.Height != dri2_surf->wl_win->attached_height) { dri2_wl_release_buffers(dri2_surf); @@ -955,6 +975,8 @@ dri2_wl_swap_buffers_with_damage(_EGLDriver *drv, dri2_surf->current->wl_buffer = create_wl_buffer(dri2_dpy, dri2_surf, image); + dri2_surf->current->wl_release = false; + wl_buffer_add_listener(dri2_surf->current->wl_buffer, &wl_buffer_listener, dri2_surf); } diff --git a/src/egl/drivers/dri2/platform_x11_dri3.c b/src/egl/drivers/dri2/platform_x11_dri3.c index c3c9c2dd45d..e1967422f0a 100644 --- a/src/egl/drivers/dri2/platform_x11_dri3.c +++ b/src/egl/drivers/dri2/platform_x11_dri3.c @@ -107,12 +107,17 @@ static const struct loader_dri3_vtable egl_dri3_vtable = { static EGLBoolean dri3_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf) { + struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf); + xcb_drawable_t drawable = dri3_surf->loader_drawable.drawable; (void) drv; loader_dri3_drawable_fini(&dri3_surf->loader_drawable); + if (surf->Type == EGL_PBUFFER_BIT) + xcb_free_pixmap (dri2_dpy->conn, drawable); + dri2_fini_surface(surf); free(surf); diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index deb479b6d56..fadb2b16871 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -1208,6 +1208,9 @@ eglSwapInterval(EGLDisplay dpy, EGLint interval) if (_eglGetSurfaceHandle(surf) == EGL_NO_SURFACE) RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE); + if (surf->Type != EGL_WINDOW_BIT) + RETURN_EGL_EVAL(disp, EGL_TRUE); + interval = CLAMP(interval, surf->Config->MinSwapInterval, surf->Config->MaxSwapInterval); @@ -1243,6 +1246,9 @@ eglSwapBuffers(EGLDisplay dpy, EGLSurface surface) RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE); #endif + if (surf->Type != EGL_WINDOW_BIT) + RETURN_EGL_EVAL(disp, EGL_TRUE); + /* From the EGL 
1.5 spec: * * If eglSwapBuffers is called and the native window associated with @@ -1282,6 +1288,9 @@ _eglSwapBuffersWithDamageCommon(_EGLDisplay *disp, _EGLSurface *surf, surf != ctx->DrawSurface) RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE); + if (surf->Type != EGL_WINDOW_BIT) + RETURN_EGL_EVAL(disp, EGL_TRUE); + if ((n_rects > 0 && rects == NULL) || n_rects < 0) RETURN_EGL_ERROR(disp, EGL_BAD_PARAMETER, EGL_FALSE); diff --git a/src/egl/main/eglcurrent.c b/src/egl/main/eglcurrent.c index 7af3011b757..545697e5662 100644 --- a/src/egl/main/eglcurrent.c +++ b/src/egl/main/eglcurrent.c @@ -137,13 +137,37 @@ _eglDestroyThreadInfo(_EGLThreadInfo *t) } +/** + * Delete/free a _EGLThreadInfo object. + */ +static void +_eglDestroyThreadInfoCallback(_EGLThreadInfo *t) +{ + /* If this callback is called on thread termination then try to also give a + * chance to cleanup to the client drivers. If called for module termination + * then just release the thread information as calling eglReleaseThread + * would result in a deadlock. + */ + if (_egl_TSDInitialized) { + /* The callback handler has replaced the TLS entry, which is passed in as + * 't', with NULL. Restore it here so that the release thread finds it in + * the TLS entry. + */ + _eglSetTSD(t); + eglReleaseThread(); + } else { + _eglDestroyThreadInfo(t); + } +} + + /** * Make sure TSD is initialized and return current value. 
*/ static inline _EGLThreadInfo * _eglCheckedGetTSD(void) { - if (_eglInitTSD(&_eglDestroyThreadInfo) != EGL_TRUE) { + if (_eglInitTSD(&_eglDestroyThreadInfoCallback) != EGL_TRUE) { _eglLog(_EGL_FATAL, "failed to initialize \"current\" system"); return NULL; } diff --git a/src/egl/main/egldispatchstubs.c b/src/egl/main/egldispatchstubs.c index bfc3195c779..96708aeb0dc 100644 --- a/src/egl/main/egldispatchstubs.c +++ b/src/egl/main/egldispatchstubs.c @@ -59,6 +59,11 @@ static __eglMustCastToProperFunctionPointerType FetchVendorFunc(__EGLvendorInfo } if (func == NULL) { if (errorCode != EGL_SUCCESS) { + // Since we have no vendor, the follow-up eglGetError() call will + // end up using the GLVND error code. Set it here. + if (vendor == NULL) { + exports->setEGLError(errorCode); + } _eglError(errorCode, __EGL_DISPATCH_FUNC_NAMES[index]); } return NULL; diff --git a/src/egl/meson.build b/src/egl/meson.build index 89a84fd8908..1e0b1d33af5 100644 --- a/src/egl/meson.build +++ b/src/egl/meson.build @@ -99,10 +99,10 @@ endif if with_platform_x11 files_egl += files('drivers/dri2/platform_x11.c') + incs_for_egl += inc_loader if with_dri3 files_egl += files('drivers/dri2/platform_x11_dri3.c') link_for_egl += libloader_dri3_helper - incs_for_egl += inc_loader endif deps_for_egl += [dep_x11_xcb, dep_xcb_dri2, dep_xcb_xfixes] endif @@ -114,6 +114,7 @@ if with_platform_drm endif if with_platform_surfaceless files_egl += files('drivers/dri2/platform_surfaceless.c') + incs_for_egl += [inc_loader] endif if with_platform_wayland deps_for_egl += [dep_wayland_client, dep_wayland_server, dep_wayland_egl_headers] @@ -129,6 +130,7 @@ endif if with_platform_android deps_for_egl += dep_android files_egl += files('drivers/dri2/platform_android.c') + incs_for_egl += [inc_loader] endif if with_platform_haiku incs_for_egl += inc_haikugl diff --git a/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h b/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h index 90dbf658a6d..284e07386dc 100644 --- 
a/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h +++ b/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h @@ -30,6 +30,7 @@ DRI_CONF_SECTION_DEBUG DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION("false") DRI_CONF_FORCE_GLSL_ABS_SQRT("false") DRI_CONF_GLSL_CORRECT_DERIVATIVES_AFTER_DISCARD("false") + DRI_CONF_FORCE_COMPAT_PROFILE("false") DRI_CONF_SECTION_END DRI_CONF_SECTION_MISCELLANEOUS diff --git a/src/gallium/auxiliary/pipe-loader/meson.build b/src/gallium/auxiliary/pipe-loader/meson.build index 32e8188c68b..c0b9a53cf65 100644 --- a/src/gallium/auxiliary/pipe-loader/meson.build +++ b/src/gallium/auxiliary/pipe-loader/meson.build @@ -31,6 +31,9 @@ libpipe_loader_defines = [] if dep_libdrm.found() files_pipe_loader += files('pipe_loader_drm.c') endif +if with_dri + libpipe_loader_defines += '-DHAVE_PIPE_LOADER_DRI' +endif if with_gallium_drisw_kms libpipe_loader_defines += '-DHAVE_PIPE_LOADER_KMS' endif @@ -42,10 +45,7 @@ libpipe_loader_static = static_library( inc_util, inc_loader, inc_gallium, inc_include, inc_src, inc_gallium_aux, inc_gallium_winsys, ], - c_args : [ - c_vis_args, '-DHAVE_PIPE_LOADER_DRI', '-DGALLIUM_STATIC_TARGETS=1', - libpipe_loader_defines, - ], + c_args : [c_vis_args, libpipe_loader_defines, '-DGALLIUM_STATIC_TARGETS=1'], link_with : [libloader, libxmlconfig], dependencies : [dep_libdrm], build_by_default : false, @@ -59,7 +59,7 @@ libpipe_loader_dynamic = static_library( inc_gallium_winsys, ], c_args : [ - c_vis_args, libpipe_loader_defines, '-DHAVE_PIPE_LOADER_DRI', + c_vis_args, libpipe_loader_defines, '-DPIPE_SEARCH_DIR="@0@"'.format( join_paths(get_option('prefix'), get_option('libdir'), 'gallium-pipe') ) diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h index 53c5a7e8c4c..245a8af2ac5 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.h +++ b/src/gallium/auxiliary/util/u_threaded_context.h @@ -408,14 +408,6 @@ threaded_transfer(struct pipe_transfer *transfer) return 
(struct threaded_transfer*)transfer; } -static inline struct pipe_context * -threaded_context_unwrap_unsync(struct pipe_context *pipe) -{ - if (!pipe || !pipe->priv) - return pipe; - return (struct pipe_context*)pipe->priv; -} - static inline void tc_unflushed_batch_token_reference(struct tc_unflushed_batch_token **dst, struct tc_unflushed_batch_token *src) diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c index 746ff1085ce..f721613cbc5 100644 --- a/src/gallium/auxiliary/util/u_vbuf.c +++ b/src/gallium/auxiliary/util/u_vbuf.c @@ -1131,6 +1131,31 @@ static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr) mgr->dirty_real_vb_mask = 0; } +static void +u_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info *info, + unsigned *indirect_data, unsigned stride, + unsigned draw_count) +{ + assert(info->index_size); + info->indirect = NULL; + + for (unsigned i = 0; i < draw_count; i++) { + unsigned offset = i * stride / 4; + + info->count = indirect_data[offset + 0]; + info->instance_count = indirect_data[offset + 1]; + + if (!info->count || !info->instance_count) + continue; + + info->start = indirect_data[offset + 2]; + info->index_bias = indirect_data[offset + 3]; + info->start_instance = indirect_data[offset + 4]; + + u_vbuf_draw_vbo(mgr, info); + } +} + void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) { struct pipe_context *pipe = mgr->pipe; @@ -1160,33 +1185,164 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) new_info = *info; - /* Fallback. We need to know all the parameters. */ + /* Handle indirect (multi)draws. 
*/ if (new_info.indirect) { - struct pipe_transfer *transfer = NULL; - int *data; - - if (new_info.index_size) { - data = pipe_buffer_map_range(pipe, new_info.indirect->buffer, - new_info.indirect->offset, 20, - PIPE_TRANSFER_READ, &transfer); - new_info.index_bias = data[3]; - new_info.start_instance = data[4]; - } - else { - data = pipe_buffer_map_range(pipe, new_info.indirect->buffer, - new_info.indirect->offset, 16, - PIPE_TRANSFER_READ, &transfer); - new_info.start_instance = data[3]; + const struct pipe_draw_indirect_info *indirect = new_info.indirect; + unsigned draw_count = 0; + + /* Get the number of draws. */ + if (indirect->indirect_draw_count) { + pipe_buffer_read(pipe, indirect->indirect_draw_count, + indirect->indirect_draw_count_offset, + 4, &draw_count); + } else { + draw_count = indirect->draw_count; } - new_info.count = data[0]; - new_info.instance_count = data[1]; - new_info.start = data[2]; - pipe_buffer_unmap(pipe, transfer); - new_info.indirect = NULL; - - if (!new_info.count) + if (!draw_count) return; + + unsigned data_size = (draw_count - 1) * indirect->stride + + (new_info.index_size ? 20 : 16); + unsigned *data = malloc(data_size); + if (!data) + return; /* report an error? */ + + /* Read the used buffer range only once, because the read can be + * uncached. + */ + pipe_buffer_read(pipe, indirect->buffer, indirect->offset, data_size, + data); + + if (info->index_size) { + /* Indexed multidraw. */ + unsigned index_bias0 = data[3]; + bool index_bias_same = true; + + /* If we invoke the translate path, we have to split the multidraw. */ + if (incompatible_vb_mask || + mgr->ve->incompatible_elem_mask) { + u_vbuf_split_indexed_multidraw(mgr, &new_info, data, + indirect->stride, draw_count); + free(data); + return; + } + + /* See if index_bias is the same for all draws. 
*/ + for (unsigned i = 1; i < draw_count; i++) { + if (data[i * indirect->stride / 4 + 3] != index_bias0) { + index_bias_same = false; + break; + } + } + + /* Split the multidraw if index_bias is different. */ + if (!index_bias_same) { + u_vbuf_split_indexed_multidraw(mgr, &new_info, data, + indirect->stride, draw_count); + free(data); + return; + } + + /* If we don't need to use the translate path and index_bias is + * the same, we can process the multidraw with the time complexity + * equal to 1 draw call (except for the index range computation). + * We only need to compute the index range covering all draw calls + * of the multidraw. + * + * The driver will not look at these values because indirect != NULL. + * These values determine the user buffer bounds to upload. + */ + new_info.index_bias = index_bias0; + new_info.min_index = ~0u; + new_info.max_index = 0; + new_info.start_instance = ~0u; + unsigned end_instance = 0; + + struct pipe_transfer *transfer = NULL; + const uint8_t *indices; + + if (info->has_user_indices) { + indices = (uint8_t*)info->index.user; + } else { + indices = (uint8_t*)pipe_buffer_map(pipe, info->index.resource, + PIPE_TRANSFER_READ, &transfer); + } + + for (unsigned i = 0; i < draw_count; i++) { + unsigned offset = i * indirect->stride / 4; + unsigned start = data[offset + 2]; + unsigned count = data[offset + 0]; + unsigned start_instance = data[offset + 4]; + unsigned instance_count = data[offset + 1]; + + if (!count || !instance_count) + continue; + + /* Update the ranges of instances. */ + new_info.start_instance = MIN2(new_info.start_instance, + start_instance); + end_instance = MAX2(end_instance, start_instance + instance_count); + + /* Update the index range. 
*/ + unsigned min, max; + new_info.count = count; /* only used by get_minmax_index */ + u_vbuf_get_minmax_index_mapped(&new_info, + indices + + new_info.index_size * start, + &min, &max); + + new_info.min_index = MIN2(new_info.min_index, min); + new_info.max_index = MAX2(new_info.max_index, max); + } + free(data); + + if (transfer) + pipe_buffer_unmap(pipe, transfer); + + /* Set the final instance count. */ + new_info.instance_count = end_instance - new_info.start_instance; + + if (new_info.start_instance == ~0u || !new_info.instance_count) + return; + } else { + /* Non-indexed multidraw. + * + * Keep the draw call indirect and compute minimums & maximums, + * which will determine the user buffer bounds to upload, but + * the driver will not look at these values because indirect != NULL. + * + * This efficiently processes the multidraw with the time complexity + * equal to 1 draw call. + */ + new_info.start = ~0u; + new_info.start_instance = ~0u; + unsigned end_vertex = 0; + unsigned end_instance = 0; + + for (unsigned i = 0; i < draw_count; i++) { + unsigned offset = i * indirect->stride / 4; + unsigned start = data[offset + 2]; + unsigned count = data[offset + 0]; + unsigned start_instance = data[offset + 3]; + unsigned instance_count = data[offset + 1]; + + new_info.start = MIN2(new_info.start, start); + new_info.start_instance = MIN2(new_info.start_instance, + start_instance); + + end_vertex = MAX2(end_vertex, start + count); + end_instance = MAX2(end_instance, start_instance + instance_count); + } + free(data); + + /* Set the final counts. */ + new_info.count = end_vertex - new_info.start; + new_info.instance_count = end_instance - new_info.start_instance; + + if (new_info.start == ~0u || !new_info.count || !new_info.instance_count) + return; + } } if (new_info.index_size) { @@ -1211,7 +1367,8 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) * We would have to break this drawing operation into several ones. 
*/ /* Use some heuristic to see if unrolling indices improves * performance. */ - if (!new_info.primitive_restart && + if (!info->indirect && + !new_info.primitive_restart && num_vertices > new_info.count*2 && num_vertices - new_info.count > 32 && !u_vbuf_mapping_vertex_buffer_blocks(mgr)) { diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri3.c b/src/gallium/auxiliary/vl/vl_winsys_dri3.c index 8e3c4a0e04d..df2c9c0e50c 100644 --- a/src/gallium/auxiliary/vl/vl_winsys_dri3.c +++ b/src/gallium/auxiliary/vl/vl_winsys_dri3.c @@ -712,7 +712,6 @@ vl_dri3_screen_destroy(struct vl_screen *vscreen) if (scrn->front_buffer) { dri3_free_front_buffer(scrn, scrn->front_buffer); scrn->front_buffer = NULL; - return; } for (i = 0; i < BACK_BUFFER_NUM; ++i) { diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c index 88f6fb557d0..7d5ca25e686 100644 --- a/src/gallium/drivers/freedreno/freedreno_state.c +++ b/src/gallium/drivers/freedreno/freedreno_state.c @@ -211,6 +211,15 @@ fd_set_framebuffer_state(struct pipe_context *pctx, struct fd_context *ctx = fd_context(pctx); struct pipe_framebuffer_state *cso; + cso = &ctx->batch->framebuffer; + + if (util_framebuffer_state_equal(cso, framebuffer)) + return; + + util_copy_framebuffer_state(cso, framebuffer); + + cso->samples = util_framebuffer_get_num_samples(cso); + if (ctx->screen->reorder) { struct fd_batch *batch, *old_batch = NULL; @@ -239,14 +248,9 @@ fd_set_framebuffer_state(struct pipe_context *pctx, DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush, framebuffer->cbufs[0], framebuffer->zsbuf); fd_batch_flush(ctx->batch, false, false); + util_copy_framebuffer_state(&ctx->batch->framebuffer, cso); } - cso = &ctx->batch->framebuffer; - - util_copy_framebuffer_state(cso, framebuffer); - - cso->samples = util_framebuffer_get_num_samples(cso); - ctx->dirty |= FD_DIRTY_FRAMEBUFFER; ctx->disabled_scissor.minx = 0; diff --git 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp index c7436e2e297..49a5f3b01f2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp @@ -320,11 +320,11 @@ GM107LoweringPass::handleSUQ(TexInstruction *suq) if (mask & 0x1) bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(0), suq->getDef(0), - loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), suq->tex.bindless)); + loadMsAdjInfo32(suq->tex.target, 0, slot, ind, suq->tex.bindless)); if (mask & 0x2) { int d = util_bitcount(mask & 0x1); bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(d), suq->getDef(d), - loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), suq->tex.bindless)); + loadMsAdjInfo32(suq->tex.target, 1, slot, ind, suq->tex.bindless)); } } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 1410cf26c87..3feb1fcf138 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1732,6 +1732,45 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless prog->driver->io.suInfoBase); } +Value * +NVC0LoweringPass::loadMsAdjInfo32(TexInstruction::Target target, uint32_t index, int slot, Value *ind, bool bindless) +{ + if (!bindless || targ->getChipset() < NVISA_GM107_CHIPSET) + return loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(index), bindless); + + assert(bindless); + + Value *samples = bld.getSSA(); + // this shouldn't be lowered because it's being inserted before the current instruction + TexInstruction *tex = new_TexInstruction(func, OP_TXQ); + tex->tex.target = target; + tex->tex.query = TXQ_TYPE; + tex->tex.mask = 0x4; + tex->tex.r = 0xff; + tex->tex.s = 0x1f; + tex->tex.rIndirectSrc = 0; + tex->setDef(0, samples); + tex->setSrc(0, ind); + tex->setSrc(1, bld.loadImm(NULL, 0)); 
+ bld.insert(tex); + + // doesn't work with sample counts other than 1/2/4/8 but they aren't supported + switch (index) { + case 0: { + Value *tmp = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), samples, bld.mkImm(2)); + return bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), tmp, bld.mkImm(2)); + } + case 1: { + Value *tmp = bld.mkCmp(OP_SET, CC_GT, TYPE_U32, bld.getSSA(), TYPE_U32, samples, bld.mkImm(2))->getDef(0); + return bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), tmp, bld.mkImm(1)); + } + default: { + assert(false); + return NULL; + } + } +} + static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c) { switch (su->tex.target.getEnum()) { @@ -1817,8 +1856,8 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex) Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA(); Value *ind = tex->getIndirectR(); - Value *ms_x = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), tex->tex.bindless); - Value *ms_y = loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), tex->tex.bindless); + Value *ms_x = loadMsAdjInfo32(tex->tex.target, 0, slot, ind, tex->tex.bindless); + Value *ms_y = loadMsAdjInfo32(tex->tex.target, 1, slot, ind, tex->tex.bindless); bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x); bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y); @@ -2151,13 +2190,36 @@ NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su) } } +void +NVC0LoweringPass::insertOOBSurfaceOpResult(TexInstruction *su) +{ + if (!su->getPredicate()) + return; + + bld.setPosition(su, true); + + for (unsigned i = 0; su->defExists(i); ++i) { + ValueDef &def = su->def(i); + + Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0)); + assert(su->cc == CC_NOT_P); + mov->setPredicate(CC_P, su->getPredicate()); + Instruction *uni = bld.mkOp2(OP_UNION, TYPE_U32, bld.getSSA(), NULL, mov->getDef(0)); + + def.replace(uni->getDef(0), false); + uni->setSrc(0, def.get()); + } +} + void NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su) { processSurfaceCoordsNVE4(su); - if (su->op == OP_SULDP) + if 
(su->op == OP_SULDP) { convertSurfaceFormat(su); + insertOOBSurfaceOpResult(su); + } if (su->op == OP_SUREDB || su->op == OP_SUREDP) { assert(su->getPredicate()); @@ -2267,8 +2329,10 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su) processSurfaceCoordsNVC0(su); - if (su->op == OP_SULDP) + if (su->op == OP_SULDP) { convertSurfaceFormat(su); + insertOOBSurfaceOpResult(su); + } if (su->op == OP_SUREDB || su->op == OP_SUREDP) { const int dim = su->tex.target.getDim(); @@ -2370,8 +2434,10 @@ NVC0LoweringPass::handleSurfaceOpGM107(TexInstruction *su) { processSurfaceCoordsGM107(su); - if (su->op == OP_SULDP) + if (su->op == OP_SULDP) { convertSurfaceFormat(su); + insertOOBSurfaceOpResult(su); + } if (su->op == OP_SUREDP) { Value *def = su->getDef(0); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 8724c09afd9..4136b1ecfeb 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -148,7 +148,7 @@ class NVC0LoweringPass : public Pass void handlePIXLD(Instruction *); void checkPredicate(Instruction *); - Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless); + Value *loadMsAdjInfo32(TexInstruction::Target targ, uint32_t index, int slot, Value *ind, bool bindless); virtual bool visit(Instruction *); @@ -161,6 +161,7 @@ class NVC0LoweringPass : public Pass Value *loadResInfo32(Value *ptr, uint32_t off, uint16_t base); Value *loadResInfo64(Value *ptr, uint32_t off, uint16_t base); Value *loadResLength32(Value *ptr, uint32_t off, uint16_t base); + Value *loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless); Value *loadBufInfo64(Value *ptr, uint32_t off); Value *loadBufLength32(Value *ptr, uint32_t off); Value *loadUboInfo64(Value *ptr, uint32_t off); @@ -172,6 +173,7 @@ class NVC0LoweringPass : public Pass void processSurfaceCoordsNVE4(TexInstruction *); void 
processSurfaceCoordsNVC0(TexInstruction *); void convertSurfaceFormat(TexInstruction *); + void insertOOBSurfaceOpResult(TexInstruction *); Value *calculateSampleOffset(Value *sampleID); protected: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 16022e6f237..7bb12cdf4ce 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -3855,7 +3855,7 @@ Program::optimizeSSA(int level) RUN_PASS(2, AlgebraicOpt, run); RUN_PASS(2, ModifierFolding, run); // before load propagation -> less checks RUN_PASS(1, ConstantFolding, foldAll); - RUN_PASS(1, Split64BitOpPreRA, run); + RUN_PASS(0, Split64BitOpPreRA, run); RUN_PASS(2, LateAlgebraicOpt, run); RUN_PASS(1, LoadPropagation, run); RUN_PASS(1, IndirectPropagation, run); diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index c97b707955c..d7898ed58fe 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -148,20 +148,21 @@ nouveau_screen_bo_get_handle(struct pipe_screen *pscreen, static void nouveau_disk_cache_create(struct nouveau_screen *screen) { - uint32_t mesa_timestamp; - char *timestamp_str; - int res; - - if (disk_cache_get_function_timestamp(nouveau_disk_cache_create, - &mesa_timestamp)) { - res = asprintf(&timestamp_str, "%u", mesa_timestamp); - if (res != -1) { - screen->disk_shader_cache = - disk_cache_create(nouveau_screen_get_name(&screen->base), - timestamp_str, 0); - free(timestamp_str); - } - } + struct mesa_sha1 ctx; + unsigned char sha1[20]; + char cache_id[20 * 2 + 1]; + + _mesa_sha1_init(&ctx); + if (!disk_cache_get_function_identifier(nouveau_disk_cache_create, + &ctx)) + return; + + _mesa_sha1_final(&ctx, sha1); + disk_cache_format_hex_id(cache_id, sha1, 20 * 2); + + screen->disk_shader_cache = + 
disk_cache_create(nouveau_screen_get_name(&screen->base), + cache_id, 0); } int diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c index a55adfa59f4..0a693d7b173 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c @@ -318,8 +318,8 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = F3(A, R16G16B16X16_FLOAT, RGBX16_FLOAT, R, G, B, xx, FLOAT, R16_G16_B16_A16, TB), F3(A, R16G16B16X16_UNORM, RGBA16_UNORM, R, G, B, xx, UNORM, R16_G16_B16_A16, T), F3(A, R16G16B16X16_SNORM, RGBA16_SNORM, R, G, B, xx, SNORM, R16_G16_B16_A16, T), - I3(A, R16G16B16X16_SINT, RGBA16_SINT, R, G, B, xx, SINT, R16_G16_B16_A16, T), - I3(A, R16G16B16X16_UINT, RGBA16_UINT, R, G, B, xx, UINT, R16_G16_B16_A16, T), + I3(A, R16G16B16X16_SINT, RGBA16_SINT, R, G, B, xx, SINT, R16_G16_B16_A16, TR), + I3(A, R16G16B16X16_UINT, RGBA16_UINT, R, G, B, xx, UINT, R16_G16_B16_A16, TR), F2(A, R16G16_FLOAT, RG16_FLOAT, R, G, xx, xx, FLOAT, R16_G16, IB), F2(A, R16G16_UNORM, RG16_UNORM, R, G, xx, xx, UNORM, R16_G16, IC), @@ -337,8 +337,8 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = C4(A, R8G8B8A8_SINT, RGBA8_SINT, R, G, B, A, SINT, A8B8G8R8, IR), C4(A, R8G8B8A8_UINT, RGBA8_UINT, R, G, B, A, UINT, A8B8G8R8, IR), F3(A, R8G8B8X8_SNORM, RGBA8_SNORM, R, G, B, xx, SNORM, A8B8G8R8, T), - I3(A, R8G8B8X8_SINT, RGBA8_SINT, R, G, B, xx, SINT, A8B8G8R8, T), - I3(A, R8G8B8X8_UINT, RGBA8_UINT, R, G, B, xx, UINT, A8B8G8R8, T), + I3(A, R8G8B8X8_SINT, RGBA8_SINT, R, G, B, xx, SINT, A8B8G8R8, TR), + I3(A, R8G8B8X8_UINT, RGBA8_UINT, R, G, B, xx, UINT, A8B8G8R8, TR), F2(A, R8G8_UNORM, RG8_UNORM, R, G, xx, xx, UNORM, G8R8, IB), F2(A, R8G8_SNORM, RG8_SNORM, R, G, xx, xx, SNORM, G8R8, IC), diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c index 1ef0f5030a5..de840eb531b 100644 --- 
a/src/gallium/drivers/nouveau/nv50/nv50_surface.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -1669,6 +1669,13 @@ nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info) struct nouveau_pushbuf *push = nv50->base.pushbuf; bool eng3d = FALSE; + if (info->src.box.width == 0 || info->src.box.height == 0 || + info->dst.box.width == 0 || info->dst.box.height == 0) { + pipe_debug_message(&nv50->base.debug, ERROR, + "Blit with zero-size src or dst box"); + return; + } + if (util_format_is_depth_or_stencil(info->dst.resource->format)) { if (!(info->mask & PIPE_MASK_ZS)) return; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index df5723dc37c..726160d7c05 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -429,9 +429,9 @@ static const uint64_t nve4_read_hw_sm_counters_code[] = * mov b32 $r6 $pm6 * mov b32 $r7 $pm7 * set $p0 0x1 eq u32 $r8 0x0 - * mov b32 $r10 c7[0x620] + * mov b32 $r10 c7[0x6a0] * ext u32 $r8 $r12 0x414 - * mov b32 $r11 c7[0x624] + * mov b32 $r11 c7[0x6a4] * sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04 * ext u32 $r9 $r12 0x208 * (not $p0) exit @@ -449,7 +449,7 @@ static const uint64_t nve4_read_hw_sm_counters_code[] = * add b32 $r12 $c $r12 $r9 * st b128 wt g[$r10d] $r0q * sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00 - * mov b32 $r0 c7[0x628] + * mov b32 $r0 c7[0x6a8] * add b32 $r13 $r13 0x0 $c * $p1 st b128 wt g[$r12d+0x40] $r4q * st b32 wt g[$r12d+0x50] $r0 @@ -467,9 +467,9 @@ static const uint64_t nve4_read_hw_sm_counters_code[] = 0x2c00000028019c04ULL, 0x2c0000002c01dc04ULL, 0x190e0000fc81dc03ULL, - 0x28005c1880029de4ULL, + 0x28005c1a80029de4ULL, 0x7000c01050c21c03ULL, - 0x28005c189002dde4ULL, + 0x28005c1a9002dde4ULL, 0x204282020042e047ULL, 0x7000c00820c25c03ULL, 0x80000000000021e7ULL, @@ -487,7 +487,7 @@ static const uint64_t nve4_read_hw_sm_counters_code[] = 0x4801000024c31c03ULL, 
0x9400000000a01fc5ULL, 0x200002e04202c047ULL, - 0x28005c18a0001de4ULL, + 0x28005c1aa0001de4ULL, 0x0800000000d35c42ULL, 0x9400000100c107c5ULL, 0x9400000140c01f85ULL, @@ -510,9 +510,9 @@ static const uint64_t nvf0_read_hw_sm_counters_code[] = 0x86400000051c001aULL, 0x86400000059c001eULL, 0xdb201c007f9c201eULL, - 0x64c03ce0c41c002aULL, + 0x64c03ce0d41c002aULL, 0xc00000020a1c3021ULL, - 0x64c03ce0c49c002eULL, + 0x64c03ce0d49c002eULL, 0x0810a0808010b810ULL, 0xc0000001041c3025ULL, 0x180000000020003cULL, @@ -530,7 +530,7 @@ static const uint64_t nvf0_read_hw_sm_counters_code[] = 0xe0840000049c3032ULL, 0xfe800000001c2800ULL, 0x080000b81080b010ULL, - 0x64c03ce0c51c0002ULL, + 0x64c03ce0d51c0002ULL, 0xe08040007f9c3436ULL, 0xfe80000020043010ULL, 0xfc800000281c3000ULL, @@ -554,10 +554,10 @@ static const uint64_t gm107_read_hw_sm_counters_code[] = 0x001f8401fc2007a1ULL, /* sched (st 0x1 wr 0x5) (st 0x1 wt 0x1) (st 0x1) */ 0xf0c8000000b70007ULL, /* mov $r7 $pm7 */ 0x5b6403800087ff07ULL, /* isetp eq u32 and $p0 0x1 0x0 $r8 0x1 */ - 0x4c98079c1887000aULL, /* mov $r10 c7[0x620] 0xf */ + 0x4c98079c1a87000aULL, /* mov $r10 c7[0x6a0] 0xf */ 0x001fa400fc2017e1ULL, /* sched (st 0x1 wt 0x2) (st 0x1) (st 0x9) */ 0x3800000091470c08ULL, /* bfe u32 $r8 $r12 0x914 */ - 0x4c98079c1897000bULL, /* mov $r11 c7[0x624] 0xf */ + 0x4c98079c1a97000bULL, /* mov $r11 c7[0x6a4] 0xf */ 0x3800000020870c09ULL, /* bfe u32 $r9 $r12 0x208 */ 0x001c1800fc2007edULL, /* sched (st 0xd) (st 0x1) (st 0x6 wr 0x0) */ 0xe30000000008000fULL, /* not $p0 exit */ @@ -578,7 +578,7 @@ static const uint64_t gm107_read_hw_sm_counters_code[] = 0x003f983c1c4007e1ULL, /* sched (st 0x1) (st 0x2 rd 0x0 wt 0x3c) (st 0x6 wt 0x1) */ 0x5c1008000ff70d0dULL, /* iadd x $r13 $r13 0x0 */ 0xbfd0000000070a00ULL, /* st e wt b128 g[$r10] $r0 0x1 */ - 0x4c98079c18a70000ULL, /* mov $r0 c7[0x628] 0xf */ + 0x4c98079c1aa70000ULL, /* mov $r0 c7[0x6a8] 0xf */ 0x001fbc00fc2007e6ULL, /* sched (st 0x1) (st 0x1) (st 0xf) */ 0xbfd0000004010c04ULL, /* $p1 st 
e wt b128 g[$r12+0x40] $r4 0x1 */ 0xbf90000005070c00ULL, /* st e wt b32 g[$r12+0x50] $r0 0x1 */ @@ -1760,14 +1760,14 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] = * mov b32 $r6 $pm6 * mov b32 $r7 $pm7 * set $p0 0x1 eq u32 $r8 0x0 - * mov b32 $r10 c15[0x620] - * mov b32 $r11 c15[0x624] + * mov b32 $r10 c15[0x6a0] + * mov b32 $r11 c15[0x6a4] * ext u32 $r8 $r9 0x414 * (not $p0) exit * mul $r8 u32 $r8 u32 48 * add b32 $r10 $c $r10 $r8 * add b32 $r11 $r11 0x0 $c - * mov b32 $r8 c15[0x628] + * mov b32 $r8 c15[0x6a8] * st b128 wt g[$r10d+0x00] $r0q * st b128 wt g[$r10d+0x10] $r4q * st b32 wt g[$r10d+0x20] $r8 @@ -1783,14 +1783,14 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] = 0x2c00000028019c04ULL, 0x2c0000002c01dc04ULL, 0x190e0000fc81dc03ULL, - 0x28007c1880029de4ULL, - 0x28007c189002dde4ULL, + 0x28007c1a80029de4ULL, + 0x28007c1a9002dde4ULL, 0x7000c01050921c03ULL, 0x80000000000021e7ULL, 0x10000000c0821c02ULL, 0x4801000020a29c03ULL, 0x0800000000b2dc42ULL, - 0x28007c18a0021de4ULL, + 0x28007c1aa0021de4ULL, 0x9400000000a01fc5ULL, 0x9400000040a11fc5ULL, 0x9400000080a21f85ULL, diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index 39b1369758a..03881c62785 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -1573,6 +1573,13 @@ nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info) struct nouveau_pushbuf *push = nvc0->base.pushbuf; bool eng3d = false; + if (info->src.box.width == 0 || info->src.box.height == 0 || + info->dst.box.width == 0 || info->dst.box.height == 0) { + pipe_debug_message(&nvc0->base.debug, ERROR, + "Blit with zero-size src or dst box"); + return; + } + if (util_format_is_depth_or_stencil(info->dst.resource->format)) { if (!(info->mask & PIPE_MASK_ZS)) return; @@ -1610,6 +1617,10 @@ nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info) else if 
(util_format_is_alpha(info->src.format)) eng3d = info->src.format != PIPE_FORMAT_A8_UNORM; + else + if (util_format_is_srgb(info->dst.format) && + util_format_get_nr_components(info->src.format) == 1) + eng3d = true; else eng3d = !nv50_2d_format_supported(info->src.format); } diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 90eae1e2829..a77f58242e3 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -715,7 +715,6 @@ static void compute_emit_cs(struct r600_context *rctx, rctx->cmd_buf_is_compute = true; } - r600_need_cs_space(rctx, 0, true); if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI) { r600_shader_select(&rctx->b.b, rctx->cs_shader_state.shader->sel, &compute_dirty); current = rctx->cs_shader_state.shader->sel->current; @@ -742,16 +741,22 @@ static void compute_emit_cs(struct r600_context *rctx, } rctx->cs_block_grid_sizes[3] = rctx->cs_block_grid_sizes[7] = 0; rctx->driver_consts[PIPE_SHADER_COMPUTE].cs_block_grid_size_dirty = true; + + evergreen_emit_atomic_buffer_setup_count(rctx, current, combined_atomics, &atomic_used_mask); + r600_need_cs_space(rctx, 0, true, util_bitcount(atomic_used_mask)); + if (need_buf_const) { eg_setup_buffer_constants(rctx, PIPE_SHADER_COMPUTE); } r600_update_driver_const_buffers(rctx, true); - if (evergreen_emit_atomic_buffer_setup(rctx, current, combined_atomics, &atomic_used_mask)) { + evergreen_emit_atomic_buffer_setup(rctx, true, combined_atomics, atomic_used_mask); + if (atomic_used_mask) { radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4)); } - } + } else + r600_need_cs_space(rctx, 0, true, 0); /* Initialize all the compute-related registers. 
* diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index d3f3e227c1f..5e0e27b0f16 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -109,7 +109,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx, r600_need_cs_space(rctx, 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) + - R600_MAX_PFP_SYNC_ME_DWORDS, FALSE); + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE, 0); /* Flush the caches for the first copy only. */ if (rctx->b.flags) { diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 57e81e30c27..cc41e114369 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -4030,7 +4030,6 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx, if (!buffers || !buffers[idx].buffer) { pipe_resource_reference(&abuf->buffer, NULL); - astate->enabled_mask &= ~(1 << i); continue; } buf = &buffers[idx]; @@ -4038,7 +4037,6 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx, pipe_resource_reference(&abuf->buffer, buf->buffer); abuf->buffer_offset = buf->buffer_offset; abuf->buffer_size = buf->buffer_size; - astate->enabled_mask |= (1 << i); } } @@ -4868,20 +4866,15 @@ static void cayman_write_count_to_gds(struct r600_context *rctx, radeon_emit(cs, reloc); } -bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, - struct r600_pipe_shader *cs_shader, - struct r600_shader_atomic *combined_atomics, - uint8_t *atomic_used_mask_p) +void evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx, + struct r600_pipe_shader *cs_shader, + struct r600_shader_atomic *combined_atomics, + uint8_t *atomic_used_mask_p) { - struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state; - unsigned pkt_flags = 0; uint8_t atomic_used_mask = 0; int i, j, k; bool is_compute = cs_shader ? 
true : false; - if (is_compute) - pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE; - for (i = 0; i < (is_compute ? 1 : EG_NUM_HW_STAGES); i++) { uint8_t num_atomic_stage; struct r600_pipe_shader *pshader; @@ -4914,8 +4907,25 @@ bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, } } } + *atomic_used_mask_p = atomic_used_mask; +} + +void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, + bool is_compute, + struct r600_shader_atomic *combined_atomics, + uint8_t atomic_used_mask) +{ + struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state; + unsigned pkt_flags = 0; + uint32_t mask; + + if (is_compute) + pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE; + + mask = atomic_used_mask; + if (!mask) + return; - uint32_t mask = atomic_used_mask; while (mask) { unsigned atomic_index = u_bit_scan(&mask); struct r600_shader_atomic *atomic = &combined_atomics[atomic_index]; @@ -4927,8 +4937,6 @@ bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, else evergreen_emit_set_append_cnt(rctx, atomic, resource, pkt_flags); } - *atomic_used_mask_p = atomic_used_mask; - return true; } void evergreen_emit_atomic_buffer_save(struct r600_context *rctx, @@ -4940,7 +4948,7 @@ void evergreen_emit_atomic_buffer_save(struct r600_context *rctx, struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state; uint32_t pkt_flags = 0; uint32_t event = EVENT_TYPE_PS_DONE; - uint32_t mask = astate->enabled_mask; + uint32_t mask; uint64_t dst_offset; unsigned reloc; diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 1cfc180ad6c..a2f5f637b20 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -31,7 +31,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, - boolean count_draw_in) + boolean count_draw_in, unsigned num_atomics) { /* Flush the DMA IB if it's not empty. 
*/ if (radeon_emitted(ctx->b.dma.cs, 0)) @@ -61,6 +61,9 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS; } + /* add atomic counters, 8 pre + 8 post per counter + 16 post if any counters */ + num_dw += (num_atomics * 16) + (num_atomics ? 16 : 0); + /* Count in r600_suspend_queries. */ num_dw += ctx->b.num_cs_dw_queries_suspend; @@ -526,7 +529,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, r600_need_cs_space(rctx, 10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) + - 3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE); + 3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE, 0); /* Flush the caches for the first copy only. */ if (rctx->b.flags) { diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 6204e3c557b..239005cab7f 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -446,8 +446,6 @@ struct r600_shader_state { }; struct r600_atomic_buffer_state { - uint32_t enabled_mask; - uint32_t dirty_mask; struct pipe_shader_buffer buffer[EG_MAX_ATOMIC_BUFFERS]; }; @@ -773,7 +771,7 @@ void r600_context_gfx_flush(void *context, unsigned flags, struct pipe_fence_handle **fence); void r600_begin_new_cs(struct r600_context *ctx); void r600_flush_emit(struct r600_context *ctx); -void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in); +void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in, unsigned num_atomics); void r600_emit_pfp_sync_me(struct r600_context *rctx); void r600_cp_dma_copy_buffer(struct r600_context *rctx, struct pipe_resource *dst, uint64_t dst_offset, @@ -1067,10 +1065,14 @@ void r600_delete_shader_selector(struct pipe_context *ctx, struct r600_pipe_shader_selector *sel); struct r600_shader_atomic; -bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, - struct r600_pipe_shader *cs_shader, +void 
evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx, + struct r600_pipe_shader *cs_shader, + struct r600_shader_atomic *combined_atomics, + uint8_t *atomic_used_mask_p); +void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx, + bool is_compute, struct r600_shader_atomic *combined_atomics, - uint8_t *atomic_used_mask_p); + uint8_t atomic_used_mask); void evergreen_emit_atomic_buffer_save(struct r600_context *rctx, bool is_compute, struct r600_shader_atomic *combined_atomics, diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c index f7cfd0d46a6..e7c645611d7 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.c +++ b/src/gallium/drivers/r600/r600_pipe_common.c @@ -854,27 +854,28 @@ static void r600_disk_cache_create(struct r600_common_screen *rscreen) if (rscreen->debug_flags & DBG_ALL_SHADERS) return; - uint32_t mesa_timestamp; - if (disk_cache_get_function_timestamp(r600_disk_cache_create, - &mesa_timestamp)) { - char *timestamp_str; - int res = -1; - - res = asprintf(&timestamp_str, "%u",mesa_timestamp); - if (res != -1) { - /* These flags affect shader compilation. */ - uint64_t shader_debug_flags = - rscreen->debug_flags & - (DBG_FS_CORRECT_DERIVS_AFTER_KILL | - DBG_UNSAFE_MATH); - - rscreen->disk_shader_cache = - disk_cache_create(r600_get_family_name(rscreen), - timestamp_str, - shader_debug_flags); - free(timestamp_str); - } - } + struct mesa_sha1 ctx; + unsigned char sha1[20]; + char cache_id[20 * 2 + 1]; + + _mesa_sha1_init(&ctx); + if (!disk_cache_get_function_identifier(r600_disk_cache_create, + &ctx)) + return; + + _mesa_sha1_final(&ctx, sha1); + disk_cache_format_hex_id(cache_id, sha1, 20 * 2); + + /* These flags affect shader compilation.
*/ + uint64_t shader_debug_flags = + rscreen->debug_flags & + (DBG_FS_CORRECT_DERIVS_AFTER_KILL | + DBG_UNSAFE_MATH); + + rscreen->disk_shader_cache = + disk_cache_create(r600_get_family_name(rscreen), + cache_id, + shader_debug_flags); } static struct disk_cache *r600_get_disk_shader_cache(struct pipe_screen *pscreen) diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 402d95838f0..e6c1b0be97c 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -2085,8 +2085,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info : (rctx->tes_shader)? rctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] : info->mode; - if (rctx->b.chip_class >= EVERGREEN) - evergreen_emit_atomic_buffer_setup(rctx, NULL, combined_atomics, &atomic_used_mask); + if (rctx->b.chip_class >= EVERGREEN) { + evergreen_emit_atomic_buffer_setup_count(rctx, NULL, combined_atomics, &atomic_used_mask); + } if (index_size) { index_offset += info->start * index_size; @@ -2172,7 +2173,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info evergreen_setup_tess_constants(rctx, info, &num_patches); /* Emit states. */ - r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE); + r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE, util_bitcount(atomic_used_mask)); r600_flush_emit(rctx); mask = rctx->dirty_atoms; @@ -2180,6 +2181,10 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info r600_emit_atom(rctx, rctx->atoms[u_bit_scan64(&mask)]); } + if (rctx->b.chip_class >= EVERGREEN) { + evergreen_emit_atomic_buffer_setup(rctx, false, combined_atomics, atomic_used_mask); + } + if (rctx->b.chip_class == CAYMAN) { /* Copied from radeonsi. 
*/ unsigned primgroup_size = 128; /* recommended without a GS */ @@ -3284,7 +3289,7 @@ static void r600_set_active_query_state(struct pipe_context *ctx, boolean enable static void r600_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw, bool include_draw_vbo) { - r600_need_cs_space((struct r600_context*)ctx, num_dw, include_draw_vbo); + r600_need_cs_space((struct r600_context*)ctx, num_dw, include_draw_vbo, 0); } /* keep this at the end of this file, please */ diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 08db6bab04c..d08c6e5637c 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -774,8 +774,8 @@ static void r600_texture_get_htile_size(struct r600_common_screen *rscreen, return; } - width = align(rtex->resource.b.b.width0, cl_width * 8); - height = align(rtex->resource.b.b.height0, cl_height * 8); + width = align(rtex->surface.u.legacy.level[0].nblk_x, cl_width * 8); + height = align(rtex->surface.u.legacy.level[0].nblk_y, cl_height * 8); slice_elements = (width * height) / (8 * 8); slice_bytes = slice_elements * 4; diff --git a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp index 5681fdc4425..b7d87eac9f4 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp @@ -567,7 +567,7 @@ int bc_builder::build_fetch_gds(fetch_node *n) { const fetch_op_info *fop = bc.op_ptr; unsigned gds_op = (ctx.fetch_opcode(bc.op) >> 8) & 0x3f; unsigned mem_op = 4; - assert(fop->flags && FF_GDS); + assert(fop->flags & FF_GDS); if (bc.op == FETCH_OP_TF_WRITE) { mem_op = 5; diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp index a7b828268b9..4b909f85f19 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp @@ -617,7 +617,7 @@ int bc_parser::decode_fetch_clause(cf_node* 
cf) { int r; unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1; - if (cf->bc.op_ptr->flags && FF_GDS) + if (cf->bc.op_ptr->flags & FF_GDS) cf->subtype = NST_GDS_CLAUSE; else cf->subtype = NST_TEX_CLAUSE; diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index 923216d77f1..a7ef4252ee0 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -1003,25 +1003,35 @@ static void get_mjpeg_slice_header(struct ruvd_decoder *dec, struct pipe_mjpeg_p size++; for (i = 0; i < 2; ++i) { + int num = 0, j; + if (pic->huffman_table.load_huffman_table[i] == 0) continue; buf[size++] = 0x00 | i; memcpy((buf + size), &pic->huffman_table.table[i].num_dc_codes, 16); size += 16; - memcpy((buf + size), &pic->huffman_table.table[i].dc_values, 12); - size += 12; + for (j = 0; j < 16; ++j) + num += pic->huffman_table.table[i].num_dc_codes[j]; + assert(num <= 12); + memcpy((buf + size), &pic->huffman_table.table[i].dc_values, num); + size += num; } for (i = 0; i < 2; ++i) { + int num = 0, j; + if (pic->huffman_table.load_huffman_table[i] == 0) continue; buf[size++] = 0x10 | i; memcpy((buf + size), &pic->huffman_table.table[i].num_ac_codes, 16); size += 16; - memcpy((buf + size), &pic->huffman_table.table[i].ac_values, 162); - size += 162; + for (j = 0; j < 16; ++j) + num += pic->huffman_table.table[i].num_ac_codes[j]; + assert(num <= 162); + memcpy((buf + size), &pic->huffman_table.table[i].ac_values, num); + size += num; } bs = (uint16_t*)&buf[len_pos]; diff --git a/src/gallium/drivers/radeonsi/si_compute.h b/src/gallium/drivers/radeonsi/si_compute.h index 3a4cdea25ef..d0a89820ddf 100644 --- a/src/gallium/drivers/radeonsi/si_compute.h +++ b/src/gallium/drivers/radeonsi/si_compute.h @@ -29,7 +29,7 @@ #include "si_shader.h" -#define MAX_GLOBAL_BUFFERS 22 +#define MAX_GLOBAL_BUFFERS 32 struct si_compute { struct pipe_reference reference; diff --git a/src/gallium/drivers/radeonsi/si_fence.c 
b/src/gallium/drivers/radeonsi/si_fence.c index 186a785437d..abb7057f299 100644 --- a/src/gallium/drivers/radeonsi/si_fence.c +++ b/src/gallium/drivers/radeonsi/si_fence.c @@ -291,8 +291,12 @@ static boolean si_fence_finish(struct pipe_screen *screen, { struct radeon_winsys *rws = ((struct si_screen*)screen)->ws; struct si_multi_fence *rfence = (struct si_multi_fence *)fence; + struct si_context *sctx; int64_t abs_timeout = os_time_get_absolute_timeout(timeout); + ctx = threaded_context_unwrap_sync(ctx); + sctx = (struct si_context*)(ctx ? ctx : NULL); + if (!util_queue_fence_is_signalled(&rfence->ready)) { if (rfence->tc_token) { /* Ensure that si_flush_from_st will be called for @@ -345,49 +349,43 @@ static boolean si_fence_finish(struct pipe_screen *screen, } /* Flush the gfx IB if it hasn't been flushed yet. */ - if (ctx && rfence->gfx_unflushed.ctx) { - struct si_context *sctx; - - sctx = (struct si_context *)threaded_context_unwrap_unsync(ctx); - if (rfence->gfx_unflushed.ctx == sctx && - rfence->gfx_unflushed.ib_index == sctx->num_gfx_cs_flushes) { - /* Section 4.1.2 (Signaling) of the OpenGL 4.6 (Core profile) - * spec says: - * - * "If the sync object being blocked upon will not be - * signaled in finite time (for example, by an associated - * fence command issued previously, but not yet flushed to - * the graphics pipeline), then ClientWaitSync may hang - * forever. To help prevent this behavior, if - * ClientWaitSync is called and all of the following are - * true: - * - * * the SYNC_FLUSH_COMMANDS_BIT bit is set in flags, - * * sync is unsignaled when ClientWaitSync is called, - * * and the calls to ClientWaitSync and FenceSync were - * issued from the same context, - * - * then the GL will behave as if the equivalent of Flush - * were inserted immediately after the creation of sync." - * - * This means we need to flush for such fences even when we're - * not going to wait. - */ - threaded_context_unwrap_sync(ctx); - si_flush_gfx_cs(sctx, - (timeout ? 
0 : PIPE_FLUSH_ASYNC) | - RADEON_FLUSH_START_NEXT_GFX_IB_NOW, - NULL); - rfence->gfx_unflushed.ctx = NULL; - - if (!timeout) - return false; + if (sctx && rfence->gfx_unflushed.ctx == sctx && + rfence->gfx_unflushed.ib_index == sctx->num_gfx_cs_flushes) { + /* Section 4.1.2 (Signaling) of the OpenGL 4.6 (Core profile) + * spec says: + * + * "If the sync object being blocked upon will not be + * signaled in finite time (for example, by an associated + * fence command issued previously, but not yet flushed to + * the graphics pipeline), then ClientWaitSync may hang + * forever. To help prevent this behavior, if + * ClientWaitSync is called and all of the following are + * true: + * + * * the SYNC_FLUSH_COMMANDS_BIT bit is set in flags, + * * sync is unsignaled when ClientWaitSync is called, + * * and the calls to ClientWaitSync and FenceSync were + * issued from the same context, + * + * then the GL will behave as if the equivalent of Flush + * were inserted immediately after the creation of sync." + * + * This means we need to flush for such fences even when we're + * not going to wait. + */ + si_flush_gfx_cs(sctx, + (timeout ? 0 : PIPE_FLUSH_ASYNC) | + RADEON_FLUSH_START_NEXT_GFX_IB_NOW, + NULL); + rfence->gfx_unflushed.ctx = NULL; - /* Recompute the timeout after all that. */ - if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { - int64_t time = os_time_get_nano(); - timeout = abs_timeout > time ? abs_timeout - time : 0; - } + if (!timeout) + return false; + + /* Recompute the timeout after all that. */ + if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { + int64_t time = os_time_get_nano(); + timeout = abs_timeout > time ? 
abs_timeout - time : 0; } } diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 9dbd4c64f2a..c0688d448b9 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -133,12 +133,13 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, if (ctx->current_saved_cs) { si_trace_emit(ctx); - si_log_hw_flush(ctx); /* Save the IB for debug contexts. */ si_save_cs(ws, cs, &ctx->current_saved_cs->gfx, true); ctx->current_saved_cs->flushed = true; ctx->current_saved_cs->time_flush = os_time_get_nano(); + + si_log_hw_flush(ctx); } /* Flush the CS. */ @@ -146,8 +147,6 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, if (fence) ws->fence_reference(fence, ctx->last_gfx_fence); - /* This must be after cs_flush returns, since the context's API - * thread can concurrently read this value in si_fence_finish. */ ctx->num_gfx_cs_flushes++; /* Check VM faults if needed. */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index cc05d2f8de3..6b36893698c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -751,42 +751,39 @@ static void si_disk_cache_create(struct si_screen *sscreen) if (sscreen->debug_flags & DBG_ALL_SHADERS) return; - uint32_t mesa_timestamp; - if (disk_cache_get_function_timestamp(si_disk_cache_create, - &mesa_timestamp)) { - char *timestamp_str; - int res = -1; - uint32_t llvm_timestamp; - - if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, - &llvm_timestamp)) { - res = asprintf(&timestamp_str, "%u_%u", - mesa_timestamp, llvm_timestamp); - } + struct mesa_sha1 ctx; + unsigned char sha1[20]; + char cache_id[20 * 2 + 1]; - if (res != -1) { - /* These flags affect shader compilation.
*/ - #define ALL_FLAGS (DBG(FS_CORRECT_DERIVS_AFTER_KILL) | \ - DBG(SI_SCHED) | \ - DBG(GISEL) | \ - DBG(UNSAFE_MATH) | \ - DBG(NIR)) - uint64_t shader_debug_flags = sscreen->debug_flags & - ALL_FLAGS; - - /* Add the high bits of 32-bit addresses, which affects - * how 32-bit addresses are expanded to 64 bits. - */ - STATIC_ASSERT(ALL_FLAGS <= UINT_MAX); - shader_debug_flags |= (uint64_t)sscreen->info.address32_hi << 32; - - sscreen->disk_shader_cache = - disk_cache_create(si_get_family_name(sscreen), - timestamp_str, - shader_debug_flags); - free(timestamp_str); - } - } + _mesa_sha1_init(&ctx); + + if (!disk_cache_get_function_identifier(si_disk_cache_create, &ctx) || + !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, + &ctx)) + return; + + _mesa_sha1_final(&ctx, sha1); + disk_cache_format_hex_id(cache_id, sha1, 20 * 2); + + /* These flags affect shader compilation. */ + #define ALL_FLAGS (DBG(FS_CORRECT_DERIVS_AFTER_KILL) | \ + DBG(SI_SCHED) | \ + DBG(GISEL) | \ + DBG(UNSAFE_MATH) | \ + DBG(NIR)) + uint64_t shader_debug_flags = sscreen->debug_flags & + ALL_FLAGS; + + /* Add the high bits of 32-bit addresses, which affects + * how 32-bit addresses are expanded to 64 bits. 
+ */ + STATIC_ASSERT(ALL_FLAGS <= UINT_MAX); + shader_debug_flags |= (uint64_t)sscreen->info.address32_hi << 32; + + sscreen->disk_shader_cache = + disk_cache_create(si_get_family_name(sscreen), + cache_id, + shader_debug_flags); } struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 405833d3ba7..0b25592093e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -4388,9 +4388,12 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex, ctx->i32_1, ""); LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]); - /* Signal vertex emission */ - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8), - si_get_gs_wave_id(ctx)); + /* Signal vertex emission if vertex data was written. */ + if (offset) { + ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8), + si_get_gs_wave_id(ctx)); + } + if (!use_kill) lp_build_endif(&if_state); } diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c index c51d057967c..0d292864425 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c @@ -36,7 +36,8 @@ static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base, for (i = 0; i < TGSI_NUM_CHANNELS; i++) { LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i); - conds[i] = LLVMBuildFCmp(builder, LLVMRealOGE, value, + /* UGE because NaN shouldn't get killed */ + conds[i] = LLVMBuildFCmp(builder, LLVMRealUGE, value, ctx->ac.f32_0, ""); } @@ -505,18 +506,37 @@ static void emit_bfe(const struct lp_build_tgsi_action *action, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef bfe_sm5; - 
LLVMValueRef cond; - bfe_sm5 = ac_build_bfe(&ctx->ac, emit_data->args[0], - emit_data->args[1], emit_data->args[2], - emit_data->info->opcode == TGSI_OPCODE_IBFE); + if (HAVE_LLVM < 0x0700) { + LLVMValueRef bfe_sm5 = + ac_build_bfe(&ctx->ac, emit_data->args[0], + emit_data->args[1], emit_data->args[2], + emit_data->info->opcode == TGSI_OPCODE_IBFE); - /* Correct for GLSL semantics. */ - cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2], - LLVMConstInt(ctx->i32, 32, 0), ""); - emit_data->output[emit_data->chan] = - LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, ""); + /* Correct for GLSL semantics. */ + LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2], + LLVMConstInt(ctx->i32, 32, 0), ""); + emit_data->output[emit_data->chan] = + LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, ""); + } else { + /* FIXME: LLVM 7 returns incorrect result when count is 0. + * https://bugs.freedesktop.org/show_bug.cgi?id=107276 + */ + LLVMValueRef zero = ctx->i32_0; + LLVMValueRef bfe_sm5 = + ac_build_bfe(&ctx->ac, emit_data->args[0], + emit_data->args[1], emit_data->args[2], + emit_data->info->opcode == TGSI_OPCODE_IBFE); + + /* Correct for GLSL semantics. 
*/ + LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2], + LLVMConstInt(ctx->i32, 32, 0), ""); + LLVMValueRef cond2 = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, emit_data->args[2], + zero, ""); + bfe_sm5 = LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, ""); + emit_data->output[emit_data->chan] = + LLVMBuildSelect(ctx->ac.builder, cond2, zero, bfe_sm5, ""); + } } /* this is ffs in C */ diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index d901401f0bb..4157e5ea3fc 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -383,7 +383,7 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, * Polaris supports primitive restart with WD_SWITCH_ON_EOP=0 * for points, line strips, and tri strips. */ - if (sscreen->info.max_se < 4 || + if (sscreen->info.max_se <= 2 || key->u.prim == PIPE_PRIM_POLYGON || key->u.prim == PIPE_PRIM_LINE_LOOP || key->u.prim == PIPE_PRIM_TRIANGLE_FAN || @@ -414,7 +414,7 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, wd_switch_on_eop = true; /* Required on CIK and later. */ - if (sscreen->info.max_se > 2 && !wd_switch_on_eop) + if (sscreen->info.max_se == 4 && !wd_switch_on_eop) ia_switch_on_eoi = true; /* Required by Hawaii and, for some special cases, by VI. */ @@ -429,6 +429,12 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, key->u.uses_instancing) partial_vs_wave = true; + /* This only applies to Polaris10 and later 4 SE chips. + * wd_switch_on_eop is already true on all other chips. + */ + if (!wd_switch_on_eop && key->u.primitive_restart) + partial_vs_wave = true; + /* If the WD switch is false, the IA switch must be false too. 
*/ assert(wd_switch_on_eop || !ia_switch_on_eop); } diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index e55fd815264..bcff226a586 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -931,8 +931,8 @@ static void si_texture_get_htile_size(struct si_screen *sscreen, return; } - width = align(tex->buffer.b.b.width0, cl_width * 8); - height = align(tex->buffer.b.b.height0, cl_height * 8); + width = align(tex->surface.u.legacy.level[0].nblk_x, cl_width * 8); + height = align(tex->surface.u.legacy.level[0].nblk_y, cl_height * 8); slice_elements = (width * height) / (8 * 8); slice_bytes = slice_elements * 4; diff --git a/src/gallium/drivers/svga/meson.build b/src/gallium/drivers/svga/meson.build index 2976212fdfb..7981e2991f3 100644 --- a/src/gallium/drivers/svga/meson.build +++ b/src/gallium/drivers/svga/meson.build @@ -79,7 +79,7 @@ files_svga = files( libsvga = static_library( 'svga', - files_svga, + [files_svga, sha1_h], c_args : [c_vis_args, c_msvc_compat_args], include_directories : [ inc_src, inc_include, inc_gallium, inc_gallium_aux, diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py index 2e7f1a88a0a..485403ae1ec 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py @@ -57,7 +57,6 @@ ['VHSUBPS', ['a', 'b'], 'a'], ['VPTESTC', ['a', 'b'], 'mInt32Ty'], ['VPTESTZ', ['a', 'b'], 'mInt32Ty'], - ['VFMADDPS', ['a', 'b', 'c'], 'a'], ['VPHADDD', ['a', 'b'], 'a'], ['PDEP32', ['a', 'b'], 'a'], ['RDTSC', [], 'mInt64Ty'], @@ -71,6 +70,7 @@ ['STACKRESTORE', 'stackrestore', ['a'], []], ['VMINPS', 'minnum', ['a', 'b'], ['a']], ['VMAXPS', 'maxnum', ['a', 'b'], ['a']], + ['VFMADDPS', 'fmuladd', ['a', 'b', 'c'], ['a']], ['DEBUGTRAP', 'debugtrap', [], []], ['POPCNT', 'ctpop', ['a'], ['a']], 
['LOG2', 'log2', ['a'], ['a']], @@ -161,7 +161,8 @@ def parse_ir_builder(input_file): func_name == 'CreateAlignmentAssumptionHelper' or func_name == 'CreateGEP' or func_name == 'CreateLoad' or - func_name == 'CreateMaskedLoad'): + func_name == 'CreateMaskedLoad' or + func_name == 'CreateElementUnorderedAtomicMemCpy'): ignore = True # Convert CamelCase to CAMEL_CASE diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp index f89c502db7d..d5328c8e4e6 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp @@ -870,7 +870,6 @@ struct BlendJit : public Builder passes.add(createCFGSimplificationPass()); passes.add(createEarlyCSEPass()); passes.add(createInstructionCombiningPass()); - passes.add(createInstructionSimplifierPass()); passes.add(createConstantPropagationPass()); passes.add(createSCCPPass()); passes.add(createAggressiveDCEPass()); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index 4116dad4430..26d8688f5e9 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -755,15 +755,8 @@ namespace SwrJit Value* Builder::FMADDPS(Value* a, Value* b, Value* c) { Value* vOut; - // use FMADs if available - if (JM()->mArch.AVX2()) - { - vOut = VFMADDPS(a, b, c); - } - else - { - vOut = FADD(FMUL(a, b), c); - } + // This maps to LLVM fmuladd intrinsic + vOut = VFMADDPS(a, b, c); return vOut; } diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index b4d326ebdcc..3ad0fabe81f 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -294,7 +294,6 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) 
optPasses.add(createCFGSimplificationPass()); optPasses.add(createEarlyCSEPass()); optPasses.add(createInstructionCombiningPass()); - optPasses.add(createInstructionSimplifierPass()); optPasses.add(createConstantPropagationPass()); optPasses.add(createSCCPPass()); optPasses.add(createAggressiveDCEPass()); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp index 7605823c04d..c34959d35ee 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp @@ -76,7 +76,6 @@ namespace SwrJit {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256}, {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256}, {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256}, - {"meta.intrinsic.VFMADDPS", Intrinsic::x86_fma_vfmadd_ps_256}, {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d}, {"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32}, {"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc}, diff --git a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp index 8f86af2a4b4..11ad36521b3 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp @@ -306,7 +306,6 @@ struct StreamOutJit : public Builder passes.add(createCFGSimplificationPass()); passes.add(createEarlyCSEPass()); passes.add(createInstructionCombiningPass()); - passes.add(createInstructionSimplifierPass()); passes.add(createConstantPropagationPass()); passes.add(createSCCPPass()); passes.add(createAggressiveDCEPass()); diff --git a/src/gallium/drivers/swr/swr_public.h b/src/gallium/drivers/swr/swr_public.h index 07ea6280cd6..b32f41fdf7b 100644 --- a/src/gallium/drivers/swr/swr_public.h +++ b/src/gallium/drivers/swr/swr_public.h @@ -37,7 +37,7 @@ extern "C" { struct 
pipe_screen *swr_create_screen(struct sw_winsys *winsys); // arch-specific dll entry point -PUBLIC struct pipe_screen *swr_create_screen_internal(struct sw_winsys *winsys); +struct pipe_screen *swr_create_screen_internal(struct sw_winsys *winsys); // cleanup for failed screen creation void swr_destroy_screen_internal(struct swr_screen **screen); diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp index fa232b6838b..084f55dab99 100644 --- a/src/gallium/drivers/swr/swr_screen.cpp +++ b/src/gallium/drivers/swr/swr_screen.cpp @@ -1143,12 +1143,10 @@ swr_validate_env_options(struct swr_screen *screen) } -PUBLIC struct pipe_screen * swr_create_screen_internal(struct sw_winsys *winsys) { struct swr_screen *screen = CALLOC_STRUCT(swr_screen); - memset(screen, 0, sizeof(struct swr_screen)); if (!screen) return NULL; diff --git a/src/gallium/drivers/tegra/tegra_screen.c b/src/gallium/drivers/tegra/tegra_screen.c index 034ea271eec..73fdc632933 100644 --- a/src/gallium/drivers/tegra/tegra_screen.c +++ b/src/gallium/drivers/tegra/tegra_screen.c @@ -203,6 +203,7 @@ static int tegra_open_render_node(void) } if (strcmp(version->name, "nouveau") != 0) { + drmFreeVersion(version); close(fd); continue; } diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index 2e743851bea..2f084792e61 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -585,6 +585,8 @@ v3d_get_device_info(struct v3d_screen *screen) uint32_t minor = (ident1.value >> 0) & 0xf; screen->devinfo.ver = major * 10 + minor; + screen->devinfo.vpm_size = (ident1.value >> 28 & 0xf) * 8192; + switch (screen->devinfo.ver) { case 33: case 41: diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c index 479adb70fdb..bfb4af13ceb 100644 --- a/src/gallium/drivers/v3d/v3dx_draw.c +++ b/src/gallium/drivers/v3d/v3dx_draw.c @@ -306,6 +306,13 @@ v3d_emit_gl_shader_state(struct 
v3d_context *v3d, } } + cl_emit(&job->bcl, VCM_CACHE_SIZE, vcm) { + vcm.number_of_16_vertex_batches_for_binning = + v3d->prog.cs->prog_data.vs->vcm_cache_size; + vcm.number_of_16_vertex_batches_for_rendering = + v3d->prog.vs->prog_data.vs->vcm_cache_size; + } + cl_emit(&job->bcl, GL_SHADER_STATE, state) { state.address = cl_address(job->indirect.bo, shader_rec_offset); state.number_of_attribute_arrays = num_elements_to_emit; diff --git a/src/gallium/drivers/v3d/v3dx_emit.c b/src/gallium/drivers/v3d/v3dx_emit.c index c58ac4b44a2..537dd1ff9ae 100644 --- a/src/gallium/drivers/v3d/v3dx_emit.c +++ b/src/gallium/drivers/v3d/v3dx_emit.c @@ -69,7 +69,9 @@ v3d_factor(enum pipe_blendfactor factor, bool dst_alpha_one) case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return V3D_BLEND_FACTOR_INV_CONST_ALPHA; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE; + return (dst_alpha_one ? + V3D_BLEND_FACTOR_ZERO : + V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE); default: unreachable("Bad blend factor"); } diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 900c0abaf20..06785516cae 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -222,6 +222,8 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, attr.coordinate_shader_vpm_offset = 0; attr.vertex_shader_vpm_offset = 0; } + + vc4_bo_unreference(&bo); } cl_emit(&job->bcl, GL_SHADER_STATE, shader_state) { diff --git a/src/gallium/drivers/vc4/vc4_fence.c b/src/gallium/drivers/vc4/vc4_fence.c index 7071425595c..0dbfbe966b8 100644 --- a/src/gallium/drivers/vc4/vc4_fence.c +++ b/src/gallium/drivers/vc4/vc4_fence.c @@ -121,7 +121,8 @@ vc4_fence_server_sync(struct pipe_context *pctx, struct vc4_context *vc4 = vc4_context(pctx); struct vc4_fence *fence = vc4_fence(pfence); - sync_accumulate("vc4", &vc4->in_fence_fd, fence->fd); + if (fence->fd >= 0) + sync_accumulate("vc4", &vc4->in_fence_fd, fence->fd); } static int @@ -142,8 +143,12 @@ 
vc4_fence_context_init(struct vc4_context *vc4) /* Since we initialize the in_fence_fd to -1 (no wait necessary), * we also need to initialize our in_syncobj as signaled. */ - return drmSyncobjCreate(vc4->fd, DRM_SYNCOBJ_CREATE_SIGNALED, - &vc4->in_syncobj); + if (vc4->screen->has_syncobj) { + return drmSyncobjCreate(vc4->fd, DRM_SYNCOBJ_CREATE_SIGNALED, + &vc4->in_syncobj); + } else { + return 0; + } } void diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 13c3b7678b2..1f46b64005b 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -38,6 +38,7 @@ #include "vc4_context.h" #include "vc4_qpu.h" #include "vc4_qir.h" +#include "mesa/state_tracker/st_glsl_types.h" static struct qreg ntq_get_src(struct vc4_compile *c, nir_src src, int i); @@ -50,6 +51,12 @@ type_size(const struct glsl_type *type) return glsl_count_attribute_slots(type, false); } +static int +uniforms_type_size(const struct glsl_type *type) +{ + return st_glsl_storage_type_size(type, false); +} + static void resize_qreg_array(struct vc4_compile *c, struct qreg **regs, @@ -679,25 +686,45 @@ ntq_fceil(struct vc4_compile *c, struct qreg src) return qir_MOV(c, result); } +static struct qreg +ntq_shrink_sincos_input_range(struct vc4_compile *c, struct qreg x) +{ + /* Since we're using a Taylor approximation, we want to have a small + * number of coefficients and take advantage of sin/cos repeating + * every 2pi. We keep our x as close to 0 as we can, since the series + * will be less accurate as |x| increases. (Also, be careful of + * shifting the input x value to be tricky with sin/cos relations, + * because getting accurate values for x==0 is very important for SDL + * rendering) + */ + struct qreg scaled_x = + qir_FMUL(c, x, + qir_uniform_f(c, 1.0f / (M_PI * 2.0f))); + /* Note: FTOI truncates toward 0. 
*/ + struct qreg x_frac = qir_FSUB(c, scaled_x, + qir_ITOF(c, qir_FTOI(c, scaled_x))); + /* Map [0.5, 1] to [-0.5, 0] */ + qir_SF(c, qir_FSUB(c, x_frac, qir_uniform_f(c, 0.5))); + qir_FSUB_dest(c, x_frac, x_frac, qir_uniform_f(c, 1.0))->cond = QPU_COND_NC; + /* Map [-1, -0.5] to [0, 0.5] */ + qir_SF(c, qir_FADD(c, x_frac, qir_uniform_f(c, 0.5))); + qir_FADD_dest(c, x_frac, x_frac, qir_uniform_f(c, 1.0))->cond = QPU_COND_NS; + + return x_frac; +} + static struct qreg ntq_fsin(struct vc4_compile *c, struct qreg src) { float coeff[] = { - -2.0 * M_PI, - pow(2.0 * M_PI, 3) / (3 * 2 * 1), - -pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1), - pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1), - -pow(2.0 * M_PI, 9) / (9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1), + 2.0 * M_PI, + -pow(2.0 * M_PI, 3) / (3 * 2 * 1), + pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1), + -pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1), + pow(2.0 * M_PI, 9) / (9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1), }; - struct qreg scaled_x = - qir_FMUL(c, - src, - qir_uniform_f(c, 1.0 / (M_PI * 2.0))); - - struct qreg x = qir_FADD(c, - ntq_ffract(c, scaled_x), - qir_uniform_f(c, -0.5)); + struct qreg x = ntq_shrink_sincos_input_range(c, src); struct qreg x2 = qir_FMUL(c, x, x); struct qreg sum = qir_FMUL(c, x, qir_uniform_f(c, coeff[0])); for (int i = 1; i < ARRAY_SIZE(coeff); i++) { @@ -715,21 +742,15 @@ static struct qreg ntq_fcos(struct vc4_compile *c, struct qreg src) { float coeff[] = { - -1.0f, - pow(2.0 * M_PI, 2) / (2 * 1), - -pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1), - pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1), - -pow(2.0 * M_PI, 8) / (8 * 7 * 6 * 5 * 4 * 3 * 2 * 1), - pow(2.0 * M_PI, 10) / (10 * 9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1), + 1.0f, + -pow(2.0 * M_PI, 2) / (2 * 1), + pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1), + -pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1), + pow(2.0 * M_PI, 8) / (8 * 7 * 6 * 5 * 4 * 3 * 2 * 1), + -pow(2.0 * M_PI, 10) / (10 * 9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1), }; - struct qreg scaled_x = - qir_FMUL(c, src, - 
qir_uniform_f(c, 1.0f / (M_PI * 2.0f))); - struct qreg x_frac = qir_FADD(c, - ntq_ffract(c, scaled_x), - qir_uniform_f(c, -0.5)); - + struct qreg x_frac = ntq_shrink_sincos_input_range(c, src); struct qreg sum = qir_uniform_f(c, coeff[0]); struct qreg x2 = qir_FMUL(c, x_frac, x_frac); struct qreg x = x2; /* Current x^2, x^4, or x^6 */ @@ -1685,7 +1706,7 @@ static void ntq_setup_uniforms(struct vc4_compile *c) { nir_foreach_variable(var, &c->s->uniforms) { - uint32_t vec4_count = type_size(var->type); + uint32_t vec4_count = uniforms_type_size(var->type); unsigned vec4_size = 4 * sizeof(float); declare_uniform_range(c, var->data.driver_location * vec4_size, @@ -2469,9 +2490,13 @@ vc4_shader_state_create(struct pipe_context *pctx, */ s = cso->ir.nir; - NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size, + NIR_PASS_V(s, nir_lower_io, nir_var_all & ~nir_var_uniform, + type_size, (nir_lower_io_options)0); - } else { + NIR_PASS_V(s, nir_lower_io, nir_var_uniform, + uniforms_type_size, + (nir_lower_io_options)0); + } else { assert(cso->type == PIPE_SHADER_IR_TGSI); if (vc4_debug & VC4_DEBUG_TGSI) { diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index 408a9e0af2a..1e4657a7922 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -614,7 +614,9 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, } so->texture_p0 = - (VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) | + (VC4_SET_FIELD((rsc->slices[0].offset + + cso->u.tex.first_layer * + rsc->cube_map_stride) >> 12, VC4_TEX_P0_OFFSET) | VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE) | VC4_SET_FIELD(so->force_first_level ? 
cso->u.tex.last_level : diff --git a/src/gallium/drivers/virgl/virgl_encode.c b/src/gallium/drivers/virgl/virgl_encode.c index 670c5fe6c3d..9aece384288 100644 --- a/src/gallium/drivers/virgl/virgl_encode.c +++ b/src/gallium/drivers/virgl/virgl_encode.c @@ -284,7 +284,7 @@ int virgl_encode_shader_state(struct virgl_context *ctx, while (left_bytes) { uint32_t length, offlen; int hdr_len = base_hdr_size + (first_pass ? strm_hdr_size : 0); - if (ctx->cbuf->cdw + hdr_len + 1 > VIRGL_MAX_CMDBUF_DWORDS) + if (ctx->cbuf->cdw + hdr_len + 1 >= VIRGL_MAX_CMDBUF_DWORDS) ctx->base.flush(&ctx->base, NULL, 0); thispass = (VIRGL_MAX_CMDBUF_DWORDS - ctx->cbuf->cdw - hdr_len - 1) * 4; diff --git a/src/gallium/drivers/virgl/virgl_texture.c b/src/gallium/drivers/virgl/virgl_texture.c index 150a5ebd8c7..7ac794623da 100644 --- a/src/gallium/drivers/virgl/virgl_texture.c +++ b/src/gallium/drivers/virgl/virgl_texture.c @@ -177,6 +177,8 @@ static void *virgl_texture_transfer_map(struct pipe_context *ctx, /* we want to do a resolve blit into the temporary */ hw_res = trans->resolve_tmp->hw_res; offset = 0; + trans->base.stride = ((struct virgl_texture*)trans->resolve_tmp)->stride[level]; + trans->base.layer_stride = trans->base.stride * nblocksy; } else { offset = vrend_get_tex_image_offset(vtex, level, box->z); diff --git a/src/gallium/include/state_tracker/st_api.h b/src/gallium/include/state_tracker/st_api.h index 5b72c0afc99..03377a3025a 100644 --- a/src/gallium/include/state_tracker/st_api.h +++ b/src/gallium/include/state_tracker/st_api.h @@ -190,6 +190,8 @@ struct st_egl_image */ struct st_visual { + bool no_config; + /** * Available buffers. Bitfield of ST_ATTACHMENT_*_MASK bits. 
*/ diff --git a/src/gallium/state_trackers/clover/meson.build b/src/gallium/state_trackers/clover/meson.build index d1497e657ea..1a09d8f2ca9 100644 --- a/src/gallium/state_trackers/clover/meson.build +++ b/src/gallium/state_trackers/clover/meson.build @@ -115,7 +115,7 @@ clover_files = files( libclover = static_library( 'clover', - clover_files, + [clover_files, sha1_h], include_directories : clover_incs, cpp_args : [clover_cpp_args, cpp_vis_args], link_with : [libcltgsi, libclllvm], diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c index 2ac32205d9a..b89726ceac2 100644 --- a/src/gallium/state_trackers/dri/dri2.c +++ b/src/gallium/state_trackers/dri/dri2.c @@ -160,6 +160,12 @@ static int convert_fourcc(int format, int *dri_components_p) format = __DRI_IMAGE_FORMAT_R8; dri_components = __DRI_IMAGE_COMPONENTS_Y_UV; break; + case __DRI_IMAGE_FOURCC_P010: + case __DRI_IMAGE_FOURCC_P012: + case __DRI_IMAGE_FOURCC_P016: + format = __DRI_IMAGE_FORMAT_R16; + dri_components = __DRI_IMAGE_COMPONENTS_Y_UV; + break; default: return -1; } @@ -1485,6 +1491,12 @@ dri2_query_dma_buf_formats(__DRIscreen *_screen, int max, int *formats, for (i = 0, j = 0; (i < ARRAY_SIZE(fourcc_formats)) && (j < max || max == 0); i++) { + /* The sRGB format is not a real FourCC as defined by drm_fourcc.h, so we + * must not leak it out to clients. 
+ */ + if (fourcc_formats[i] == __DRI_IMAGE_FOURCC_SARGB8888) + continue; + if (pscreen->is_format_supported(pscreen, fourcc_to_pipe_format( fourcc_formats[i]), @@ -2213,8 +2225,10 @@ dri_kms_init_screen(__DRIscreen * sPriv) dri2ImageExtension.createImageFromFds = dri2_from_fds; dri2ImageExtension.createImageFromDmaBufs = dri2_from_dma_bufs; dri2ImageExtension.createImageFromDmaBufs2 = dri2_from_dma_bufs2; - dri2ImageExtension.queryDmaBufFormats = dri2_query_dma_buf_formats; - dri2ImageExtension.queryDmaBufModifiers = dri2_query_dma_buf_modifiers; + if (pscreen->query_dmabuf_modifiers) { + dri2ImageExtension.queryDmaBufFormats = dri2_query_dma_buf_formats; + dri2ImageExtension.queryDmaBufModifiers = dri2_query_dma_buf_modifiers; + } } sPriv->extensions = dri_screen_extensions; diff --git a/src/gallium/state_trackers/dri/dri_context.c b/src/gallium/state_trackers/dri/dri_context.c index fb307337a90..af9e3325f98 100644 --- a/src/gallium/state_trackers/dri/dri_context.c +++ b/src/gallium/state_trackers/dri/dri_context.c @@ -62,6 +62,7 @@ dri_create_context(gl_api api, const struct gl_config * visual, __DRIVER_CONTEXT_ATTRIB_RELEASE_BEHAVIOR; const __DRIbackgroundCallableExtension *backgroundCallable = screen->sPriv->dri2.backgroundCallable; + const struct driOptionCache *optionCache = &screen->dev->option_cache; if (screen->has_reset_status_query) { allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS; @@ -88,8 +89,13 @@ dri_create_context(gl_api api, const struct gl_config * visual, break; case API_OPENGL_COMPAT: case API_OPENGL_CORE: - attribs.profile = api == API_OPENGL_COMPAT ? ST_PROFILE_DEFAULT - : ST_PROFILE_OPENGL_CORE; + if (driQueryOptionb(optionCache, "force_compat_profile")) { + attribs.profile = ST_PROFILE_DEFAULT; + } else { + attribs.profile = api == API_OPENGL_COMPAT ? 
ST_PROFILE_DEFAULT + : ST_PROFILE_OPENGL_CORE; + } + attribs.major = ctx_config->major_version; attribs.minor = ctx_config->minor_version; diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c index fe4e39b3f93..a0dcdb53dd2 100644 --- a/src/gallium/state_trackers/dri/dri_screen.c +++ b/src/gallium/state_trackers/dri/dri_screen.c @@ -308,8 +308,10 @@ dri_fill_st_visual(struct st_visual *stvis, { memset(stvis, 0, sizeof(*stvis)); - if (!mode) + if (!mode) { + stvis->no_config = true; return; + } /* Deduce the color format. */ switch (mode->redMask) { diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index 150f5e3e05e..61eb5d9a45f 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -784,6 +784,10 @@ NineDevice9_SetCursorPosition( struct NineDevice9 *This, DBG("This=%p X=%d Y=%d Flags=%d\n", This, X, Y, Flags); + if (This->cursor.pos.x == X && + This->cursor.pos.y == Y) + return; + This->cursor.pos.x = X; This->cursor.pos.y = Y; diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index 7db07d8f693..f9e6b962a75 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -483,7 +483,7 @@ struct shader_translator struct ureg_dst a0; struct ureg_dst tS[8]; /* texture stage registers */ struct ureg_dst tdst; /* scratch dst if we need extra modifiers */ - struct ureg_dst t[5]; /* scratch TEMPs */ + struct ureg_dst t[8]; /* scratch TEMPs */ struct ureg_src vC[2]; /* PS color in */ struct ureg_src vT[8]; /* PS texcoord in */ struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */ @@ -2273,6 +2273,18 @@ DECL_SPECIAL(POW) return D3D_OK; } +DECL_SPECIAL(RCP) +{ + struct ureg_program *ureg = tx->ureg; + struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); + struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); + 
struct ureg_dst tmp = tx_scratch(tx); + ureg_RCP(ureg, tmp, src); + ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp)); + ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), ureg_src(tmp)); + return D3D_OK; +} + DECL_SPECIAL(RSQ) { struct ureg_program *ureg = tx->ureg; @@ -2909,7 +2921,7 @@ static const struct sm1_op_info inst_table[] = _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */ _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */ _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */ - _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */ + _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */ _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */ _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */ _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */ diff --git a/src/gallium/state_trackers/nine/threadpool.c b/src/gallium/state_trackers/nine/threadpool.c index cc62fd25799..19721aab2dd 100644 --- a/src/gallium/state_trackers/nine/threadpool.c +++ b/src/gallium/state_trackers/nine/threadpool.c @@ -37,6 +37,7 @@ #include "os/os_thread.h" #include "threadpool.h" +/* POSIX thread function */ static void * threadpool_worker(void *data) { @@ -76,6 +77,15 @@ threadpool_worker(void *data) return NULL; } +/* Windows thread function */ +static DWORD NINE_WINAPI +wthreadpool_worker(void *data) +{ + threadpool_worker(data); + + return 0; +} + struct threadpool * _mesa_threadpool_create(struct NineSwapChain9 *swapchain) { @@ -87,7 +97,9 @@ _mesa_threadpool_create(struct NineSwapChain9 *swapchain) pthread_mutex_init(&pool->m, NULL); pthread_cond_init(&pool->new_work, NULL); - pool->wthread = NineSwapChain9_CreateThread(swapchain, threadpool_worker, pool); + /* This uses WINE's CreateThread, so the thread function needs to use + * the Windows ABI */ + pool->wthread = NineSwapChain9_CreateThread(swapchain, wthreadpool_worker, 
pool); if (!pool->wthread) { /* using pthread as fallback */ pthread_create(&pool->pthread, NULL, threadpool_worker, pool); diff --git a/src/gallium/state_trackers/va/image.c b/src/gallium/state_trackers/va/image.c index 3f892c9842c..807fc832c7f 100644 --- a/src/gallium/state_trackers/va/image.c +++ b/src/gallium/state_trackers/va/image.c @@ -353,6 +353,23 @@ vlVaGetImage(VADriverContextP ctx, VASurfaceID surface, int x, int y, return VA_STATUS_ERROR_INVALID_IMAGE; } + if (x < 0 || y < 0) { + mtx_unlock(&drv->mutex); + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + + if (x + width > surf->templat.width || + y + height > surf->templat.height) { + mtx_unlock(&drv->mutex); + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + + if (width > vaimage->width || + height > vaimage->height) { + mtx_unlock(&drv->mutex); + return VA_STATUS_ERROR_INVALID_PARAMETER; + } + img_buf = handle_table_get(drv->htab, vaimage->buf); if (!img_buf) { mtx_unlock(&drv->mutex); @@ -400,11 +417,19 @@ vlVaGetImage(VADriverContextP ctx, VASurfaceID surface, int x, int y, } for (i = 0; i < vaimage->num_planes; i++) { - unsigned width, height; + unsigned box_w = align(width, 2); + unsigned box_h = align(height, 2); + unsigned box_x = x & ~1; + unsigned box_y = y & ~1; if (!views[i]) continue; - vlVaVideoSurfaceSize(surf, i, &width, &height); + vl_video_buffer_adjust_size(&box_w, &box_h, i, + surf->templat.chroma_format, + surf->templat.interlaced); + vl_video_buffer_adjust_size(&box_x, &box_y, i, + surf->templat.chroma_format, + surf->templat.interlaced); for (j = 0; j < views[i]->texture->array_size; ++j) { - struct pipe_box box = {0, 0, j, width, height, 1}; + struct pipe_box box = {box_x, box_y, j, box_w, box_h, 1}; struct pipe_transfer *transfer; uint8_t *map; map = drv->pipe->transfer_map(drv->pipe, views[i]->texture, 0, diff --git a/src/gallium/state_trackers/va/surface.c b/src/gallium/state_trackers/va/surface.c index cc26efe1c1a..d69313932c8 100644 --- 
a/src/gallium/state_trackers/va/surface.c +++ b/src/gallium/state_trackers/va/surface.c @@ -598,10 +598,8 @@ surface_from_external_memory(VADriverContextP ctx, vlVaSurface *surface, return VA_STATUS_SUCCESS; fail: - for (i = 0; i < VL_NUM_COMPONENTS; i++) { - if (resources[i]) - pscreen->resource_destroy(pscreen, resources[i]); - } + for (i = 0; i < VL_NUM_COMPONENTS; i++) + pipe_resource_reference(&resources[i], NULL); return result; } diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c index 9c5bd8a15b2..507a267b5fe 100644 --- a/src/gallium/targets/d3dadapter9/drm.c +++ b/src/gallium/targets/d3dadapter9/drm.c @@ -107,7 +107,7 @@ drm_destroy( struct d3dadapter9_context *ctx ) if (drm->dev) pipe_loader_release(&drm->dev, 1); - close(drm->fd); + /* The pipe loader takes ownership of the fd */ FREE(ctx); } diff --git a/src/gallium/targets/d3dadapter9/meson.build b/src/gallium/targets/d3dadapter9/meson.build index bd05b4f9692..bc72b1110a0 100644 --- a/src/gallium/targets/d3dadapter9/meson.build +++ b/src/gallium/targets/d3dadapter9/meson.build @@ -53,7 +53,7 @@ libgallium_nine = shared_library( libswkmsdri, ], dependencies : [ - dep_selinux, dep_expat, dep_libdrm, dep_llvm, + dep_selinux, dep_expat, dep_libdrm, dep_llvm, dep_thread, driver_swrast, driver_r300, driver_r600, driver_radeonsi, driver_nouveau, driver_i915, driver_svga, ], diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk index 9c43fa1e8fd..83f439071f8 100644 --- a/src/gallium/targets/dri/Android.mk +++ b/src/gallium/targets/dri/Android.mk @@ -43,9 +43,17 @@ LOCAL_SHARED_LIBRARIES := \ libbacktrace \ libdl \ libglapi \ - libexpat \ libz +# If Android version >=8 MESA should static link libexpat else should dynamic link +ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0) +LOCAL_STATIC_LIBRARIES := \ + libexpat +else +LOCAL_SHARED_LIBRARIES += \ + libexpat +endif + $(foreach d, $(MESA_BUILD_GALLIUM), $(eval LOCAL_CFLAGS += 
$(patsubst HAVE_%,-D%,$(d)))) # sort GALLIUM_LIBS to remove any duplicates diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_surface.c b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c index fda1ba7870e..20cfc86ebe0 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_surface.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c @@ -257,8 +257,8 @@ static void si_compute_cmask(const struct radeon_info *info, unsigned base_align = num_pipes * pipe_interleave_bytes; - unsigned width = align(config->info.width, cl_width*8); - unsigned height = align(config->info.height, cl_height*8); + unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width*8); + unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height*8); unsigned slice_elements = (width * height) / (8*8); /* Each element of CMASK is a nibble. */ diff --git a/src/gallium/winsys/sw/dri/dri_sw_winsys.c b/src/gallium/winsys/sw/dri/dri_sw_winsys.c index 40007200a5d..d519bcfedd3 100644 --- a/src/gallium/winsys/sw/dri/dri_sw_winsys.c +++ b/src/gallium/winsys/sw/dri/dri_sw_winsys.c @@ -26,8 +26,12 @@ * **************************************************************************/ +#if !defined(ANDROID) || ANDROID_API_LEVEL >= 26 +/* Android's libc began supporting shm in Oreo */ +#define HAVE_SHM #include #include +#endif #include "pipe/p_compiler.h" #include "pipe/p_format.h" @@ -83,6 +87,7 @@ dri_sw_is_displaytarget_format_supported( struct sw_winsys *ws, return TRUE; } +#ifdef HAVE_SHM static char * alloc_shm(struct dri_sw_displaytarget *dri_sw_dt, unsigned size) { @@ -101,6 +106,7 @@ alloc_shm(struct dri_sw_displaytarget *dri_sw_dt, unsigned size) return addr; } +#endif static struct sw_displaytarget * dri_sw_displaytarget_create(struct sw_winsys *winsys, @@ -131,8 +137,11 @@ dri_sw_displaytarget_create(struct sw_winsys *winsys, size = dri_sw_dt->stride * nblocksy; dri_sw_dt->shmid = -1; + +#ifdef HAVE_SHM if (ws->lf->put_image_shm) dri_sw_dt->data = alloc_shm(dri_sw_dt, size); +#endif 
if(!dri_sw_dt->data) dri_sw_dt->data = align_malloc(size, alignment); @@ -156,8 +165,10 @@ dri_sw_displaytarget_destroy(struct sw_winsys *ws, struct dri_sw_displaytarget *dri_sw_dt = dri_sw_displaytarget(dt); if (dri_sw_dt->shmid >= 0) { +#ifdef HAVE_SHM shmdt(dri_sw_dt->data); shmctl(dri_sw_dt->shmid, IPC_RMID, 0); +#endif } else { align_free(dri_sw_dt->data); } diff --git a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c index 3fe1b1a7313..9564d9424b1 100644 --- a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c +++ b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c @@ -176,6 +176,8 @@ kms_sw_displaytarget_create(struct sw_winsys *ws, list_inithead(&kms_sw_dt->planes); kms_sw_dt->ref_count = 1; + kms_sw_dt->mapped = MAP_FAILED; + kms_sw_dt->ro_mapped = MAP_FAILED; kms_sw_dt->format = format; @@ -262,7 +264,7 @@ kms_sw_displaytarget_map(struct sw_winsys *ws, prot = (flags == PIPE_TRANSFER_READ) ? PROT_READ : (PROT_READ | PROT_WRITE); void **ptr = (flags == PIPE_TRANSFER_READ) ? 
&kms_sw_dt->ro_mapped : &kms_sw_dt->mapped; - if (!*ptr) { + if (*ptr == MAP_FAILED) { void *tmp = mmap(0, kms_sw_dt->size, prot, MAP_SHARED, kms_sw->fd, map_req.offset); if (tmp == MAP_FAILED) @@ -332,6 +334,8 @@ kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd, FREE(kms_sw_dt); return NULL; } + kms_sw_dt->mapped = MAP_FAILED; + kms_sw_dt->ro_mapped = MAP_FAILED; kms_sw_dt->size = lseek_ret; kms_sw_dt->ref_count = 1; kms_sw_dt->handle = handle; @@ -368,10 +372,14 @@ kms_sw_displaytarget_unmap(struct sw_winsys *ws, DEBUG_PRINT("KMS-DEBUG: unmapped buffer %u (was %p)\n", kms_sw_dt->handle, kms_sw_dt->mapped); DEBUG_PRINT("KMS-DEBUG: unmapped buffer %u (was %p)\n", kms_sw_dt->handle, kms_sw_dt->ro_mapped); - munmap(kms_sw_dt->mapped, kms_sw_dt->size); - kms_sw_dt->mapped = NULL; - munmap(kms_sw_dt->ro_mapped, kms_sw_dt->size); - kms_sw_dt->ro_mapped = NULL; + if (kms_sw_dt->mapped != MAP_FAILED) { + munmap(kms_sw_dt->mapped, kms_sw_dt->size); + kms_sw_dt->mapped = MAP_FAILED; + } + if (kms_sw_dt->ro_mapped != MAP_FAILED) { + munmap(kms_sw_dt->ro_mapped, kms_sw_dt->size); + kms_sw_dt->ro_mapped = MAP_FAILED; + } } static struct sw_displaytarget * diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c index aad6430c417..d55e4c7126a 100644 --- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c +++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c @@ -313,7 +313,7 @@ virgl_drm_winsys_resource_cache_create(struct virgl_winsys *qws, struct virgl_hw_res *res, *curr_res; struct list_head *curr, *next; int64_t now; - int ret; + int ret = 0; /* only store binds for vertex/index/const buffers */ if (bind != VIRGL_BIND_CONSTANT_BUFFER && bind != VIRGL_BIND_INDEX_BUFFER && @@ -617,13 +617,26 @@ static void virgl_drm_add_res(struct virgl_drm_winsys *qdws, { unsigned hash = res->res_handle & (sizeof(cbuf->is_handle_added)-1); - if (cbuf->cres > cbuf->nres) { - cbuf->nres += 256; - cbuf->res_bo = 
realloc(cbuf->res_bo, cbuf->nres * sizeof(struct virgl_hw_buf*)); - if (!cbuf->res_bo) { - fprintf(stderr,"failure to add relocation %d, %d\n", cbuf->cres, cbuf->nres); + if (cbuf->cres >= cbuf->nres) { + unsigned new_nres = cbuf->nres + 256; + void *new_ptr = REALLOC(cbuf->res_bo, + cbuf->nres * sizeof(struct virgl_hw_buf*), + new_nres * sizeof(struct virgl_hw_buf*)); + if (!new_ptr) { + fprintf(stderr,"failure to add relocation %d, %d\n", cbuf->cres, new_nres); + return; + } + cbuf->res_bo = new_ptr; + + new_ptr = REALLOC(cbuf->res_hlist, + cbuf->nres * sizeof(uint32_t), + new_nres * sizeof(uint32_t)); + if (!new_ptr) { + fprintf(stderr,"failure to add hlist relocation %d, %d\n", cbuf->cres, cbuf->nres); return; } + cbuf->res_hlist = new_ptr; + cbuf->nres = new_nres; } cbuf->res_bo[cbuf->cres] = NULL; diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c index d25f9a3bd9e..21349205143 100644 --- a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c +++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c @@ -243,8 +243,10 @@ int virgl_vtest_send_transfer_cmd(struct virgl_vtest_winsys *vws, vtest_hdr[VTEST_CMD_LEN] = VCMD_TRANSFER_HDR_SIZE; vtest_hdr[VTEST_CMD_ID] = vcmd; + /* The host expects the size in dwords so calculate the rounded up + * value here. 
*/ if (vcmd == VCMD_TRANSFER_PUT) - vtest_hdr[VTEST_CMD_LEN] += data_size + 3 / 4; + vtest_hdr[VTEST_CMD_LEN] += (data_size + 3) / 4; cmd[0] = handle; cmd[1] = level; diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c index 9a96c6eb83f..d1fd6050a71 100644 --- a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c +++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c @@ -536,7 +536,7 @@ virgl_cs_create_fence(struct virgl_winsys *vws) res = virgl_vtest_winsys_resource_cache_create(vws, PIPE_BUFFER, PIPE_FORMAT_R8_UNORM, - PIPE_BIND_CUSTOM, + VIRGL_BIND_CUSTOM, 8, 1, 1, 0, 0, 0, 8); return (struct pipe_fence_handle *)res; diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am index 5233257fb40..8f9d80c9f41 100644 --- a/src/glx/Makefile.am +++ b/src/glx/Makefile.am @@ -19,9 +19,6 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. -if HAVE_SHARED_GLAPI -SHARED_GLAPI_LIB = $(top_builddir)/src/mapi/shared-glapi/libglapi.la -endif SUBDIRS = @@ -181,7 +178,7 @@ GL_LIBS = \ $(LIBDRM_LIBS) \ libglx.la \ $(top_builddir)/src/mapi/glapi/libglapi.la \ - $(SHARED_GLAPI_LIB) \ + $(top_builddir)/src/mapi/shared-glapi/libglapi.la \ $(GL_LIB_DEPS) GL_LDFLAGS = \ diff --git a/src/glx/apple/apple_glx_log.c b/src/glx/apple/apple_glx_log.c index a3f446c26f2..ea39d30954e 100644 --- a/src/glx/apple/apple_glx_log.c +++ b/src/glx/apple/apple_glx_log.c @@ -97,6 +97,7 @@ void _apple_glx_vlog(int level, const char *file, const char *function, fprintf(stderr, "%-9s %24s:%-4d %s(%"PRIu64"): ", _asl_level_string(level), file, line, function, thread); vfprintf(stderr, fmt, args2); + va_end(args2); } msg = asl_new(ASL_TYPE_MSG); diff --git a/src/glx/glxextensions.c b/src/glx/glxextensions.c index a575862670f..e85a8c92846 100644 --- a/src/glx/glxextensions.c +++ b/src/glx/glxextensions.c @@ -152,7 +152,7 @@ static const struct extension_info known_glx_extensions[] = { { 
GLX(ATI_pixel_format_float), VER(0,0), N, N, N, N }, { GLX(INTEL_swap_event), VER(0,0), Y, N, N, N }, { GLX(MESA_copy_sub_buffer), VER(0,0), Y, N, N, N }, - { GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, Y, N }, + { GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, N, Y }, { GLX(MESA_query_renderer), VER(0,0), Y, N, N, Y }, { GLX(MESA_swap_control), VER(0,0), Y, N, N, Y }, { GLX(NV_float_buffer), VER(0,0), N, N, N, N }, diff --git a/src/intel/Android.common.mk b/src/intel/Android.common.mk index 12cea6e5472..12bd8947e2e 100644 --- a/src/intel/Android.common.mk +++ b/src/intel/Android.common.mk @@ -38,7 +38,17 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/mesa -LOCAL_SHARED_LIBRARIES := libexpat libz +LOCAL_SHARED_LIBRARIES := libz + +# If Android version >=8 MESA should static link libexpat else should dynamic link +ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0) +LOCAL_STATIC_LIBRARIES := \ + libexpat +LOCAL_HEADER_LIBRARIES += liblog_headers +else +LOCAL_SHARED_LIBRARIES += \ + libexpat +endif LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_genxml diff --git a/src/intel/Android.compiler.mk b/src/intel/Android.compiler.mk index c2b01221dfc..41af7b20b9c 100644 --- a/src/intel/Android.compiler.mk +++ b/src/intel/Android.compiler.mk @@ -28,7 +28,7 @@ # --------------------------------------- include $(CLEAR_VARS) - +LOCAL_CFLAGS += -Wno-error LOCAL_MODULE := libmesa_intel_compiler LOCAL_MODULE_CLASS := STATIC_LIBRARIES diff --git a/src/intel/Android.dev.mk b/src/intel/Android.dev.mk index cd2ed66a176..3011ee232ed 100644 --- a/src/intel/Android.dev.mk +++ b/src/intel/Android.dev.mk @@ -33,5 +33,8 @@ LOCAL_C_INCLUDES := $(MESA_TOP)/include/drm-uapi LOCAL_SRC_FILES := $(DEV_FILES) +LOCAL_CFLAGS := \ + -Wno-gnu-variable-sized-type-not-at-end + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) diff --git a/src/intel/Android.vulkan.mk b/src/intel/Android.vulkan.mk index 09dc22875a1..8dc20149784 100644 --- 
a/src/intel/Android.vulkan.mk +++ b/src/intel/Android.vulkan.mk @@ -38,7 +38,10 @@ VULKAN_COMMON_INCLUDES := \ $(MESA_TOP)/src/intel \ $(MESA_TOP)/include/drm-uapi \ $(MESA_TOP)/src/intel/vulkan \ - frameworks/native/vulkan/include + frameworks/native/vulkan/include \ + frameworks/native/libs/nativebase/include \ + frameworks/native/libs/nativewindow/include \ + frameworks/native/libs/arect/include # libmesa_anv_entrypoints with header and dummy.c # @@ -74,6 +77,8 @@ LOCAL_EXPORT_C_INCLUDE_DIRS := \ LOCAL_SHARED_LIBRARIES := libdrm +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -107,6 +112,8 @@ LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -120,13 +127,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES LOCAL_SRC_FILES := $(VULKAN_GEN75_FILES) LOCAL_CFLAGS := -DGEN_VERSIONx10=75 - +LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers LOCAL_C_INCLUDES := $(ANV_INCLUDES) LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -140,13 +149,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES LOCAL_SRC_FILES := $(VULKAN_GEN8_FILES) LOCAL_CFLAGS := -DGEN_VERSIONx10=80 - +LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers LOCAL_C_INCLUDES := $(ANV_INCLUDES) LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -160,13 +171,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES LOCAL_SRC_FILES := $(VULKAN_GEN9_FILES) 
LOCAL_CFLAGS := -DGEN_VERSIONx10=90 - +LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers LOCAL_C_INCLUDES := $(ANV_INCLUDES) LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -180,13 +193,15 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES LOCAL_SRC_FILES := $(VULKAN_GEN10_FILES) LOCAL_CFLAGS := -DGEN_VERSIONx10=100 - +LOCAL_HEADER_LIBRARIES += libcutils_headers libsystem_headers LOCAL_C_INCLUDES := $(ANV_INCLUDES) LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -207,6 +222,8 @@ LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_anv_entrypoints libmesa_genxml LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -217,7 +234,7 @@ include $(BUILD_STATIC_LIBRARY) include $(CLEAR_VARS) LOCAL_MODULE := libmesa_vulkan_common LOCAL_MODULE_CLASS := STATIC_LIBRARIES - +LOCAL_CFLAGS += -Wno-error intermediates := $(call local-generated-sources-dir) LOCAL_SRC_FILES := $(VULKAN_FILES) @@ -260,6 +277,8 @@ $(intermediates)/vulkan/anv_extensions.h: LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) @@ -309,5 +328,7 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \ LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) libz libsync liblog +LOCAL_HEADER_LIBRARIES += libcutils_headers libhardware_headers + include $(MESA_COMMON_MK) include $(BUILD_SHARED_LIBRARY) diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am index b00cc8cc2cb..00624084e6f 100644 
--- a/src/intel/Makefile.tools.am +++ b/src/intel/Makefile.tools.am @@ -21,7 +21,9 @@ noinst_PROGRAMS += \ tools/aubinator \ - tools/aubinator_error_decode + tools/aubinator_error_decode \ + tools/error2aub + tools_aubinator_SOURCES = \ tools/aubinator.c \ @@ -59,3 +61,23 @@ tools_aubinator_error_decode_LDADD = \ tools_aubinator_error_decode_CFLAGS = \ $(AM_CFLAGS) \ $(ZLIB_CFLAGS) + + +tools_error2aub_SOURCES = \ + tools/gen_context.h \ + tools/gen8_context.h \ + tools/gen10_context.h \ + tools/aub_write.h \ + tools/aub_write.c \ + tools/error2aub.c + +tools_error2aub_CFLAGS = \ + $(AM_CFLAGS) \ + $(ZLIB_CFLAGS) + +tools_error2aub_LDADD = \ + dev/libintel_dev.la \ + $(PTHREAD_LIBS) \ + $(DLOPEN_LIBS) \ + $(ZLIB_LIBS) \ + -lm diff --git a/src/intel/Makefile.vulkan.am b/src/intel/Makefile.vulkan.am index 9555d98095b..d5112633b5c 100644 --- a/src/intel/Makefile.vulkan.am +++ b/src/intel/Makefile.vulkan.am @@ -104,7 +104,7 @@ noinst_LTLIBRARIES += $(VULKAN_PER_GEN_LIBS) VULKAN_CFLAGS = \ $(AM_CFLAGS) \ - -msse2 + -msse2 -mstackrealign VULKAN_CPPFLAGS = \ -I$(top_srcdir)/src/compiler \ diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c index c36ec703b18..ffe26076e16 100644 --- a/src/intel/blorp/blorp.c +++ b/src/intel/blorp/blorp.c @@ -75,18 +75,6 @@ brw_blorp_surface_info_init(struct blorp_context *blorp, if (format == ISL_FORMAT_UNSUPPORTED) format = surf->surf->format; - if (format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) { - /* Unfortunately, ISL_FORMAT_R24_UNORM_X8_TYPELESS it isn't supported as - * a render target, which would prevent us from blitting to 24-bit - * depth. The miptree consists of 32 bits per pixel, arranged as 24-bit - * depth values interleaved with 8 "don't care" bits. Since depth - * values don't require any blending, it doesn't matter how we interpret - * the bit pattern as long as we copy the right amount of data, so just - * map it as 8-bit BGRA. 
- */ - format = ISL_FORMAT_B8G8R8A8_UNORM; - } - info->surf = *surf->surf; info->addr = surf->addr; diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index c85ec8543a9..7cc580abd06 100644 --- a/src/intel/blorp/blorp_blit.c +++ b/src/intel/blorp/blorp_blit.c @@ -776,6 +776,14 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos, * grid of samples with in a pixel. Sample number layout shows the * rectangular grid of samples roughly corresponding to the real sample * locations with in a pixel. + * + * In the case of 2x MSAA, the layout of sample indices is reversed from + * the layout of sample numbers: + * + * sample index layout : --------- sample number layout : --------- + * | 0 | 1 | | 1 | 0 | + * --------- --------- + * * In case of 4x MSAA, layout of sample indices matches the layout of * sample numbers: * --------- @@ -819,7 +827,9 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos, key->x_scale * key->y_scale)); sample = nir_f2i32(b, sample); - if (tex_samples == 8) { + if (tex_samples == 2) { + sample = nir_isub(b, nir_imm_int(b, 1), sample); + } else if (tex_samples == 8) { sample = nir_iand(b, nir_ishr(b, nir_imm_int(b, 0x64210573), nir_ishl(b, sample, nir_imm_int(b, 2))), nir_imm_int(b, 0xf)); @@ -984,14 +994,14 @@ convert_color(struct nir_builder *b, nir_ssa_def *color, nir_ssa_def *value; if (key->dst_format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) { - /* The destination image is bound as R32_UNORM but the data needs to be + /* The destination image is bound as R32_UINT but the data needs to be * in R24_UNORM_X8_TYPELESS. The bottom 24 are the actual data and the * top 8 need to be zero. We can accomplish this by simply multiplying * by a factor to scale things down. 
*/ - float factor = (float)((1 << 24) - 1) / (float)UINT32_MAX; - value = nir_fmul(b, nir_fsat(b, nir_channel(b, color, 0)), - nir_imm_float(b, factor)); + unsigned factor = (1 << 24) - 1; + value = nir_fsat(b, nir_channel(b, color, 0)); + value = nir_f2i32(b, nir_fmul(b, value, nir_imm_float(b, factor))); } else if (key->dst_format == ISL_FORMAT_L8_UNORM_SRGB) { value = nir_format_linear_to_srgb(b, nir_channel(b, color, 0)); } else if (key->dst_format == ISL_FORMAT_R8G8B8_UNORM_SRGB) { @@ -1976,7 +1986,7 @@ try_blorp_blit(struct blorp_batch *batch, isl_format_rgbx_to_rgba(params->dst.view.format); } else if (params->dst.view.format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) { wm_prog_key->dst_format = params->dst.view.format; - params->dst.view.format = ISL_FORMAT_R32_UNORM; + params->dst.view.format = ISL_FORMAT_R32_UINT; } else if (params->dst.view.format == ISL_FORMAT_A4B4G4R4_UNORM) { params->dst.view.swizzle = isl_swizzle_compose(params->dst.view.swizzle, @@ -2240,6 +2250,17 @@ blorp_blit(struct blorp_batch *batch, } } + /* ISL_FORMAT_R24_UNORM_X8_TYPELESS it isn't supported as a render target, + * which requires shader math to render to it. Blitting Z24X8 to Z24X8 + * is fairly common though, so we'd like to avoid it. Since we don't need + * to blend depth values, we can simply pick a renderable format with the + * right number of bits-per-pixel, like 8-bit BGRA. 
+ */ + if (dst_surf->surf->format == ISL_FORMAT_R24_UNORM_X8_TYPELESS && + src_surf->surf->format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) { + src_format = dst_format = ISL_FORMAT_B8G8R8A8_UNORM; + } + brw_blorp_surface_info_init(batch->blorp, ¶ms.src, src_surf, src_level, src_layer, src_format, false); brw_blorp_surface_info_init(batch->blorp, ¶ms.dst, dst_surf, dst_level, diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index 50341ab0ecf..6da2485ad07 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -1628,6 +1628,29 @@ blorp_emit_gen8_hiz_op(struct blorp_batch *batch, */ blorp_emit_3dstate_multisample(batch, params); + /* According to the SKL PRM formula for WM_INT::ThreadDispatchEnable, the + * 3DSTATE_WM::ForceThreadDispatchEnable field can force WM thread dispatch + * even when WM_HZ_OP is active. However, WM thread dispatch is normally + * disabled for HiZ ops and it appears that force-enabling it can lead to + * GPU hangs on at least Skylake. Since we don't know the current state of + * the 3DSTATE_WM packet, just emit a dummy one prior to 3DSTATE_WM_HZ_OP. + */ + blorp_emit(batch, GENX(3DSTATE_WM), wm); + + /* From the BDW PRM Volume 7, Depth Buffer Clear: + * + * The clear value must be between the min and max depth values + * (inclusive) defined in the CC_VIEWPORT. If the depth buffer format is + * D32_FLOAT, then +/-DENORM values are also allowed. + * + * Set the bounds to match our hardware limits, [0.0, 1.0]. + */ + if (params->depth.enabled && params->hiz_op == ISL_AUX_OP_FAST_CLEAR) { + assert(params->depth.clear_color.f32[0] >= 0.0f); + assert(params->depth.clear_color.f32[0] <= 1.0f); + blorp_emit_cc_viewport(batch); + } + /* If we can't alter the depth stencil config and multiple layers are * involved, the HiZ op will fail. This is because the op requires that a * new config is emitted for each additional layer. 
diff --git a/src/intel/common/gen_batch_decoder.c b/src/intel/common/gen_batch_decoder.c index f5be0018afc..f2510e21b4f 100644 --- a/src/intel/common/gen_batch_decoder.c +++ b/src/intel/common/gen_batch_decoder.c @@ -642,7 +642,6 @@ decode_dynamic_state_pointers(struct gen_batch_decode_ctx *ctx, int count) { struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p); - struct gen_group *state = gen_spec_find_struct(ctx->spec, struct_type); uint32_t state_offset = 0; @@ -664,12 +663,28 @@ decode_dynamic_state_pointers(struct gen_batch_decode_ctx *ctx, return; } + struct gen_group *state = gen_spec_find_struct(ctx->spec, struct_type); + if (strcmp(struct_type, "BLEND_STATE") == 0) { + /* Blend states are different from the others because they have a header + * struct called BLEND_STATE which is followed by a variable number of + * BLEND_STATE_ENTRY structs. + */ + fprintf(ctx->fp, "%s\n", struct_type); + ctx_print_group(ctx, state, state_addr, state_map); + + state_addr += state->dw_length * 4; + state_map += state->dw_length * 4; + + struct_type = "BLEND_STATE_ENTRY"; + state = gen_spec_find_struct(ctx->spec, struct_type); + } + for (int i = 0; i < count; i++) { fprintf(ctx->fp, "%s %d\n", struct_type, i); - ctx_print_group(ctx, state, state_offset, state_map); + ctx_print_group(ctx, state, state_addr, state_map); state_addr += state->dw_length * 4; - state_map += state->dw_length; + state_map += state->dw_length * 4; } } diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c index a978f2f5818..8990d208207 100644 --- a/src/intel/common/gen_debug.c +++ b/src/intel/common/gen_debug.c @@ -85,6 +85,7 @@ static const struct debug_control debug_control[] = { { "nohiz", DEBUG_NO_HIZ }, { "color", DEBUG_COLOR }, { "reemit", DEBUG_REEMIT }, + { "heur32", DEBUG_HEUR32 }, { NULL, 0 } }; diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h index 72d7ca20a39..c2ca2e2ebd6 100644 --- a/src/intel/common/gen_debug.h +++ 
b/src/intel/common/gen_debug.h @@ -83,6 +83,7 @@ extern uint64_t INTEL_DEBUG; #define DEBUG_NO_HIZ (1ull << 39) #define DEBUG_COLOR (1ull << 40) #define DEBUG_REEMIT (1ull << 41) +#define DEBUG_HEUR32 (1ull << 42) /* These flags are not compatible with the disk shader cache */ #define DEBUG_DISK_CACHE_DISABLE_MASK DEBUG_SHADER_TIME @@ -90,7 +91,7 @@ extern uint64_t INTEL_DEBUG; /* These flags may affect program generation */ #define DEBUG_DISK_CACHE_MASK \ (DEBUG_NO16 | DEBUG_NO_DUAL_OBJECT_GS | DEBUG_NO8 | DEBUG_SPILL_FS | \ - DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32) + DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_HEUR32) #ifdef HAVE_ANDROID_PLATFORM #define LOG_TAG "INTEL-MESA" diff --git a/src/intel/common/gen_decoder.c b/src/intel/common/gen_decoder.c index ec0a486b101..04de7b7bb81 100644 --- a/src/intel/common/gen_decoder.c +++ b/src/intel/common/gen_decoder.c @@ -811,6 +811,18 @@ iter_more_groups(const struct gen_field_iterator *iter) } } +static void +iter_start_field(struct gen_field_iterator *iter, struct gen_field *field) +{ + iter->field = field; + + int group_member_offset = iter_group_offset_bits(iter, iter->group_iter); + + iter->start_bit = group_member_offset + iter->field->start; + iter->end_bit = group_member_offset + iter->field->end; + iter->struct_desc = NULL; +} + static void iter_advance_group(struct gen_field_iterator *iter) { @@ -825,32 +837,20 @@ iter_advance_group(struct gen_field_iterator *iter) } } - iter->field = iter->group->fields; + iter_start_field(iter, iter->group->fields); } static bool iter_advance_field(struct gen_field_iterator *iter) { if (iter_more_fields(iter)) { - iter->field = iter->field->next; + iter_start_field(iter, iter->field->next); } else { if (!iter_more_groups(iter)) return false; iter_advance_group(iter); } - - if (iter->field->name) - snprintf(iter->name, sizeof(iter->name), "%s", iter->field->name); - else - memset(iter->name, 0, sizeof(iter->name)); - - int group_member_offset = 
iter_group_offset_bits(iter, iter->group_iter); - - iter->start_bit = group_member_offset + iter->field->start; - iter->end_bit = group_member_offset + iter->field->end; - iter->struct_desc = NULL; - return true; } @@ -991,7 +991,7 @@ gen_field_iterator_init(struct gen_field_iterator *iter, iter->p_bit = p_bit; int length = gen_group_get_length(iter->group, iter->p); - iter->p_end = length > 0 ? &p[length] : NULL; + iter->p_end = length >= 0 ? &p[length] : NULL; iter->print_colors = print_colors; } @@ -1001,15 +1001,19 @@ gen_field_iterator_next(struct gen_field_iterator *iter) /* Initial condition */ if (!iter->field) { if (iter->group->fields) - iter->field = iter->group->fields; + iter_start_field(iter, iter->group->fields); else - iter->field = iter->group->next->fields; + iter_start_field(iter, iter->group->next->fields); bool result = iter_decode_field(iter); - if (iter->p_end) - assert(result); + if (!result && iter->p_end) { + /* We're dealing with a non empty struct of length=0 (BLEND_STATE on + * Gen 7.5) + */ + assert(iter->group->dw_length == 0); + } - return true; + return result; } if (!iter_advance_field(iter)) diff --git a/src/intel/common/gen_sample_positions.h b/src/intel/common/gen_sample_positions.h index f0ce95dd1fb..da48dcb5ed0 100644 --- a/src/intel/common/gen_sample_positions.h +++ b/src/intel/common/gen_sample_positions.h @@ -42,10 +42,10 @@ prefix##0YOffset = 0.5; * c 1 */ #define GEN_SAMPLE_POS_2X(prefix) \ -prefix##0XOffset = 0.25; \ -prefix##0YOffset = 0.25; \ -prefix##1XOffset = 0.75; \ -prefix##1YOffset = 0.75; +prefix##0XOffset = 0.75; \ +prefix##0YOffset = 0.75; \ +prefix##1XOffset = 0.25; \ +prefix##1YOffset = 0.25; /** * Sample positions: diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index c510d34ce2e..824aa637f9a 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -38,6 +38,15 @@ struct ra_regs; struct nir_shader; struct brw_program; +struct 
brw_simd32_heuristics_control { + bool grouped_sends_check; + int max_grouped_sends; + bool inst_count_check; + float inst_count_ratio; + bool mrt_check; + int max_mrts; +}; + struct brw_compiler { const struct gen_device_info *devinfo; @@ -118,6 +127,8 @@ struct brw_compiler { * whether nir_opt_large_constants will be run. */ bool supports_shader_constants; + + struct brw_simd32_heuristics_control simd32_heuristics_control; }; /** diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 20b89035e1f..329fb00ec8f 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -393,6 +393,25 @@ fs_inst::can_do_source_mods(const struct gen_device_info *devinfo) return true; } +bool +fs_inst::can_do_cmod() +{ + if (!backend_instruction::can_do_cmod()) + return false; + + /* The accumulator result appears to get used for the conditional modifier + * generation. When negating a UD value, there is a 33rd bit generated for + * the sign in the accumulator value, so now you can't check, for example, + * equality with a 32-bit value. See piglit fs-op-neg-uvec4. + */ + for (unsigned i = 0; i < sources; i++) { + if (type_is_unsigned_int(src[i].type) && src[i].negate) + return false; + } + + return true; +} + bool fs_inst::can_change_types() const { @@ -5115,6 +5134,25 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo, } } + if (devinfo->gen < 6) { + /* From the G45 PRM, Volume 4 Page 361: + * + * "Operand Alignment Rule: With the exceptions listed below, a + * source/destination operand in general should be aligned to even + * 256-bit physical register with a region size equal to two 256-bit + * physical registers." + * + * Normally we enforce this by allocating virtual registers to the + * even-aligned class. But we need to handle payload registers. 
+ */ + for (unsigned i = 0; i < inst->sources; i++) { + if (inst->src[i].file == FIXED_GRF && (inst->src[i].nr & 1) && + inst->size_read(i) > REG_SIZE) { + max_width = MIN2(max_width, 8); + } + } + } + /* From the IVB PRMs: * "When an instruction is SIMD32, the low 16 bits of the execution mask * are applied for both halves of the SIMD32 instruction. If different @@ -6321,6 +6359,7 @@ fs_visitor::optimize() if (OPT(lower_load_payload)) { split_virtual_grfs(); OPT(register_coalesce); + OPT(lower_simd_width); OPT(compute_to_mrf); OPT(dead_code_eliminate); } @@ -7059,6 +7098,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, char **error_str) { const struct gen_device_info *devinfo = compiler->devinfo; + bool simd16_failed = false; + bool simd16_spilled = false; nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true); @@ -7126,10 +7167,12 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, shader_time_index16); v16.import_uniforms(&v8); if (!v16.run_fs(allow_spilling, use_rep_send)) { + simd16_failed = true; compiler->shader_perf_log(log_data, "SIMD16 shader failed to compile: %s", v16.fail_msg); } else { + simd16_spilled = v16.spilled_any_registers; simd16_cfg = v16.cfg; prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs; prog_data->reg_blocks_16 = brw_register_blocks(v16.grf_used); @@ -7137,9 +7180,17 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, } /* Currently, the compiler only supports SIMD32 on SNB+ */ + const brw_simd32_heuristics_control *ctrl = &compiler->simd32_heuristics_control; + uint64_t mrts = shader->info.outputs_written << FRAG_RESULT_DATA0; + if (v8.max_dispatch_width >= 32 && !use_rep_send && compiler->devinfo->gen >= 6 && - unlikely(INTEL_DEBUG & DEBUG_DO32)) { + (unlikely(INTEL_DEBUG & DEBUG_DO32) || + (unlikely(INTEL_DEBUG & DEBUG_HEUR32) && + !simd16_failed && !simd16_spilled && + (!ctrl->mrt_check 
|| + (ctrl->mrt_check && + u_count_bits64(&mrts) <= ctrl->max_mrts))))) { /* Try a SIMD32 compile */ fs_visitor v32(compiler, log_data, mem_ctx, key, &prog_data->base, prog, shader, 32, @@ -7150,9 +7201,12 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, "SIMD32 shader failed to compile: %s", v32.fail_msg); } else { - simd32_cfg = v32.cfg; - prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs; - prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used); + if (likely(!(INTEL_DEBUG & DEBUG_HEUR32)) || + v32.run_heuristic(ctrl)) { + simd32_cfg = v32.cfg; + prog_data->dispatch_grf_start_reg_32 = v32.payload.num_regs; + prog_data->reg_blocks_32 = brw_register_blocks(v32.grf_used); + } } } @@ -7231,13 +7285,49 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, } if (simd32_cfg) { - prog_data->dispatch_32 = true; - prog_data->prog_offset_32 = g.generate_code(simd32_cfg, 32); + uint32_t offset = g.generate_code(simd32_cfg, 32); + + if (unlikely(INTEL_DEBUG & DEBUG_DO32) || + (unlikely(INTEL_DEBUG & DEBUG_HEUR32) && + (!simd16_cfg || + (simd16_cfg && + (!ctrl->inst_count_check || + (ctrl->inst_count_check && + (float)g.get_inst_count(32) / (float)g.get_inst_count(16) <= ctrl->inst_count_ratio)))))) { + prog_data->dispatch_32 = true; + prog_data->prog_offset_32 = offset; + } } return g.get_assembly(); } +bool +fs_visitor::run_heuristic(const struct brw_simd32_heuristics_control *ctrl) { + int grouped_sends = 0; + int max_grouped_sends = 0; + bool pass = true; + + foreach_block_and_inst(block, fs_inst, inst, cfg) { + if (inst->opcode >= SHADER_OPCODE_TEX && inst->opcode <= SHADER_OPCODE_SAMPLEINFO_LOGICAL) { + ++grouped_sends; + } else if (grouped_sends > 0) { + if (grouped_sends > max_grouped_sends) { + max_grouped_sends = grouped_sends; + } + grouped_sends = 0; + } + } + + if (ctrl->grouped_sends_check) { + if (max_grouped_sends > ctrl->max_grouped_sends) { + pass = false; + } + } + + return pass; +} + fs_reg * 
fs_visitor::emit_cs_work_group_id_setup() { diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index d56e33715ee..615aff25ba9 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -281,6 +281,8 @@ class fs_visitor : public backend_shader void dump_instruction(backend_instruction *inst); void dump_instruction(backend_instruction *inst, FILE *file); + bool run_heuristic(const struct brw_simd32_heuristics_control *ctrl); + const void *const key; const struct brw_sampler_prog_key_data *key_tex; @@ -392,6 +394,7 @@ class fs_generator void enable_debug(const char *shader_name); int generate_code(const cfg_t *cfg, int dispatch_width); + int get_inst_count(int dispatch_width); const unsigned *get_assembly(); private: @@ -484,6 +487,7 @@ class fs_generator struct brw_stage_prog_data * const prog_data; unsigned dispatch_width; /**< 8, 16 or 32 */ + int inst_count[3]; /* for 8, 16 and 32 */ exec_list discard_halt_patches; unsigned promoted_constants; diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index 7bee2aa0b9b..0cafaf50e56 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -795,7 +795,7 @@ namespace brw { !gen_device_info_is_9lp(shader->devinfo)) return false; - if (type_sz(type > 4)) + if (type_sz(type) > 4) return true; if (opcode == BRW_OPCODE_MUL && diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp b/src/intel/compiler/brw_fs_cmod_propagation.cpp index 5b74f267359..5fb522f810f 100644 --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp @@ -211,9 +211,17 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block) /* A CMP with a second source of zero can match with anything. A CMP * with a second source that is not zero can only match with an ADD * instruction. + * + * Only apply this optimization to floating-point sources. It can fail for + * integers.
For inputs a = 0x80000000, b = 4, int(0x80000000) < 4, but + * int(0x80000000) - 4 overflows and results in 0x7ffffffc. That's not + * less than zero, so the flags get set differently than for (a < b). */ if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) { - progress = cmod_propagate_cmp_to_add(devinfo, block, inst) || progress; + if (brw_reg_type_is_floating_point(inst->src[0].type) && + cmod_propagate_cmp_to_add(devinfo, block, inst)) + progress = true; + continue; } diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index e265d59ccbe..ed97935de91 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -2464,6 +2464,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) fill_count, promoted_constants, before_size, after_size); + inst_count[ffs(dispatch_width) - 4] = before_size / 16; + return start_offset; } @@ -2472,3 +2474,13 @@ fs_generator::get_assembly() { return brw_get_program(p, &prog_data->program_size); } + +int +fs_generator::get_inst_count(int dispatch_width) +{ + if (dispatch_width == 8 || dispatch_width == 16 || dispatch_width == 32) { + return inst_count[ffs(dispatch_width) - 4]; + } else { + return 0; + } +} \ No newline at end of file diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index 92dad269a34..07e7224e0f8 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -354,6 +354,7 @@ class fs_inst : public backend_instruction { unsigned components_read(unsigned i) const; unsigned size_read(int arg) const; bool can_do_source_mods(const struct gen_device_info *devinfo); + bool can_do_cmod(); bool can_change_types() const; bool has_source_and_destination_hazard() const; diff --git a/src/intel/compiler/brw_ir_vec4.h b/src/intel/compiler/brw_ir_vec4.h index e401d8b4d16..65b1e4f3b53 100644 --- a/src/intel/compiler/brw_ir_vec4.h +++ b/src/intel/compiler/brw_ir_vec4.h @@ -291,6
+291,7 @@ class vec4_instruction : public backend_instruction { int swizzle, int swizzle_mask); void reswizzle(int dst_writemask, int swizzle); bool can_do_source_mods(const struct gen_device_info *devinfo); + bool can_do_cmod(); bool can_do_writemask(const struct gen_device_info *devinfo); bool can_change_types() const; bool has_source_and_destination_hazard() const; diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 31ffbe613ec..29ad68fdb2a 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -713,18 +713,6 @@ brw_nir_link_shaders(const struct brw_compiler *compiler, nir_validate_shader(*producer); nir_validate_shader(*consumer); - const bool p_is_scalar = - compiler->scalar_stage[(*producer)->info.stage]; - const bool c_is_scalar = - compiler->scalar_stage[(*consumer)->info.stage]; - - if (p_is_scalar && c_is_scalar) { - NIR_PASS_V(*producer, nir_lower_io_to_scalar_early, nir_var_shader_out); - NIR_PASS_V(*consumer, nir_lower_io_to_scalar_early, nir_var_shader_in); - *producer = brw_nir_optimize(*producer, compiler, p_is_scalar); - *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar); - } - NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out); NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in); @@ -741,7 +729,12 @@ brw_nir_link_shaders(const struct brw_compiler *compiler, NIR_PASS_V(*consumer, nir_lower_indirect_derefs, brw_nir_no_indirect_mask(compiler, (*consumer)->info.stage)); + const bool p_is_scalar = + compiler->scalar_stage[(*producer)->info.stage]; *producer = brw_nir_optimize(*producer, compiler, p_is_scalar); + + const bool c_is_scalar = + compiler->scalar_stage[(*consumer)->info.stage]; *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar); } } diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h index ac12ab3d2dd..46d66198a1d 100644 --- a/src/intel/compiler/brw_reg.h +++ b/src/intel/compiler/brw_reg.h @@ -376,6 +376,15 @@ 
brw_int_type(unsigned sz, bool is_signed) } } +static inline bool +type_is_unsigned_int(enum brw_reg_type tp) +{ + return tp == BRW_REGISTER_TYPE_UB || + tp == BRW_REGISTER_TYPE_UW || + tp == BRW_REGISTER_TYPE_UD || + tp == BRW_REGISTER_TYPE_UQ; +} + /** * Construct a brw_reg. * \param file one of the BRW_x_REGISTER_FILE values diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index 4e242e03032..e2fa58502f0 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -256,6 +256,26 @@ vec4_instruction::can_do_source_mods(const struct gen_device_info *devinfo) return true; } +bool +vec4_instruction::can_do_cmod() +{ + if (!backend_instruction::can_do_cmod()) + return false; + + /* The accumulator result appears to get used for the conditional modifier + * generation. When negating a UD value, there is a 33rd bit generated for + * the sign in the accumulator value, so now you can't check, for example, + * equality with a 32-bit value. See piglit fs-op-neg-uvec4. + */ + for (unsigned i = 0; i < 3; i++) { + if (src[i].file != BAD_FILE && + type_is_unsigned_int(src[i].type) && src[i].negate) + return false; + } + + return true; +} + bool vec4_instruction::can_do_writemask(const struct gen_device_info *devinfo) { diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp index d506b675776..888cb358fd1 100644 --- a/src/intel/compiler/brw_vec4_generator.cpp +++ b/src/intel/compiler/brw_vec4_generator.cpp @@ -929,8 +929,21 @@ generate_tes_add_indirect_urb_offset(struct brw_codegen *p, brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, dst, header); + + /* Uniforms will have a stride <0;4,1>, and we need to convert to <0;1,0>. + * Other values get <4;1,0>. 
+ */ + struct brw_reg restrided_offset; + if (offset.vstride == BRW_VERTICAL_STRIDE_0 && + offset.width == BRW_WIDTH_4 && + offset.hstride == BRW_HORIZONTAL_STRIDE_1) { + restrided_offset = stride(offset, 0, 1, 0); + } else { + restrided_offset = stride(offset, 4, 1, 0); + } + /* m0.3-0.4: 128-bit-granular offsets into the URB from the handles */ - brw_MOV(p, vec2(get_element_ud(dst, 3)), stride(offset, 4, 1, 0)); + brw_MOV(p, vec2(get_element_ud(dst, 3)), restrided_offset); brw_pop_insn_state(p); } diff --git a/src/intel/compiler/brw_vec4_tes.cpp b/src/intel/compiler/brw_vec4_tes.cpp index 35aff0f4b78..cf1bff42aa9 100644 --- a/src/intel/compiler/brw_vec4_tes.cpp +++ b/src/intel/compiler/brw_vec4_tes.cpp @@ -185,9 +185,19 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) first_component /= 2; if (indirect_offset.file != BAD_FILE) { + src_reg clamped_indirect_offset = src_reg(this, glsl_type::uvec4_type); + + /* Page 190 of "Volume 7: 3D Media GPGPU Engine (Haswell)" says the + * valid range of the offset is [0, 0FFFFFFFh]. + */ + emit_minmax(BRW_CONDITIONAL_L, + dst_reg(clamped_indirect_offset), + retype(indirect_offset, BRW_REGISTER_TYPE_UD), + brw_imm_ud(0x0fffffffu)); + header = src_reg(this, glsl_type::uvec4_type); emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header), - input_read_header, indirect_offset); + input_read_header, clamped_indirect_offset); } else { /* Arbitrarily only push up to 24 vec4 slots worth of data, * which is 12 registers (since each holds 2 vec4 slots). 
diff --git a/src/intel/genxml/gen10.xml b/src/intel/genxml/gen10.xml index 541e4405716..be18708df00 100644 --- a/src/intel/genxml/gen10.xml +++ b/src/intel/genxml/gen10.xml @@ -3553,6 +3553,14 @@ + + + + + + + + diff --git a/src/intel/genxml/gen11.xml b/src/intel/genxml/gen11.xml index bd3800e4b79..8ed46860cd0 100644 --- a/src/intel/genxml/gen11.xml +++ b/src/intel/genxml/gen11.xml @@ -3551,6 +3551,14 @@ + + + + + + + + diff --git a/src/intel/genxml/gen75.xml b/src/intel/genxml/gen75.xml index 5b01fd45400..dfc3d891498 100644 --- a/src/intel/genxml/gen75.xml +++ b/src/intel/genxml/gen75.xml @@ -2314,9 +2314,9 @@ - - - + + + diff --git a/src/intel/genxml/gen8.xml b/src/intel/genxml/gen8.xml index 4ed41d15612..330366b7ed0 100644 --- a/src/intel/genxml/gen8.xml +++ b/src/intel/genxml/gen8.xml @@ -2553,9 +2553,9 @@ - - - + + + diff --git a/src/intel/genxml/gen9.xml b/src/intel/genxml/gen9.xml index 318ae89d5e7..0c7c606c6b6 100644 --- a/src/intel/genxml/gen9.xml +++ b/src/intel/genxml/gen9.xml @@ -3491,6 +3491,14 @@ + + + + + + + + diff --git a/src/intel/isl/isl_gen7.c b/src/intel/isl/isl_gen7.c index 4fa9851233f..a9db21fba52 100644 --- a/src/intel/isl/isl_gen7.c +++ b/src/intel/isl/isl_gen7.c @@ -294,6 +294,29 @@ isl_gen6_filter_tiling(const struct isl_device *dev, */ if (ISL_DEV_GEN(dev) < 7 && isl_format_get_layout(info->format)->bpb >= 128) *flags &= ~ISL_TILING_Y0_BIT; + + /* From the BDW and SKL PRMs, Volume 2d, + * RENDER_SURFACE_STATE::Width - Programming Notes: + * + * A known issue exists if a primitive is rendered to the first 2 rows and + * last 2 columns of a 16K width surface. If any geometry is drawn inside + * this square it will be copied to column X=2 and X=3 (arrangement on Y + * position will stay the same). If any geometry exceeds the boundaries of + * this 2x2 region it will be drawn normally. The issue also only occurs + * if the surface has TileMode != Linear. + * + * [Internal documentation notes that this issue isn't present on SKL GT4.] 
+ * To prevent this rendering corruption, only allow linear tiling for + * surfaces with widths greater than 16K-2 pixels. + * + * TODO: Is this an issue for multisampled surfaces as well? + */ + if (info->width > 16382 && info->samples == 1 && + info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT && + (ISL_DEV_GEN(dev) == 8 || + (dev->info->is_skylake && dev->info->gt != 4))) { + *flags &= ISL_TILING_LINEAR_BIT; + } } void diff --git a/src/intel/meson.build b/src/intel/meson.build index ccaf16a76f9..b53728c026c 100644 --- a/src/intel/meson.build +++ b/src/intel/meson.build @@ -18,6 +18,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +c_sse2_args = ['-msse2', '-mstackrealign'] inc_intel = include_directories('.') subdir('blorp') diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c index 8989d558b66..3fec04c24c4 100644 --- a/src/intel/tools/aubinator.c +++ b/src/intel/tools/aubinator.c @@ -590,7 +590,7 @@ handle_memtrace_reg_write(uint32_t *p) uint32_t pphwsp_addr = context_descriptor & 0xfffff000; struct gen_batch_decode_bo pphwsp_bo = get_ggtt_batch_bo(NULL, pphwsp_addr); uint32_t *context = (uint32_t *)((uint8_t *)pphwsp_bo.map + - (pphwsp_bo.addr - pphwsp_addr) + + (pphwsp_addr - pphwsp_bo.addr) + pphwsp_size); uint32_t ring_buffer_head = context[5]; @@ -601,7 +601,7 @@ handle_memtrace_reg_write(uint32_t *p) struct gen_batch_decode_bo ring_bo = get_ggtt_batch_bo(NULL, ring_buffer_start); assert(ring_bo.size > 0); - void *commands = (uint8_t *)ring_bo.map + (ring_bo.addr - ring_buffer_start); + void *commands = (uint8_t *)ring_bo.map + (ring_buffer_start - ring_bo.addr); if (context_descriptor & 0x100 /* ppgtt */) { batch_ctx.get_bo = get_ppgtt_batch_bo; diff --git a/src/intel/tools/error2aub.c b/src/intel/tools/error2aub.c index 3407dcec0b7..68a5b96e109 100644 --- a/src/intel/tools/error2aub.c +++ b/src/intel/tools/error2aub.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include 
"aub_write.h" @@ -205,7 +206,7 @@ main(int argc, char *argv[]) BO_TYPE_UNKNOWN = 0, BO_TYPE_BATCH, BO_TYPE_USER, - } bo_type; + } bo_type = BO_TYPE_UNKNOWN; uint64_t bo_addr; char *line = NULL; diff --git a/src/intel/tools/meson.build b/src/intel/tools/meson.build index 37bc04d8301..bef0af02f90 100644 --- a/src/intel/tools/meson.build +++ b/src/intel/tools/meson.build @@ -70,7 +70,7 @@ if with_tools.contains('intel') dependencies : [dep_dl, dep_thread], include_directories : [inc_common, inc_intel, inc_drm_uapi], link_with : [libintel_common, libmesa_util], - c_args : [c_vis_args, no_override_init_args], + c_args : [c_vis_args, no_override_init_args, c_sse2_args], install_dir: get_option('libexecdir'), install: true, ) diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index ab01d46cbeb..67f2f73aa11 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -1253,7 +1253,8 @@ anv_bo_cache_lookup(struct anv_bo_cache *cache, uint32_t gem_handle) (EXEC_OBJECT_WRITE | \ EXEC_OBJECT_ASYNC | \ EXEC_OBJECT_SUPPORTS_48B_ADDRESS | \ - EXEC_OBJECT_PINNED) + EXEC_OBJECT_PINNED | \ + ANV_BO_EXTERNAL) VkResult anv_bo_cache_alloc(struct anv_device *device, @@ -1311,6 +1312,7 @@ anv_bo_cache_import(struct anv_device *device, struct anv_bo **bo_out) { assert(bo_flags == (bo_flags & ANV_BO_CACHE_SUPPORTED_FLAGS)); + assert(bo_flags & ANV_BO_EXTERNAL); pthread_mutex_lock(&cache->mutex); @@ -1327,7 +1329,7 @@ anv_bo_cache_import(struct anv_device *device, * client has imported a BO twice in different ways and they get what * they have coming. 
*/ - uint64_t new_flags = 0; + uint64_t new_flags = ANV_BO_EXTERNAL; new_flags |= (bo->bo.flags | bo_flags) & EXEC_OBJECT_WRITE; new_flags |= (bo->bo.flags & bo_flags) & EXEC_OBJECT_ASYNC; new_flags |= (bo->bo.flags & bo_flags) & EXEC_OBJECT_SUPPORTS_48B_ADDRESS; @@ -1356,6 +1358,7 @@ anv_bo_cache_import(struct anv_device *device, if ((new_flags & EXEC_OBJECT_PINNED) && (bo->bo.flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) != (bo_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)) { + pthread_mutex_unlock(&cache->mutex); return vk_errorf(device->instance, NULL, VK_ERROR_INVALID_EXTERNAL_HANDLE, "The same BO was imported on two different heaps"); @@ -1411,6 +1414,12 @@ anv_bo_cache_export(struct anv_device *device, assert(anv_bo_cache_lookup(cache, bo_in->gem_handle) == bo_in); struct anv_cached_bo *bo = (struct anv_cached_bo *)bo_in; + /* This BO must have been flagged external in order for us to be able + * to export it. This is done based on external options passed into + * anv_AllocateMemory. + */ + assert(bo->bo.flags & ANV_BO_EXTERNAL); + int fd = anv_gem_handle_to_fd(device, bo->bo.gem_handle); if (fd < 0) return vk_error(VK_ERROR_TOO_MANY_OBJECTS); diff --git a/src/intel/vulkan/anv_android.c b/src/intel/vulkan/anv_android.c index a3bab8087b4..dd8df331349 100644 --- a/src/intel/vulkan/anv_android.c +++ b/src/intel/vulkan/anv_android.c @@ -128,7 +128,7 @@ anv_image_from_gralloc(VkDevice device_h, */ int dma_buf = gralloc_info->handle->data[0]; - uint64_t bo_flags = 0; + uint64_t bo_flags = ANV_BO_EXTERNAL; if (device->instance->physicalDevice.supports_48bit_addresses) bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; if (device->instance->physicalDevice.use_softpin) @@ -234,7 +234,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID( *grallocUsage = 0; intel_logd("%s: format=%d, usage=0x%x", __func__, format, imageUsage); - /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags + /* WARNING: Android's libvulkan.so hardcodes the VkImageUsageFlags * returned to 
applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags. * The relevant code in libvulkan/swapchain.cpp contains this fun comment: * @@ -247,7 +247,7 @@ VkResult anv_GetSwapchainGrallocUsageANDROID( * dEQP-VK.wsi.android.swapchain.*.image_usage to fail. */ - const VkPhysicalDeviceImageFormatInfo2KHR image_format_info = { + VkPhysicalDeviceImageFormatInfo2KHR image_format_info = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR, .format = format, .type = VK_IMAGE_TYPE_2D, @@ -255,6 +255,17 @@ VkResult anv_GetSwapchainGrallocUsageANDROID( .usage = imageUsage, }; + /* Android P and earlier doesn't check if the physical device supports a + * given format and usage combination before calling this function. Omit the + * storage requirement to make the tests pass. + */ +#if ANDROID_API_LEVEL <= 28 + if (format == VK_FORMAT_R8G8B8A8_SRGB || + format == VK_FORMAT_R5G6B5_UNORM_PACK16) { + image_format_info.usage &= ~VK_IMAGE_USAGE_STORAGE_BIT; + } +#endif + VkImageFormatProperties2KHR image_format_props = { .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR, }; @@ -268,19 +279,13 @@ VkResult anv_GetSwapchainGrallocUsageANDROID( "inside %s", __func__); } - /* Reject STORAGE here to avoid complexity elsewhere. 
*/ - if (imageUsage & VK_IMAGE_USAGE_STORAGE_BIT) { - return vk_errorf(device->instance, device, VK_ERROR_FORMAT_NOT_SUPPORTED, - "VK_IMAGE_USAGE_STORAGE_BIT unsupported for gralloc " - "swapchain"); - } - if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) *grallocUsage |= GRALLOC_USAGE_HW_RENDER; if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) *grallocUsage |= GRALLOC_USAGE_HW_TEXTURE; diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index c47a81c8a4d..e08e07ad7bd 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -531,7 +531,7 @@ emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer, anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START, bbs) { bbs.DWordLength = cmd_buffer->device->info.gen < 8 ? gen7_length : gen8_length; - bbs._2ndLevelBatchBuffer = _1stlevelbatch; + bbs.SecondLevelBatchBuffer = Firstlevelbatch; bbs.AddressSpaceIndicator = ASI_PPGTT; bbs.BatchBufferStartAddress = (struct anv_address) { bo, offset }; } @@ -894,8 +894,17 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) * It doesn't matter where it points now so long as has a valid * relocation. We'll adjust it later as part of the chaining * process. + * + * We set the end of the batch a little short so we would be sure we + * have room for the chaining command. Since we're about to emit the + * chaining command, let's set it back where it should go. 
*/ + cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4; + assert(cmd_buffer->batch.start == batch_bo->bo.map); + assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size); + emit_batch_buffer_start(cmd_buffer, &batch_bo->bo, 0); + assert(cmd_buffer->batch.start == batch_bo->bo.map); } else { cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; } @@ -1088,7 +1097,7 @@ anv_execbuf_add_bo(struct anv_execbuf *exec, obj->relocs_ptr = 0; obj->alignment = 0; obj->offset = bo->offset; - obj->flags = bo->flags | extra_flags; + obj->flags = (bo->flags & ~ANV_BO_FLAG_MASK) | extra_flags; obj->rsvd1 = 0; obj->rsvd2 = 0; } diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index cd67cc636b2..1cc632bdb0d 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -155,7 +155,7 @@ get_blorp_surf_for_anv_buffer(struct anv_device *device, .addr = { .buffer = buffer->address.bo, .offset = buffer->address.offset + offset, - .mocs = device->default_mocs, + .mocs = anv_mocs_for_bo(device, buffer->address.bo), }, }; @@ -208,7 +208,7 @@ get_blorp_surf_for_anv_image(const struct anv_device *device, .addr = { .buffer = image->planes[plane].address.bo, .offset = image->planes[plane].address.offset + surface->offset, - .mocs = device->default_mocs, + .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo), }, }; @@ -218,7 +218,7 @@ get_blorp_surf_for_anv_image(const struct anv_device *device, blorp_surf->aux_addr = (struct blorp_address) { .buffer = image->planes[plane].address.bo, .offset = image->planes[plane].address.offset + aux_surface->offset, - .mocs = device->default_mocs, + .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo), }; blorp_surf->aux_usage = aux_usage; @@ -532,81 +532,86 @@ void anv_CmdBlitImage( const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource; const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource; - 
get_blorp_surf_for_anv_image(cmd_buffer->device, - src_image, src_res->aspectMask, - srcImageLayout, ISL_AUX_USAGE_NONE, &src); - get_blorp_surf_for_anv_image(cmd_buffer->device, - dst_image, dst_res->aspectMask, - dstImageLayout, ISL_AUX_USAGE_NONE, &dst); - - struct anv_format_plane src_format = - anv_get_format_plane(&cmd_buffer->device->info, src_image->vk_format, - src_res->aspectMask, src_image->tiling); - struct anv_format_plane dst_format = - anv_get_format_plane(&cmd_buffer->device->info, dst_image->vk_format, - dst_res->aspectMask, dst_image->tiling); - - unsigned dst_start, dst_end; - if (dst_image->type == VK_IMAGE_TYPE_3D) { - assert(dst_res->baseArrayLayer == 0); - dst_start = pRegions[r].dstOffsets[0].z; - dst_end = pRegions[r].dstOffsets[1].z; - } else { - dst_start = dst_res->baseArrayLayer; - dst_end = dst_start + anv_get_layerCount(dst_image, dst_res); - } - - unsigned src_start, src_end; - if (src_image->type == VK_IMAGE_TYPE_3D) { - assert(src_res->baseArrayLayer == 0); - src_start = pRegions[r].srcOffsets[0].z; - src_end = pRegions[r].srcOffsets[1].z; - } else { - src_start = src_res->baseArrayLayer; - src_end = src_start + anv_get_layerCount(src_image, src_res); - } - - bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end); - float src_z_step = (float)(src_end + 1 - src_start) / - (float)(dst_end + 1 - dst_start); + assert(anv_image_aspects_compatible(src_res->aspectMask, + dst_res->aspectMask)); + + uint32_t aspect_bit; + anv_foreach_image_aspect_bit(aspect_bit, src_image, src_res->aspectMask) { + get_blorp_surf_for_anv_image(cmd_buffer->device, + src_image, 1U << aspect_bit, + srcImageLayout, ISL_AUX_USAGE_NONE, &src); + get_blorp_surf_for_anv_image(cmd_buffer->device, + dst_image, 1U << aspect_bit, + dstImageLayout, ISL_AUX_USAGE_NONE, &dst); + + struct anv_format_plane src_format = + anv_get_format_plane(&cmd_buffer->device->info, src_image->vk_format, + 1U << aspect_bit, src_image->tiling); + struct anv_format_plane 
dst_format = + anv_get_format_plane(&cmd_buffer->device->info, dst_image->vk_format, + 1U << aspect_bit, dst_image->tiling); + + unsigned dst_start, dst_end; + if (dst_image->type == VK_IMAGE_TYPE_3D) { + assert(dst_res->baseArrayLayer == 0); + dst_start = pRegions[r].dstOffsets[0].z; + dst_end = pRegions[r].dstOffsets[1].z; + } else { + dst_start = dst_res->baseArrayLayer; + dst_end = dst_start + anv_get_layerCount(dst_image, dst_res); + } - if (flip_z) { - src_start = src_end; - src_z_step *= -1; - } + unsigned src_start, src_end; + if (src_image->type == VK_IMAGE_TYPE_3D) { + assert(src_res->baseArrayLayer == 0); + src_start = pRegions[r].srcOffsets[0].z; + src_end = pRegions[r].srcOffsets[1].z; + } else { + src_start = src_res->baseArrayLayer; + src_end = src_start + anv_get_layerCount(src_image, src_res); + } - unsigned src_x0 = pRegions[r].srcOffsets[0].x; - unsigned src_x1 = pRegions[r].srcOffsets[1].x; - unsigned dst_x0 = pRegions[r].dstOffsets[0].x; - unsigned dst_x1 = pRegions[r].dstOffsets[1].x; - bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1); + bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end); + float src_z_step = (float)(src_end + 1 - src_start) / + (float)(dst_end + 1 - dst_start); - unsigned src_y0 = pRegions[r].srcOffsets[0].y; - unsigned src_y1 = pRegions[r].srcOffsets[1].y; - unsigned dst_y0 = pRegions[r].dstOffsets[0].y; - unsigned dst_y1 = pRegions[r].dstOffsets[1].y; - bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1); + if (flip_z) { + src_start = src_end; + src_z_step *= -1; + } - const unsigned num_layers = dst_end - dst_start; - anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, - dst_res->aspectMask, - dst.aux_usage, - dst_res->mipLevel, - dst_start, num_layers); - - for (unsigned i = 0; i < num_layers; i++) { - unsigned dst_z = dst_start + i; - unsigned src_z = src_start + i * src_z_step; - - blorp_blit(&batch, &src, src_res->mipLevel, src_z, - src_format.isl_format, 
src_format.swizzle, - &dst, dst_res->mipLevel, dst_z, - dst_format.isl_format, dst_format.swizzle, - src_x0, src_y0, src_x1, src_y1, - dst_x0, dst_y0, dst_x1, dst_y1, - blorp_filter, flip_x, flip_y); + unsigned src_x0 = pRegions[r].srcOffsets[0].x; + unsigned src_x1 = pRegions[r].srcOffsets[1].x; + unsigned dst_x0 = pRegions[r].dstOffsets[0].x; + unsigned dst_x1 = pRegions[r].dstOffsets[1].x; + bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1); + + unsigned src_y0 = pRegions[r].srcOffsets[0].y; + unsigned src_y1 = pRegions[r].srcOffsets[1].y; + unsigned dst_y0 = pRegions[r].dstOffsets[0].y; + unsigned dst_y1 = pRegions[r].dstOffsets[1].y; + bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1); + + const unsigned num_layers = dst_end - dst_start; + anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, + 1U << aspect_bit, + dst.aux_usage, + dst_res->mipLevel, + dst_start, num_layers); + + for (unsigned i = 0; i < num_layers; i++) { + unsigned dst_z = dst_start + i; + unsigned src_z = src_start + i * src_z_step; + + blorp_blit(&batch, &src, src_res->mipLevel, src_z, + src_format.isl_format, src_format.swizzle, + &dst, dst_res->mipLevel, dst_z, + dst_format.isl_format, dst_format.swizzle, + src_x0, src_y0, src_x1, src_y1, + dst_x0, dst_y0, dst_x1, dst_y1, + blorp_filter, flip_x, flip_y); + } } - } blorp_batch_finish(&batch); @@ -663,12 +668,12 @@ void anv_CmdCopyBuffer( struct blorp_address src = { .buffer = src_buffer->address.bo, .offset = src_buffer->address.offset + pRegions[r].srcOffset, - .mocs = cmd_buffer->device->default_mocs, + .mocs = anv_mocs_for_bo(cmd_buffer->device, src_buffer->address.bo), }; struct blorp_address dst = { .buffer = dst_buffer->address.bo, .offset = dst_buffer->address.offset + pRegions[r].dstOffset, - .mocs = cmd_buffer->device->default_mocs, + .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo), }; blorp_buffer_copy(&batch, src, dst, pRegions[r].size); @@ -721,7 +726,7 @@ void anv_CmdUpdateBuffer( 
struct blorp_address dst = { .buffer = dst_buffer->address.bo, .offset = dst_buffer->address.offset + dstOffset, - .mocs = cmd_buffer->device->default_mocs, + .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo), }; blorp_buffer_copy(&batch, src, dst, copy_size); @@ -1431,7 +1436,8 @@ anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer, .buffer = image->planes[0].address.bo, .offset = image->planes[0].address.offset + image->planes[0].shadow_surface.offset, - .mocs = cmd_buffer->device->default_mocs, + .mocs = anv_mocs_for_bo(cmd_buffer->device, + image->planes[0].address.bo), }, }; @@ -1599,6 +1605,24 @@ anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer, ISL_AUX_USAGE_NONE, &stencil); } + /* From the Sky Lake PRM Volume 7, "Depth Buffer Clear": + * + * "The following is required when performing a depth buffer clear with + * using the WM_STATE or 3DSTATE_WM: + * + * * If other rendering operations have preceded this clear, a + * PIPE_CONTROL with depth cache flush enabled, Depth Stall bit + * enabled must be issued before the rectangle primitive used for + * the depth buffer clear operation. + * * [...]" + * + * Even though the PRM only says that this is required if using 3DSTATE_WM + * and a 3DPRIMITIVE, it appears to also sometimes hang when doing a clear + * with WM_HZ_OP. + */ + cmd_buffer->state.pending_pipe_bits |= + ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT; + blorp_hiz_clear_depth_stencil(&batch, &depth, &stencil, level, base_layer, layer_count, area.offset.x, area.offset.y, @@ -1613,18 +1637,22 @@ anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer, /* From the SKL PRM, Depth Buffer Clear: * - * Depth Buffer Clear Workaround - * Depth buffer clear pass using any of the methods (WM_STATE, 3DSTATE_WM - * or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL command with - * DEPTH_STALL bit and Depth FLUSH bits “set” before starting to render. 
- * DepthStall and DepthFlush are not needed between consecutive depth clear - * passes nor is it required if the depth-clear pass was done with - * “full_surf_clear” bit set in the 3DSTATE_WM_HZ_OP. + * "Depth Buffer Clear Workaround + * + * Depth buffer clear pass using any of the methods (WM_STATE, + * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL + * command with DEPTH_STALL bit and Depth FLUSH bits “set” before + * starting to render. DepthStall and DepthFlush are not needed between + * consecutive depth clear passes nor is it required if the depth-clear + * pass was done with “full_surf_clear” bit set in the + * 3DSTATE_WM_HZ_OP." + * + * Even though the PRM provides a bunch of conditions under which this is + * supposedly unnecessary, we choose to perform the flush unconditionally + * just to be safe. */ - if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { - cmd_buffer->state.pending_pipe_bits |= - ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT; - } + cmd_buffer->state.pending_pipe_bits |= + ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT; } void diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index c40b94d69f3..50d16b8204b 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -610,20 +610,33 @@ VkResult anv_CreateInstance( else instance->alloc = default_alloc; - if (pCreateInfo->pApplicationInfo && - pCreateInfo->pApplicationInfo->apiVersion != 0) { - instance->apiVersion = pCreateInfo->pApplicationInfo->apiVersion; - } else { - anv_EnumerateInstanceVersion(&instance->apiVersion); + instance->app_info = (struct anv_app_info) { .api_version = 0 }; + if (pCreateInfo->pApplicationInfo) { + const VkApplicationInfo *app = pCreateInfo->pApplicationInfo; + + instance->app_info.app_name = + vk_strdup(&instance->alloc, app->pApplicationName, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + instance->app_info.app_version = app->applicationVersion; + + instance->app_info.engine_name = + 
vk_strdup(&instance->alloc, app->pEngineName, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + instance->app_info.engine_version = app->engineVersion; + + instance->app_info.api_version = app->apiVersion; } + if (instance->app_info.api_version == 0) + anv_EnumerateInstanceVersion(&instance->app_info.api_version); + instance->enabled_extensions = enabled_extensions; for (unsigned i = 0; i < ARRAY_SIZE(instance->dispatch.entrypoints); i++) { /* Vulkan requires that entrypoints for extensions which have not been * enabled must not be advertised. */ - if (!anv_entrypoint_is_enabled(i, instance->apiVersion, + if (!anv_entrypoint_is_enabled(i, instance->app_info.api_version, &instance->enabled_extensions, NULL)) { instance->dispatch.entrypoints[i] = NULL; } else if (anv_dispatch_table.entrypoints[i] != NULL) { @@ -669,6 +682,9 @@ void anv_DestroyInstance( anv_physical_device_finish(&instance->physicalDevice); } + vk_free(&instance->alloc, instance->app_info.app_name); + vk_free(&instance->alloc, instance->app_info.engine_name); + VG(VALGRIND_DESTROY_MEMPOOL(instance)); vk_debug_report_instance_destroy(&instance->debug_report_callbacks); @@ -841,6 +857,15 @@ void anv_GetPhysicalDeviceFeatures( pFeatures->vertexPipelineStoresAndAtomics = pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] && pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY]; + + struct anv_app_info *app_info = &pdevice->instance->app_info; + + /* The new DOOM and Wolfenstein games require depthBounds without + * checking for it. They seem to run fine without it so just claim it's + * there and accept the consequences. 
+ */ + if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0) + pFeatures->depthBounds = true; } void anv_GetPhysicalDeviceFeatures2( @@ -909,6 +934,14 @@ void anv_GetPhysicalDeviceFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: { + VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features = + (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext; + features->vertexAttributeInstanceRateDivisor = VK_TRUE; + features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE; + break; + } + default: anv_debug_ignored_stype(ext->sType); break; @@ -1150,6 +1183,13 @@ void anv_GetPhysicalDeviceProperties2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: { + VkPhysicalDeviceProtectedMemoryProperties *props = + (VkPhysicalDeviceProtectedMemoryProperties *)ext; + props->protectedNoFault = false; + break; + } + default: anv_debug_ignored_stype(ext->sType); break; @@ -1489,7 +1529,7 @@ anv_device_init_dispatch(struct anv_device *device) /* Vulkan requires that entrypoints for extensions which have not been * enabled must not be advertised. 
*/ - if (!anv_entrypoint_is_enabled(i, device->instance->apiVersion, + if (!anv_entrypoint_is_enabled(i, device->instance->app_info.api_version, &device->instance->enabled_extensions, &device->enabled_extensions)) { device->dispatch.entrypoints[i] = NULL; @@ -2176,8 +2216,8 @@ VkResult anv_AllocateMemory( fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); - result = anv_bo_cache_import(device, &device->bo_cache, - fd_info->fd, bo_flags, &mem->bo); + result = anv_bo_cache_import(device, &device->bo_cache, fd_info->fd, + bo_flags | ANV_BO_EXTERNAL, &mem->bo); if (result != VK_SUCCESS) goto fail; @@ -2214,6 +2254,11 @@ VkResult anv_AllocateMemory( */ close(fd_info->fd); } else { + const VkExportMemoryAllocateInfoKHR *fd_info = + vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR); + if (fd_info && fd_info->handleTypes) + bo_flags |= ANV_BO_EXTERNAL; + result = anv_bo_cache_alloc(device, &device->bo_cache, pAllocateInfo->allocationSize, bo_flags, &mem->bo); diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py index cffc3e700cb..c2b79b51637 100644 --- a/src/intel/vulkan/anv_extensions.py +++ b/src/intel/vulkan/anv_extensions.py @@ -72,9 +72,9 @@ def __init__(self, version, enable): EXTENSIONS = [ Extension('VK_ANDROID_native_buffer', 5, 'ANDROID'), Extension('VK_KHR_16bit_storage', 1, 'device->info.gen >= 8'), - Extension('VK_KHR_8bit_storage', 1, 'device->info.gen >= 8'), + Extension('VK_KHR_8bit_storage', 1, 'device->info.gen >= 8 && !ANDROID'), Extension('VK_KHR_bind_memory2', 1, True), - Extension('VK_KHR_create_renderpass2', 1, True), + Extension('VK_KHR_create_renderpass2', 1, '!ANDROID'), Extension('VK_KHR_dedicated_allocation', 1, True), Extension('VK_KHR_descriptor_update_template', 1, True), Extension('VK_KHR_device_group', 1, True), @@ -123,7 +123,7 @@ def __init__(self, version, enable): 'device->has_context_priority'), Extension('VK_EXT_shader_viewport_index_layer', 1, True), 
Extension('VK_EXT_shader_stencil_export', 1, 'device->info.gen >= 9'), - Extension('VK_EXT_vertex_attribute_divisor', 2, True), + Extension('VK_EXT_vertex_attribute_divisor', 3, True), Extension('VK_EXT_post_depth_coverage', 1, 'device->info.gen >= 9'), ] diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 36d4ac13c75..28d70967b05 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -1095,7 +1095,7 @@ anv_image_fill_surface_state(struct anv_device *device, .size = surface->isl.size, .format = ISL_FORMAT_RAW, .stride = 1, - .mocs = device->default_mocs); + .mocs = anv_mocs_for_bo(device, address.bo)); state_inout->address = address, state_inout->aux_address = ANV_NULL_ADDRESS; state_inout->clear_address = ANV_NULL_ADDRESS; @@ -1196,7 +1196,8 @@ anv_image_fill_surface_state(struct anv_device *device, .aux_address = anv_address_physical(aux_address), .clear_address = anv_address_physical(clear_address), .use_clear_address = !anv_address_is_null(clear_address), - .mocs = device->default_mocs, + .mocs = anv_mocs_for_bo(device, + state_inout->address.bo), .x_offset_sa = tile_x_sa, .y_offset_sa = tile_y_sa); diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c index 06db5787a9c..f6b8ded20a9 100644 --- a/src/intel/vulkan/anv_intel.c +++ b/src/intel/vulkan/anv_intel.c @@ -64,7 +64,8 @@ VkResult anv_CreateDmaBufImageINTEL( .samples = 1, /* FIXME: Need a way to use X tiling to allow scanout */ .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT, .flags = 0, }}, pAllocator, &image_h); @@ -73,7 +74,7 @@ VkResult anv_CreateDmaBufImageINTEL( image = anv_image_from_handle(image_h); - uint64_t bo_flags = 0; + uint64_t bo_flags = ANV_BO_EXTERNAL; if (device->instance->physicalDevice.supports_48bit_addresses) bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; if (device->instance->physicalDevice.use_softpin) 
diff --git a/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c b/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c index 5a971d9be39..71e511f34b7 100644 --- a/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c +++ b/src/intel/vulkan/anv_nir_lower_ycbcr_textures.c @@ -340,18 +340,16 @@ try_lower_tex_ycbcr(struct anv_pipeline_layout *layout, if (binding->immutable_samplers == NULL) return false; - unsigned texture_index = tex->texture_index; + assert(tex->texture_index == 0); + unsigned array_index = 0; if (deref->deref_type != nir_deref_type_var) { assert(deref->deref_type == nir_deref_type_array); nir_const_value *const_index = nir_src_as_const_value(deref->arr.index); if (!const_index) return false; - size_t hw_binding_size = - anv_descriptor_set_binding_layout_get_hw_size(binding); - texture_index += MIN2(const_index->u32[0], hw_binding_size - 1); + array_index = MIN2(const_index->u32[0], binding->array_size - 1); } - const struct anv_sampler *sampler = - binding->immutable_samplers[texture_index]; + const struct anv_sampler *sampler = binding->immutable_samplers[array_index]; if (sampler->conversion == NULL) return false; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 1660fcbbc87..1e7c87c70d0 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -555,6 +555,10 @@ anv_multialloc_alloc2(struct anv_multialloc *ma, return anv_multialloc_alloc(ma, alloc ? 
alloc : parent_alloc, scope); } +/* Extra ANV-defined BO flags which won't be passed to the kernel */ +#define ANV_BO_EXTERNAL (1ull << 31) +#define ANV_BO_FLAG_MASK (1ull << 31) + struct anv_bo { uint32_t gem_handle; @@ -882,12 +886,21 @@ struct anv_physical_device { int master_fd; }; +struct anv_app_info { + const char* app_name; + uint32_t app_version; + const char* engine_name; + uint32_t engine_version; + uint32_t api_version; +}; + struct anv_instance { VK_LOADER_DATA _loader_data; VkAllocationCallbacks alloc; - uint32_t apiVersion; + struct anv_app_info app_info; + struct anv_instance_extension_table enabled_extensions; struct anv_dispatch_table dispatch; @@ -1003,6 +1016,7 @@ struct anv_device { struct anv_scratch_pool scratch_pool; uint32_t default_mocs; + uint32_t external_mocs; pthread_mutex_t mutex; pthread_cond_t queue_submit; @@ -1032,6 +1046,15 @@ anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) { anv_state_pool_free(anv_binding_table_pool(device), state); } +static inline uint32_t +anv_mocs_for_bo(const struct anv_device *device, const struct anv_bo *bo) +{ + if (bo->flags & ANV_BO_EXTERNAL) + return device->external_mocs; + else + return device->default_mocs; +} + static void inline anv_state_flush(struct anv_device *device, struct anv_state state) { @@ -1313,6 +1336,12 @@ _anv_combine_address(struct anv_batch *batch, void *location, .AgeforQUADLRU = 0 \ } +#define GEN8_EXTERNAL_MOCS (struct GEN8_MEMORY_OBJECT_CONTROL_STATE) { \ + .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle, \ + .TargetCache = L3DefertoPATforLLCeLLCselection, \ + .AgeforQUADLRU = 0 \ + } + /* Skylake: MOCS is now an index into an array of 62 different caching * configurations programmed by the kernel. 
*/ @@ -1322,9 +1351,9 @@ _anv_combine_address(struct anv_batch *batch, void *location, .IndextoMOCSTables = 2 \ } -#define GEN9_MOCS_PTE { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 1 \ +#define GEN9_EXTERNAL_MOCS (struct GEN9_MEMORY_OBJECT_CONTROL_STATE) { \ + /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ + .IndextoMOCSTables = 1 \ } /* Cannonlake MOCS defines are duplicates of Skylake MOCS defines. */ @@ -1333,9 +1362,9 @@ _anv_combine_address(struct anv_batch *batch, void *location, .IndextoMOCSTables = 2 \ } -#define GEN10_MOCS_PTE { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 1 \ +#define GEN10_EXTERNAL_MOCS (struct GEN10_MEMORY_OBJECT_CONTROL_STATE) { \ + /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ + .IndextoMOCSTables = 1 \ } /* Ice Lake MOCS defines are duplicates of Skylake MOCS defines. */ @@ -1344,9 +1373,9 @@ _anv_combine_address(struct anv_batch *batch, void *location, .IndextoMOCSTables = 2 \ } -#define GEN11_MOCS_PTE { \ - /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ - .IndextoMOCSTables = 1 \ +#define GEN11_EXTERNAL_MOCS (struct GEN11_MEMORY_OBJECT_CONTROL_STATE) { \ + /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */ \ + .IndextoMOCSTables = 1 \ } struct anv_device_memory { diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c index e0c0a42069f..f7fb137fdf5 100644 --- a/src/intel/vulkan/anv_queue.c +++ b/src/intel/vulkan/anv_queue.c @@ -971,7 +971,7 @@ VkResult anv_CreateSemaphore( } else { semaphore->permanent.type = ANV_SEMAPHORE_TYPE_BO; VkResult result = anv_bo_cache_alloc(device, &device->bo_cache, - 4096, 0, + 4096, ANV_BO_EXTERNAL, &semaphore->permanent.bo); if (result != VK_SUCCESS) { vk_free2(&device->alloc, pAllocator, semaphore); @@ -1120,7 +1120,8 @@ VkResult anv_ImportSemaphoreFdKHR( new_impl.type = ANV_SEMAPHORE_TYPE_BO; VkResult result = anv_bo_cache_import(device, &device->bo_cache, - fd, 0, &new_impl.bo); + fd, ANV_BO_EXTERNAL, + &new_impl.bo); if (result != 
VK_SUCCESS) return result; diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c index 1403601e9c0..5ed1d711689 100644 --- a/src/intel/vulkan/anv_wsi.c +++ b/src/intel/vulkan/anv_wsi.c @@ -216,28 +216,45 @@ VkResult anv_GetSwapchainImagesKHR( } VkResult anv_AcquireNextImageKHR( - VkDevice _device, + VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, VkSemaphore semaphore, VkFence fence, uint32_t* pImageIndex) +{ + VkAcquireNextImageInfoKHR acquire_info = { + .sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR, + .swapchain = swapchain, + .timeout = timeout, + .semaphore = semaphore, + .fence = fence, + .deviceMask = 0, + }; + + return anv_AcquireNextImage2KHR(device, &acquire_info, pImageIndex); +} + +VkResult anv_AcquireNextImage2KHR( + VkDevice _device, + const VkAcquireNextImageInfoKHR* pAcquireInfo, + uint32_t* pImageIndex) { ANV_FROM_HANDLE(anv_device, device, _device); struct anv_physical_device *pdevice = &device->instance->physicalDevice; - VkResult result = wsi_common_acquire_next_image(&pdevice->wsi_device, - _device, - swapchain, - timeout, - semaphore, - pImageIndex); + VkResult result = wsi_common_acquire_next_image2(&pdevice->wsi_device, + _device, + pAcquireInfo, + pImageIndex); /* Thanks to implicit sync, the image is ready immediately. However, we * should wait for the current GPU state to finish. 
*/ - if (fence != VK_NULL_HANDLE) - anv_QueueSubmit(anv_queue_to_handle(&device->queue), 0, NULL, fence); + if (pAcquireInfo->fence != VK_NULL_HANDLE) { + anv_QueueSubmit(anv_queue_to_handle(&device->queue), 0, NULL, + pAcquireInfo->fence); + } return result; } diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 3acfbb710c0..da51cb9781c 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -48,6 +48,7 @@ clamp_int64(int64_t x, int64_t min, int64_t max) void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer) { + struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; uint32_t count = cmd_buffer->state.gfx.dynamic.scissor.count; const VkRect2D *scissors = cmd_buffer->state.gfx.dynamic.scissor.scissors; struct anv_state scissor_state = @@ -73,8 +74,8 @@ gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer) /* Do this math using int64_t so overflow gets clamped correctly. */ .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max), .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max), - .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max), - .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max) + .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, fb->height - 1), + .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, fb->width - 1) }; if (s->extent.width <= 0 || s->extent.height <= 0) { @@ -245,7 +246,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) ib.CutIndexEnable = pipeline->primitive_restart; #endif ib.IndexFormat = cmd_buffer->state.gfx.gen7.index_type; - ib.MemoryObjectControlState = GENX(MOCS); + ib.IndexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, + buffer->address.bo); ib.BufferStartingAddress = anv_address_add(buffer->address, offset); diff --git a/src/intel/vulkan/gen8_cmd_buffer.c 
b/src/intel/vulkan/gen8_cmd_buffer.c index ca2baf84a19..752d04f3013 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -565,7 +565,8 @@ void genX(CmdBindIndexBuffer)( anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) { ib.IndexFormat = vk_to_gen_index_type[indexType]; - ib.MemoryObjectControlState = GENX(MOCS); + ib.IndexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, + buffer->address.bo); ib.BufferStartingAddress = anv_address_add(buffer->address, offset); ib.BufferSize = buffer->size - offset; } diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index b7ed817d3a0..7cef4c55cf1 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -2493,6 +2493,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) uint32_t *p; uint32_t vb_emit = cmd_buffer->state.gfx.vb_dirty & pipeline->vb_used; + if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) + vb_emit |= pipeline->vb_used; assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); @@ -2514,12 +2516,11 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) struct GENX(VERTEX_BUFFER_STATE) state = { .VertexBufferIndex = vb, -#if GEN_GEN >= 8 - .MemoryObjectControlState = GENX(MOCS), -#else + .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, + buffer->address.bo), +#if GEN_GEN <= 7 .BufferAccessType = pipeline->vb[vb].instanced ? 
INSTANCEDATA : VERTEXDATA, .InstanceDataStepRate = pipeline->vb[vb].instance_divisor, - .VertexBufferMemoryObjectControlState = GENX(MOCS), #endif .AddressModifyEnable = true, @@ -2612,7 +2613,8 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) pipeline->depth_clamp_enable); } - if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) + if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_DYNAMIC_SCISSOR | + ANV_CMD_DIRTY_RENDER_TARGETS)) gen7_cmd_buffer_emit_scissor(cmd_buffer); genX(cmd_buffer_flush_dynamic_state)(cmd_buffer); @@ -2633,12 +2635,11 @@ emit_vertex_bo(struct anv_cmd_buffer *cmd_buffer, .VertexBufferIndex = index, .AddressModifyEnable = true, .BufferPitch = 0, + .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, addr.bo), #if (GEN_GEN >= 8) - .MemoryObjectControlState = GENX(MOCS), .BufferStartingAddress = addr, .BufferSize = size #else - .VertexBufferMemoryObjectControlState = GENX(MOCS), .BufferStartingAddress = addr, .EndAddress = anv_address_add(addr, size), #endif @@ -3390,9 +3391,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) if (dw == NULL) return; - struct isl_depth_stencil_hiz_emit_info info = { - .mocs = device->default_mocs, - }; + struct isl_depth_stencil_hiz_emit_info info = { }; if (iview) info.view = &iview->planes[0].isl; @@ -3410,6 +3409,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) image->planes[depth_plane].address.bo, image->planes[depth_plane].address.offset + surface->offset); + info.mocs = + anv_mocs_for_bo(device, image->planes[depth_plane].address.bo); const uint32_t ds = cmd_buffer->state.subpass->depth_stencil_attachment->attachment; @@ -3441,6 +3442,8 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) image->planes[stencil_plane].address.bo, image->planes[stencil_plane].address.offset + surface->offset); + info.mocs = + anv_mocs_for_bo(device, image->planes[stencil_plane].address.bo); } isl_emit_depth_stencil_hiz_s(&device->isl_dev, 
dw, &info); diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c index eaafcfa3b22..b51c1804659 100644 --- a/src/intel/vulkan/genX_gpu_memcpy.c +++ b/src/intel/vulkan/genX_gpu_memcpy.c @@ -158,11 +158,10 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer, .AddressModifyEnable = true, .BufferStartingAddress = { src, src_offset }, .BufferPitch = bs, + .VertexBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, src), #if (GEN_GEN >= 8) - .MemoryObjectControlState = GENX(MOCS), .BufferSize = size, #else - .VertexBufferMemoryObjectControlState = GENX(MOCS), .EndAddress = { src, src_offset + size - 1 }, #endif }); @@ -219,7 +218,7 @@ genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer, anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SO_BUFFER), sob) { sob.SOBufferIndex = 0; - sob.SOBufferObjectControlState = GENX(MOCS); + sob.SOBufferMOCS = anv_mocs_for_bo(cmd_buffer->device, dst), sob.SurfaceBaseAddress = (struct anv_address) { dst, dst_offset }; #if GEN_GEN >= 8 diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index ec47360764c..cb560dcec70 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -91,7 +91,8 @@ emit_vertex_input(struct anv_pipeline *pipeline, /* Pull inputs_read out of the VS prog data */ const uint64_t inputs_read = vs_prog_data->inputs_read; - const uint64_t double_inputs_read = vs_prog_data->double_inputs_read; + const uint64_t double_inputs_read = + vs_prog_data->double_inputs_read & inputs_read; assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0); const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0; const uint32_t elements_double = double_inputs_read >> VERT_ATTRIB_GENERIC0; @@ -115,7 +116,34 @@ emit_vertex_input(struct anv_pipeline *pipeline, GENX(3DSTATE_VERTEX_ELEMENTS)); if (!p) return; - memset(p + 1, 0, (num_dwords - 1) * 4); + + for (uint32_t i = 0; i < total_elems; i++) { + /* The SKL docs for 
VERTEX_ELEMENT_STATE say: + * + * "All elements must be valid from Element[0] to the last valid + * element. (I.e. if Element[2] is valid then Element[1] and + * Element[0] must also be valid)." + * + * The SKL docs for 3D_Vertex_Component_Control say: + * + * "Don't store this component. (Not valid for Component 0, but can + * be used for Component 1-3)." + * + * So we can't just leave a vertex element blank and hope for the best. + * We have to tell the VF hardware to put something in it; so we just + * store a bunch of zero. + * + * TODO: Compact vertex elements so we never end up with holes. + */ + struct GENX(VERTEX_ELEMENT_STATE) element = { + .Valid = true, + .Component0Control = VFCOMP_STORE_0, + .Component1Control = VFCOMP_STORE_0, + .Component2Control = VFCOMP_STORE_0, + .Component3Control = VFCOMP_STORE_0, + }; + GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + i * 2], &element); + } for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) { const VkVertexInputAttributeDescription *desc = @@ -727,14 +755,14 @@ sanitize_ds_state(VkPipelineDepthStencilStateCreateInfo *state, { *stencilWriteEnable = state->stencilTestEnable; - /* If the depth test is disabled, we won't be writing anything. */ - if (!state->depthTestEnable) - state->depthWriteEnable = false; - - /* The Vulkan spec requires that if either depth or stencil is not present, - * the pipeline is to act as if the test silently passes. + /* If the depth test is disabled, we won't be writing anything. Make sure we + * treat the test as always passing later on as well. + * + * Also, the Vulkan spec requires that if either depth or stencil is not + * present, the pipeline is to act as if the test silently passes. In that + * case we won't write either. 
*/ - if (!(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) { + if (!state->depthTestEnable || !(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) { state->depthWriteEnable = false; state->depthCompareOp = VK_COMPARE_OP_ALWAYS; } @@ -1141,7 +1169,28 @@ emit_3dstate_vs(struct anv_pipeline *pipeline) vs.IllegalOpcodeExceptionEnable = false; vs.SoftwareExceptionEnable = false; vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1; - vs.VertexCacheDisable = false; + + if (GEN_GEN == 9 && devinfo->gt == 4 && + anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) { + /* On Sky Lake GT4, we have experienced some hangs related to the VS + * cache and tessellation. It is unknown exactly what is happening + * but the Haswell docs for the "VS Reference Count Full Force Miss + * Enable" field of the "Thread Mode" register refer to a HSW bug in + * which the VUE handle reference count would overflow resulting in + * internal reference counting bugs. My (Jason's) best guess is that + * this bug cropped back up on SKL GT4 when we suddenly had more + * threads in play than any previous gen9 hardware. + * + * What we do know for sure is that setting this bit when + * tessellation shaders are in use fixes a GPU hang in Batman: Arkham + * City when playing with DXVK (https://bugs.freedesktop.org/107280). + * Disabling the vertex cache with tessellation shaders should only + * have a minor performance impact as the tessellation shaders are + * likely generating and processing far more geometry than the vertex + * stage. 
+ */ + vs.VertexCacheDisable = true; + } vs.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length; vs.VertexURBEntryReadOffset = 0; diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index e35e9b85844..9b1d73b7259 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -347,6 +347,11 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer, sdi.Address.offset = slot_offset + j * sizeof(uint64_t); sdi.ImmediateData = 0ull; } + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { + sdi.Address.bo = &pool->bo; + sdi.Address.offset = slot_offset + j * sizeof(uint64_t) + 4; + sdi.ImmediateData = 0ull; + } } emit_query_availability(cmd_buffer, &pool->bo, slot_offset); } diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index b1014d9e797..847fff83949 100644 --- a/src/intel/vulkan/genX_state.c +++ b/src/intel/vulkan/genX_state.c @@ -93,6 +93,12 @@ genX(init_device_state)(struct anv_device *device) { GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs, &GENX(MOCS)); +#if GEN_GEN >= 8 + GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->external_mocs, + &GENX(EXTERNAL_MOCS)); +#else + device->external_mocs = device->default_mocs; +#endif struct anv_batch batch; @@ -184,6 +190,24 @@ genX(init_device_state)(struct anv_device *device) #endif } +#if GEN_GEN >= 10 + /* A fixed function pipe flush is required before modifying this field */ + anv_batch_emit(&batch, GENX(PIPE_CONTROL), pipe) { + pipe.PipeControlFlushEnable = true; + } + + /* enable object level preemption */ + uint32_t csc1; + + anv_pack_struct(&csc1, GENX(CS_CHICKEN1), + .ReplayMode = ObjectLevelPreemption, + .ReplayModeMask = 1); + anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) { + lri.RegisterOffset = GENX(CS_CHICKEN1_num); + lri.DataDWord = csc1; + } +#endif + anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe); assert(batch.next <= batch.end); diff --git a/src/intel/vulkan/meson.build 
b/src/intel/vulkan/meson.build index e427c7471f4..49ff641f0ba 100644 --- a/src/intel/vulkan/meson.build +++ b/src/intel/vulkan/meson.build @@ -102,7 +102,7 @@ foreach g : [['70', ['gen7_cmd_buffer.c']], ['75', ['gen7_cmd_buffer.c']], inc_vulkan_wsi, ], c_args : [ - c_vis_args, no_override_init_args, '-msse2', + c_vis_args, no_override_init_args, c_sse2_args, '-DGEN_VERSIONx10=@0@'.format(_gen), ], dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers], @@ -146,7 +146,7 @@ anv_deps = [ anv_flags = [ c_vis_args, no_override_init_args, - '-msse2', + c_sse2_args, ] if with_platform_x11 diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c index 473fe6c9089..fc42b8ea9c4 100644 --- a/src/loader/loader_dri3_helper.c +++ b/src/loader/loader_dri3_helper.c @@ -1737,6 +1737,7 @@ dri3_get_buffer(__DRIdrawable *driDrawable, struct loader_dri3_drawable *draw) { struct loader_dri3_buffer *buffer; + bool fence_await = buffer_type == loader_dri3_buffer_back; int buf_id; if (buffer_type == loader_dri3_buffer_back) { @@ -1793,6 +1794,7 @@ dri3_get_buffer(__DRIdrawable *driDrawable, 0, 0, 0, 0, draw->width, draw->height); dri3_fence_trigger(draw->conn, new_buffer); + fence_await = true; } dri3_free_render_buffer(draw, buffer); } else if (buffer_type == loader_dri3_buffer_front) { @@ -1814,12 +1816,15 @@ dri3_get_buffer(__DRIdrawable *driDrawable, new_buffer->linear_buffer, 0, 0, draw->width, draw->height, 0, 0, 0); - } + } else + fence_await = true; } buffer = new_buffer; draw->buffers[buf_id] = buffer; } - dri3_fence_await(draw->conn, draw, buffer); + + if (fence_await) + dri3_fence_await(draw->conn, draw, buffer); /* * Do we need to preserve the content of a previous buffer? 
diff --git a/src/mapi/glapi/gen/KHR_robustness_es.xml b/src/mapi/glapi/gen/KHR_robustness_es.xml index 84f6fd2cdb9..82b7edf31a7 100644 --- a/src/mapi/glapi/gen/KHR_robustness_es.xml +++ b/src/mapi/glapi/gen/KHR_robustness_es.xml @@ -60,4 +60,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/mesa/Android.libmesa_dricore.mk b/src/mesa/Android.libmesa_dricore.mk index 34fd8581c2d..792117767b4 100644 --- a/src/mesa/Android.libmesa_dricore.mk +++ b/src/mesa/Android.libmesa_dricore.mk @@ -49,7 +49,7 @@ ifeq ($(ARCH_X86_HAVE_SSE4_1),true) LOCAL_WHOLE_STATIC_LIBRARIES := \ libmesa_sse41 LOCAL_CFLAGS := \ - -msse4.1 \ + -msse4.1 -mstackrealign \ -DUSE_SSE41 endif diff --git a/src/mesa/Android.libmesa_sse41.mk b/src/mesa/Android.libmesa_sse41.mk index da40f43df69..de19a1fb48b 100644 --- a/src/mesa/Android.libmesa_sse41.mk +++ b/src/mesa/Android.libmesa_sse41.mk @@ -34,7 +34,7 @@ LOCAL_SRC_FILES += \ $(X86_SSE41_FILES) LOCAL_CFLAGS := \ - -msse4.1 + -msse4.1 -mstackrealign LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ diff --git a/src/mesa/drivers/dri/Android.mk b/src/mesa/drivers/dri/Android.mk index 53ff4b4f632..dc1f98364c8 100644 --- a/src/mesa/drivers/dri/Android.mk +++ b/src/mesa/drivers/dri/Android.mk @@ -49,11 +49,18 @@ MESA_DRI_WHOLE_STATIC_LIBRARIES := \ MESA_DRI_SHARED_LIBRARIES := \ libcutils \ libdl \ - libexpat \ libglapi \ liblog \ libz +# If Android version >=8 MESA should static link libexpat else should dynamic link +ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0) +MESA_DRI_WHOLE_STATIC_LIBRARIES += \ + libexpat +else +MESA_DRI_SHARED_LIBRARIES += \ + libexpat +endif #----------------------------------------------- # Build drivers and libmesa_dri_common diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index 324d087220a..3052f6f1495 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -274,6 +274,7 @@ LOCAL_LDFLAGS += 
$(MESA_DRI_LDFLAGS) LOCAL_CFLAGS := \ $(MESA_DRI_CFLAGS) +LOCAL_CFLAGS += -Wno-error ifeq ($(ARCH_X86_HAVE_SSE4_1),true) LOCAL_CFLAGS += \ -DUSE_SSE41 diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index 889d4c68a2b..0afa7a2f216 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -44,7 +44,7 @@ AM_CFLAGS = \ $(WNO_OVERRIDE_INIT) \ $(LIBDRM_CFLAGS) \ $(VALGRIND_CFLAGS) \ - -msse2 + -msse2 -mstackrealign AM_CXXFLAGS = $(AM_CFLAGS) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c index 7476cee43a4..ad3a47ef035 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ b/src/mesa/drivers/dri/i965/brw_blorp.c @@ -187,6 +187,9 @@ blorp_surf_for_miptree(struct brw_context *brw, assert((surf->aux_usage == ISL_AUX_USAGE_NONE) == (surf->aux_addr.buffer == NULL)); + if (!is_render_target && brw->screen->devinfo.gen == 9) + gen9_apply_single_tex_astc5x5_wa(brw, mt->format, surf->aux_usage); + /* ISL wants real levels, not offset ones. */ *level -= mt->first_level; } @@ -382,7 +385,8 @@ brw_blorp_blit_miptrees(struct brw_context *brw, enum isl_format src_isl_format = brw_blorp_to_isl_format(brw, src_format, false); enum isl_aux_usage src_aux_usage = - intel_miptree_texture_aux_usage(brw, src_mt, src_isl_format); + intel_miptree_texture_aux_usage(brw, src_mt, src_isl_format, + 0 /* The astc5x5 WA isn't needed */); /* We do format workarounds for some depth formats so we can't reliably * sample with HiZ. One of these days, we should fix that. */ @@ -1220,12 +1224,12 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb, x0 = fb->_Xmin; x1 = fb->_Xmax; - if (rb->Name != 0) { - y0 = fb->_Ymin; - y1 = fb->_Ymax; - } else { + if (fb->FlipY) { y0 = rb->Height - fb->_Ymax; y1 = rb->Height - fb->_Ymin; + } else { + y0 = fb->_Ymin; + y1 = fb->_Ymax; } /* If the clear region is empty, just return. 
*/ @@ -1411,9 +1415,8 @@ brw_blorp_clear_depth_stencil(struct brw_context *brw, if (!(mask & (BUFFER_BITS_DEPTH_STENCIL))) return; - uint32_t x0, x1, y0, y1, rb_name, rb_height; + uint32_t x0, x1, y0, y1, rb_height; if (depth_rb) { - rb_name = depth_rb->Name; rb_height = depth_rb->Height; if (stencil_rb) { assert(depth_rb->Width == stencil_rb->Width); @@ -1421,18 +1424,17 @@ brw_blorp_clear_depth_stencil(struct brw_context *brw, } } else { assert(stencil_rb); - rb_name = stencil_rb->Name; rb_height = stencil_rb->Height; } x0 = fb->_Xmin; x1 = fb->_Xmax; - if (rb_name != 0) { - y0 = fb->_Ymin; - y1 = fb->_Ymax; - } else { + if (fb->FlipY) { y0 = rb_height - fb->_Ymax; y1 = rb_height - fb->_Ymin; + } else { + y0 = fb->_Ymin; + y1 = fb->_Ymax; } /* If the clear region is empty, just return. */ diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index 09d45e30ecc..31284864973 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -496,7 +496,6 @@ bo_alloc_internal(struct brw_bufmgr *bufmgr, uint32_t stride) { struct brw_bo *bo; - unsigned int page_size = getpagesize(); int ret; struct bo_cache_bucket *bucket; bool alloc_from_cache; @@ -522,12 +521,12 @@ bo_alloc_internal(struct brw_bufmgr *bufmgr, * allocation up. */ if (bucket == NULL) { - bo_size = size; - if (bo_size < page_size) - bo_size = page_size; + unsigned int page_size = getpagesize(); + bo_size = size == 0 ? 
page_size : ALIGN(size, page_size); } else { bo_size = bucket->size; } + assert(bo_size); mtx_lock(&bufmgr->lock); /* Get a buffer out of the cache if available */ @@ -1490,7 +1489,7 @@ brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd) brw_bo_make_external(bo); if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle, - DRM_CLOEXEC, prime_fd) != 0) + DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0) return -errno; bo->reusable = false; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 136f1325c6e..7f0c5dd57d8 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -695,7 +695,7 @@ brw_initialize_context_constants(struct brw_context *brw) /* ARB_viewport_array, OES_viewport_array */ if (devinfo->gen >= 6) { ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS; - ctx->Const.ViewportSubpixelBits = 0; + ctx->Const.ViewportSubpixelBits = 8; /* Cast to float before negating because MaxViewportWidth is unsigned. 
*/ @@ -863,6 +863,19 @@ brw_process_driconf_options(struct brw_context *brw) ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20); driComputeOptionsSha1(&brw->screen->optionCache, ctx->Const.dri_config_options_sha1); + + brw->screen->compiler->simd32_heuristics_control.grouped_sends_check = + driQueryOptionb(&brw->optionCache, "simd32_heuristic_grouped_check"); + brw->screen->compiler->simd32_heuristics_control.max_grouped_sends = + driQueryOptioni(&brw->optionCache, "simd32_heuristic_grouped_sends"); + brw->screen->compiler->simd32_heuristics_control.inst_count_check = + driQueryOptionb(&brw->optionCache, "simd32_heuristic_inst_check"); + brw->screen->compiler->simd32_heuristics_control.inst_count_ratio = + driQueryOptionf(&brw->optionCache, "simd32_heuristic_inst_ratio"); + brw->screen->compiler->simd32_heuristics_control.mrt_check = + driQueryOptionb(&brw->optionCache, "simd32_heuristic_mrt_check"); + brw->screen->compiler->simd32_heuristics_control.max_mrts = + driQueryOptioni(&brw->optionCache, "simd32_heuristic_max_mrts"); } GLboolean diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 72be8f2a4d0..607f67f70e0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -168,6 +168,11 @@ enum brw_cache_id { BRW_MAX_CACHE }; +enum gen9_astc5x5_wa_tex_type { + GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5 = 1 << 0, + GEN9_ASTC5X5_WA_TEX_TYPE_AUX = 1 << 1, +}; + enum brw_state_id { /* brw_cache_ids must come first - see brw_program_cache.c */ BRW_STATE_URB_FENCE = BRW_MAX_CACHE, @@ -681,6 +686,7 @@ enum brw_query_kind { OA_COUNTERS, OA_COUNTERS_RAW, PIPELINE_STATS, + NULL_RENDERER, }; struct brw_perf_query_register_prog { @@ -826,6 +832,8 @@ struct brw_context GLuint primitive; /**< Hardware primitive, such as _3DPRIM_TRILIST. */ + bool object_preemption; /**< Object level preemption enabled. 
*/ + GLenum reduced_primitive; /** @@ -1226,6 +1234,7 @@ struct brw_context int n_active_oa_queries; int n_active_pipeline_stats_queries; + int n_active_null_renderers; /* The number of queries depending on running OA counters which * extends beyond brw_end_perf_query() since we need to wait until @@ -1315,6 +1324,8 @@ struct brw_context */ enum isl_aux_usage draw_aux_usage[MAX_DRAW_BUFFERS]; + enum gen9_astc5x5_wa_tex_type gen9_astc5x5_wa_tex_mask; + __DRIcontext *driContext; struct intel_screen *screen; }; @@ -1339,6 +1350,10 @@ void intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable); void intel_prepare_render(struct brw_context *brw); +void gen9_apply_single_tex_astc5x5_wa(struct brw_context *brw, + mesa_format format, + enum isl_aux_usage aux_usage); + void brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering, bool *draw_aux_buffer_disabled); diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 855f1c7d744..ff69685ec2c 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1650,11 +1650,17 @@ enum brw_pixel_shader_coverage_mask_mode { #define GEN10_CACHE_MODE_SS 0x0e420 #define GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4) -#define INSTPM 0x20c0 +#define INSTPM 0x20c0 /* Gen6-8 */ # define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6) +# define INSTPM_GLOBAL_DEBUG_ENABLE (1 << 4) +# define INSTPM_MEDIA_INSTRUCTION_DISABLE (1 << 3) +# define INSTPM_3D_RENDERER_INSTRUCTION_DISABLE (1 << 2) +# define INSTPM_3D_STATE_INSTRUCTION_DISABLE (1 << 1) #define CS_DEBUG_MODE2 0x20d8 /* Gen9+ */ # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4) +# define CSDBG2_MEDIA_INSTRUCTION_DISABLE (1 << 1) +# define CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE (1 << 0) #define GEN7_RPSTAT1 0xA01C #define GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT 7 @@ -1673,4 +1679,9 @@ enum brw_pixel_shader_coverage_mask_mode { # define 
GLK_SCEC_BARRIER_MODE_3D_HULL (1 << 7) # define GLK_SCEC_BARRIER_MODE_MASK REG_MASK(1 << 7) +#define CS_CHICKEN1 0x2580 /* Gen9+ */ +# define GEN9_REPLAY_MODE_MIDBUFFER (0 << 0) +# define GEN9_REPLAY_MODE_MIDOBJECT (1 << 0) +# define GEN9_REPLAY_MODE_MASK REG_MASK(1 << 0) + #endif diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 71461d7b0a7..8536c040109 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -378,6 +378,68 @@ intel_disable_rb_aux_buffer(struct brw_context *brw, return found; } +/** Implement the ASTC 5x5 sampler workaround + * + * Gen9 sampling hardware has a bug where an ASTC 5x5 compressed surface + * cannot live in the sampler cache at the same time as an aux compressed + * surface. In order to work around the bug we have to stall rendering with a + * CS and pixel scoreboard stall (implicit in the CS stall) and invalidate the + * texture cache whenever one of ASTC 5x5 or aux compressed may be in the + * sampler cache and we're about to render with something which samples from + * the other. + * + * In the case of a single shader which textures from both ASTC 5x5 and + * a texture which is CCS or HiZ compressed, we have to resolve the aux + * compressed texture prior to rendering. This second part is handled in + * brw_predraw_resolve_inputs() below. + * + * We have observed this issue to affect CCS and HiZ sampling but whether or + * not it also affects MCS is unknown. Because MCS has no concept of a + * resolve (and doing one would be stupid expensive), we choose to simply + * ignore the possibility and hope for the best. 
+ */ +static void +gen9_apply_astc5x5_wa_flush(struct brw_context *brw, + enum gen9_astc5x5_wa_tex_type curr_mask) +{ + assert(brw->screen->devinfo.gen == 9); + + if (((brw->gen9_astc5x5_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) && + (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX)) || + ((brw->gen9_astc5x5_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX) && + (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5))) { + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL); + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); + } + + brw->gen9_astc5x5_wa_tex_mask = curr_mask; +} + +static enum gen9_astc5x5_wa_tex_type +gen9_astc5x5_wa_bits(mesa_format format, enum isl_aux_usage aux_usage) +{ + if (aux_usage != ISL_AUX_USAGE_NONE && + aux_usage != ISL_AUX_USAGE_MCS) + return GEN9_ASTC5X5_WA_TEX_TYPE_AUX; + + if (format == MESA_FORMAT_RGBA_ASTC_5x5 || + format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5) + return GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5; + + return 0; +} + +/* Helper for the gen9 ASTC 5x5 workaround. This version exists for BLORP's + * use-cases where only a single texture is bound. + */ +void +gen9_apply_single_tex_astc5x5_wa(struct brw_context *brw, + mesa_format format, + enum isl_aux_usage aux_usage) +{ + gen9_apply_astc5x5_wa_flush(brw, gen9_astc5x5_wa_bits(format, aux_usage)); +} + static void mark_textures_used_for_txf(BITSET_WORD *used_for_txf, const struct gl_program *prog) @@ -417,8 +479,30 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering, mark_textures_used_for_txf(used_for_txf, ctx->ComputeProgram._Current); } - /* Resolve depth buffer and render cache of each enabled texture. */ int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit; + + enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits = 0; + if (brw->screen->devinfo.gen == 9) { + /* In order to properly implement the ASTC 5x5 workaround for an + * arbitrary draw or dispatch call, we have to walk the entire list of + * textures looking for ASTC 5x5. 
If there is any ASTC 5x5 in this draw + * call, all aux compressed textures must be resolved and have aux + * compression disabled while sampling. + */ + for (int i = 0; i <= maxEnabledUnit; i++) { + if (!ctx->Texture.Unit[i]._Current) + continue; + tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); + if (!tex_obj || !tex_obj->mt) + continue; + + astc5x5_wa_bits |= gen9_astc5x5_wa_bits(tex_obj->_Format, + tex_obj->mt->aux_usage); + } + gen9_apply_astc5x5_wa_flush(brw, astc5x5_wa_bits); + } + + /* Resolve depth buffer and render cache of each enabled texture. */ for (int i = 0; i <= maxEnabledUnit; i++) { if (!ctx->Texture.Unit[i]._Current) continue; @@ -452,7 +536,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering, intel_miptree_prepare_texture(brw, tex_obj->mt, view_format, min_level, num_levels, - min_layer, num_layers); + min_layer, num_layers, + astc5x5_wa_bits); /* If any programs are using it with texelFetch, we may need to also do * a prepare with an sRGB format to ensure texelFetch works "properly". 
@@ -463,7 +548,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering, if (txf_format != view_format) { intel_miptree_prepare_texture(brw, tex_obj->mt, txf_format, min_level, num_levels, - min_layer, num_layers); + min_layer, num_layers, + astc5x5_wa_bits); } } @@ -535,7 +621,8 @@ brw_predraw_resolve_framebuffer(struct brw_context *brw, if (irb) { intel_miptree_prepare_texture(brw, irb->mt, irb->mt->surf.format, irb->mt_level, 1, - irb->mt_layer, irb->layer_count); + irb->mt_layer, irb->layer_count, + brw->gen9_astc5x5_wa_tex_mask); } } } diff --git a/src/mesa/drivers/dri/i965/brw_generate_mipmap.c b/src/mesa/drivers/dri/i965/brw_generate_mipmap.c index 32c2933f721..4125ae6e11c 100644 --- a/src/mesa/drivers/dri/i965/brw_generate_mipmap.c +++ b/src/mesa/drivers/dri/i965/brw_generate_mipmap.c @@ -105,6 +105,23 @@ brw_generate_mipmap(struct gl_context *ctx, GLenum target, last_layer = base_size->array_len - 1; } + /* The GL_EXT_texture_sRGB_decode extension's issues section says: + * + * "10) How is mipmap generation of sRGB textures affected by the + * TEXTURE_SRGB_DECODE_EXT parameter? + * + * RESOLVED: When the TEXTURE_SRGB_DECODE parameter is DECODE_EXT + * for an sRGB texture, mipmap generation should decode sRGB texels + * to a linear RGB color space, perform downsampling, then encode + * back to an sRGB color space. (Issue 24 in the EXT_texture_sRGB + * specification provides a rationale for why.) When the parameter + * is SKIP_DECODE_EXT instead, mipmap generation skips the encode + * and decode steps during mipmap generation. By skipping the + * encode and decode steps, sRGB mipmap generation should match + * the mipmap generation for a non-sRGB texture." 
+ */ + bool do_srgb = tex_obj->Sampler.sRGBDecode == GL_DECODE_EXT; + for (unsigned dst_level = base_level + 1; dst_level <= last_level; dst_level++) { @@ -121,7 +138,7 @@ brw_generate_mipmap(struct gl_context *ctx, GLenum target, minify(base_size->width, dst_level), minify(base_size->height, dst_level), GL_LINEAR, false, false, - true, true); + do_srgb, do_srgb); } } } diff --git a/src/mesa/drivers/dri/i965/brw_multisample_state.h b/src/mesa/drivers/dri/i965/brw_multisample_state.h index 6cf324e561c..2142a17a484 100644 --- a/src/mesa/drivers/dri/i965/brw_multisample_state.h +++ b/src/mesa/drivers/dri/i965/brw_multisample_state.h @@ -38,13 +38,13 @@ /** * 1x MSAA has a single sample at the center: (0.5, 0.5) -> (0x8, 0x8). * - * 2x MSAA sample positions are (0.25, 0.25) and (0.75, 0.75): + * 2x MSAA sample positions are (0.75, 0.75) and (0.25, 0.25): * 4 c - * 4 0 - * c 1 + * 4 1 + * c 0 */ static const uint32_t -brw_multisample_positions_1x_2x = 0x0088cc44; +brw_multisample_positions_1x_2x = 0x008844cc; /** * Sample positions: diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index d45529fc0c7..f04fbf32c80 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -329,6 +329,12 @@ dump_perf_query_callback(GLuint id, void *query_void, void *brw_void) o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,"), obj->pipeline_stats.bo ? "yes" : "no"); break; + case NULL_RENDERER: + DBG("%4d: %-6s %-8s NULL_RENDERER\n", + id, + o->Used ? "Dirty," : "New,", + o->Active ? "Active," : (o->Ready ? 
"Ready," : "Pending,")); + break; default: unreachable("Unknown query type"); break; @@ -430,6 +436,10 @@ brw_get_perf_query_info(struct gl_context *ctx, *n_active = brw->perfquery.n_active_pipeline_stats_queries; break; + case NULL_RENDERER: + *n_active = brw->perfquery.n_active_null_renderers; + break; + default: unreachable("Unknown query type"); break; @@ -1019,6 +1029,7 @@ brw_begin_perf_query(struct gl_context *ctx, struct brw_context *brw = brw_context(ctx); struct brw_perf_query_object *obj = brw_perf_query(o); const struct brw_perf_query_info *query = obj->query; + const struct gen_device_info *devinfo = &brw->screen->devinfo; /* We can assume the frontend hides mistaken attempts to Begin a * query object multiple times before its End. Similarly if an @@ -1103,7 +1114,6 @@ brw_begin_perf_query(struct gl_context *ctx, /* If the OA counters aren't already on, enable them. */ if (brw->perfquery.oa_stream_fd == -1) { __DRIscreen *screen = brw->screen->driScrnPriv; - const struct gen_device_info *devinfo = &brw->screen->devinfo; /* The period_exponent gives a sampling period as follows: * sample_period = timestamp_period * 2^(period_exponent + 1) @@ -1249,6 +1259,23 @@ brw_begin_perf_query(struct gl_context *ctx, ++brw->perfquery.n_active_pipeline_stats_queries; break; + case NULL_RENDERER: + ++brw->perfquery.n_active_null_renderers; + if (devinfo->gen >= 9) { + brw_load_register_imm32(brw, CS_DEBUG_MODE2, + REG_MASK(CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE) | + CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE); + } else { + brw_load_register_imm32(brw, INSTPM, + REG_MASK(INSTPM_3D_RENDERER_INSTRUCTION_DISABLE | + INSTPM_MEDIA_INSTRUCTION_DISABLE) | + INSTPM_3D_RENDERER_INSTRUCTION_DISABLE | + INSTPM_MEDIA_INSTRUCTION_DISABLE); + } + brw_emit_pipe_control_flush(brw, + PIPE_CONTROL_LRI_WRITE_IMMEDIATE); + break; + default: unreachable("Unknown query type"); break; @@ -1269,6 +1296,7 @@ brw_end_perf_query(struct gl_context *ctx, { struct brw_context *brw = brw_context(ctx); 
struct brw_perf_query_object *obj = brw_perf_query(o); + const struct gen_device_info *devinfo = &brw->screen->devinfo; DBG("End(%d)\n", o->Id); @@ -1311,6 +1339,21 @@ brw_end_perf_query(struct gl_context *ctx, --brw->perfquery.n_active_pipeline_stats_queries; break; + case NULL_RENDERER: + if (--brw->perfquery.n_active_null_renderers == 0) { + if (devinfo->gen >= 9) { + brw_load_register_imm32(brw, CS_DEBUG_MODE2, + REG_MASK(CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE)); + } else { + brw_load_register_imm32(brw, INSTPM, + REG_MASK(INSTPM_3D_RENDERER_INSTRUCTION_DISABLE | + INSTPM_MEDIA_INSTRUCTION_DISABLE)); + } + brw_emit_pipe_control_flush(brw, + PIPE_CONTROL_LRI_WRITE_IMMEDIATE); + } + break; + default: unreachable("Unknown query type"); break; @@ -1336,6 +1379,9 @@ brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o) bo = obj->pipeline_stats.bo; break; + case NULL_RENDERER: + break; + default: unreachable("Unknown query type"); break; @@ -1386,6 +1432,8 @@ brw_is_perf_query_ready(struct gl_context *ctx, return (obj->pipeline_stats.bo && !brw_batch_references(&brw->batch, obj->pipeline_stats.bo) && !brw_bo_busy(obj->pipeline_stats.bo)); + case NULL_RENDERER: + return true; default: unreachable("Unknown query type"); @@ -1601,6 +1649,9 @@ brw_get_perf_query_data(struct gl_context *ctx, written = get_pipeline_stats_data(brw, obj, data_size, (uint8_t *)data); break; + case NULL_RENDERER: + break; + default: unreachable("Unknown query type"); break; @@ -1671,6 +1722,9 @@ brw_delete_perf_query(struct gl_context *ctx, } break; + case NULL_RENDERER: + break; + default: unreachable("Unknown query type"); break; @@ -2153,6 +2207,15 @@ get_register_queries_function(const struct gen_device_info *devinfo) return NULL; } +static void +fill_null_renderer_perf_query_info(struct brw_context *brw, + struct brw_perf_query_info *query) +{ + query->kind = NULL_RENDERER; + query->name = "Intel_Null_Hardware_Query"; + query->n_counters = 0; +} + static unsigned 
brw_init_perf_query_info(struct gl_context *ctx) { @@ -2211,6 +2274,10 @@ brw_init_perf_query_info(struct gl_context *ctx) enumerate_sysfs_metrics(brw); brw_perf_query_register_mdapi_oa_query(brw); + + struct brw_perf_query_info *null_query = + brw_perf_query_append_query_info(brw); + fill_null_renderer_perf_query_info(brw, null_query); } brw->perfquery.unaccumulated = diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index f6acf81b899..546d103d1a4 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -128,7 +128,7 @@ void brw_disk_cache_write_compute_program(struct brw_context *brw); void brw_disk_cache_write_render_programs(struct brw_context *brw); /*********************************************************************** - * brw_state.c + * brw_state_upload.c */ void brw_upload_render_state(struct brw_context *brw); void brw_render_state_finished(struct brw_context *brw); @@ -138,6 +138,7 @@ void brw_init_state(struct brw_context *brw); void brw_destroy_state(struct brw_context *brw); void brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline); +void brw_enable_obj_preemption(struct brw_context *brw, bool enable); static inline void brw_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline) diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 757426407c3..af48942927f 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -45,6 +45,28 @@ #include "brw_cs.h" #include "main/framebuffer.h" +void +brw_enable_obj_preemption(struct brw_context *brw, bool enable) +{ + const struct gen_device_info *devinfo = &brw->screen->devinfo; + assert(devinfo->gen >= 9); + + if (enable == brw->object_preemption) + return; + + /* A fixed function pipe flush is required before modifying this field */ + brw_emit_pipe_control_flush(brw, 
PIPE_CONTROL_FLUSH_ENABLE); + + bool replay_mode = enable ? + GEN9_REPLAY_MODE_MIDOBJECT : GEN9_REPLAY_MODE_MIDBUFFER; + + /* enable object level preemption */ + brw_load_register_imm32(brw, CS_CHICKEN1, + replay_mode | GEN9_REPLAY_MODE_MASK); + + brw->object_preemption = enable; +} + static void brw_upload_initial_gpu_state(struct brw_context *brw) { @@ -135,6 +157,9 @@ brw_upload_initial_gpu_state(struct brw_context *brw) ADVANCE_BATCH(); } } + + if (devinfo->gen >= 10) + brw_enable_obj_preemption(brw, true); } static inline const struct brw_tracked_state * diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 9397b637c79..12a3e3d7a6c 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -603,7 +603,8 @@ static void brw_update_texture_surface(struct gl_context *ctx, view.usage |= ISL_SURF_USAGE_CUBE_BIT; enum isl_aux_usage aux_usage = - intel_miptree_texture_aux_usage(brw, mt, format); + intel_miptree_texture_aux_usage(brw, mt, format, + brw->gen9_astc5x5_wa_tex_mask); brw_emit_surface_state(brw, mt, mt->target, view, aux_usage, surf_offset, surf_index, @@ -1107,7 +1108,8 @@ update_renderbuffer_read_surfaces(struct brw_context *brw) }; enum isl_aux_usage aux_usage = - intel_miptree_texture_aux_usage(brw, irb->mt, format); + intel_miptree_texture_aux_usage(brw, irb->mt, format, + brw->gen9_astc5x5_wa_tex_mask); if (brw->draw_aux_usage[i] == ISL_AUX_USAGE_NONE) aux_usage = ISL_AUX_USAGE_NONE; diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c index bfa84fb9b77..3ce47f423ba 100644 --- a/src/mesa/drivers/dri/i965/gen6_multisample_state.c +++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c @@ -68,10 +68,10 @@ gen6_get_sample_position(struct gl_context *ctx, * index layout in case of 2X and 4x MSAA, but they are different in * case of 8X MSAA. 
* - * 2X MSAA sample index / number layout - * --------- - * | 0 | 1 | - * --------- + * 2X MSAA sample index layout 2x MSAA sample number layout + * --------- --------- + * | 0 | 1 | | 1 | 0 | + * --------- --------- * * 4X MSAA sample index / number layout * --------- @@ -107,7 +107,7 @@ gen6_get_sample_position(struct gl_context *ctx, void gen6_set_sample_maps(struct gl_context *ctx) { - uint8_t map_2x[2] = {0, 1}; + uint8_t map_2x[2] = {1, 0}; uint8_t map_4x[4] = {0, 1, 2, 3}; uint8_t map_8x[8] = {3, 7, 5, 0, 1, 2, 4, 6}; uint8_t map_16x[16] = { 15, 10, 9, 7, 4, 1, 3, 13, diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c index 2e5f8e60ba9..e7259fc1b8d 100644 --- a/src/mesa/drivers/dri/i965/gen7_urb.c +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -118,6 +118,33 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size, const struct gen_device_info *devinfo = &brw->screen->devinfo; unsigned offset = 0; + /* From the SKL PRM, Workarounds section (#878): + * + * Push constant buffer corruption possible. WA: Insert 2 zero-length + * PushConst_PS before every intended PushConst_PS update, issue a + * NULLPRIM after each of the zero len PC update to make sure CS commits + * them. + * + * This workaround is attempting to solve a pixel shader push constant + * synchronization issue. + * + * There's an unpublished WA that involves re-emitting + * 3DSTATE_PUSH_CONSTANT_ALLOC_PS for every 500-ish 3DSTATE_CONSTANT_PS + * packets. Since our counting methods may not be reliable due to + * context-switching and pre-emption, we instead choose to approximate this + * behavior by re-emitting the packet at the top of the batch. + */ + if (brw->ctx.NewDriverState == BRW_NEW_BATCH) { + /* SKL GT2 and GLK 2x6 have reliably demonstrated this issue thus far. + * We've also seen some intermittent failures from SKL GT4 and BXT in + * the past.
+ */ + if (!devinfo->is_skylake && + !devinfo->is_broxton && + !devinfo->is_geminilake) + return; + } + BEGIN_BATCH(10); OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2)); OUT_BATCH(vs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); @@ -154,6 +181,7 @@ const struct brw_tracked_state gen7_push_constant_space = { .dirty = { .mesa = 0, .brw = BRW_NEW_CONTEXT | + BRW_NEW_BATCH | /* Push constant workaround */ BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TESS_PROGRAMS, }, diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index ea5ad55be59..a54a46ed6b9 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -2806,7 +2806,7 @@ genX(upload_gs_state)(struct brw_context *brw) #if GEN_GEN < 7 gs.SOStatisticsEnable = true; if (gs_prog->info.has_transform_feedback_varyings) - gs.SVBIPayloadEnable = true; + gs.SVBIPayloadEnable = _mesa_is_xfb_active_and_unpaused(ctx); /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it * was previously done for gen6. @@ -3042,7 +3042,26 @@ set_blend_entry_bits(struct brw_context *brw, BLEND_ENTRY_GENXML *entry, int i, dstA = fix_dual_blend_alpha_to_one(dstA); } - entry->ColorBufferBlendEnable = true; + /* BRW_NEW_FS_PROG_DATA */ + const struct brw_wm_prog_data *wm_prog_data = + brw_wm_prog_data(brw->wm.base.prog_data); + + /* The Dual Source Blending documentation says: + * + * "If SRC1 is included in a src/dst blend factor and + * a DualSource RT Write message is not used, results + * are UNDEFINED. (This reflects the same restriction in DX APIs, + * where undefined results are produced if “o1” is not written + * by a PS – there are no default values defined). + * If SRC1 is not included in a src/dst blend factor, + * dual source blending must be disabled." + * + * There is no way to gracefully fix this undefined situation + * so we just disable the blending to prevent possible issues. 
+ */ + entry->ColorBufferBlendEnable = + !ctx->Color.Blend[0]._UsesDualSrc || wm_prog_data->dual_src_blend; + entry->DestinationBlendFactor = blend_factor(dstRGB); entry->SourceBlendFactor = blend_factor(srcRGB); entry->DestinationAlphaBlendFactor = blend_factor(dstA); @@ -3188,6 +3207,7 @@ static const struct brw_tracked_state genX(blend_state) = { _NEW_MULTISAMPLE, .brw = BRW_NEW_BATCH | BRW_NEW_BLORP | + BRW_NEW_FS_PROG_DATA | BRW_NEW_STATE_BASE_ADDRESS, }, .emit = genX(upload_blend_state), @@ -4814,7 +4834,25 @@ genX(upload_ps_blend)(struct brw_context *brw) dstA = fix_dual_blend_alpha_to_one(dstA); } - pb.ColorBufferBlendEnable = true; + /* BRW_NEW_FS_PROG_DATA */ + const struct brw_wm_prog_data *wm_prog_data = + brw_wm_prog_data(brw->wm.base.prog_data); + + /* The Dual Source Blending documentation says: + * + * "If SRC1 is included in a src/dst blend factor and + * a DualSource RT Write message is not used, results + * are UNDEFINED. (This reflects the same restriction in DX APIs, + * where undefined results are produced if “o1” is not written + * by a PS – there are no default values defined). + * If SRC1 is not included in a src/dst blend factor, + * dual source blending must be disabled." + * + * There is no way to gracefully fix this undefined situation + * so we just disable the blending to prevent possible issues. 
+ */ + pb.ColorBufferBlendEnable = + !color->Blend[0]._UsesDualSrc || wm_prog_data->dual_src_blend; pb.SourceAlphaBlendFactor = brw_translate_blend_factor(srcA); pb.DestinationAlphaBlendFactor = brw_translate_blend_factor(dstA); pb.SourceBlendFactor = brw_translate_blend_factor(srcRGB); @@ -4833,7 +4871,8 @@ static const struct brw_tracked_state genX(ps_blend) = { _NEW_MULTISAMPLE, .brw = BRW_NEW_BLORP | BRW_NEW_CONTEXT | - BRW_NEW_FRAGMENT_PROGRAM, + BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_FS_PROG_DATA, }, .emit = genX(upload_ps_blend) }; @@ -5548,6 +5587,50 @@ static const struct brw_tracked_state genX(blend_constant_color) = { /* ---------------------------------------------------------------------- */ +#if GEN_GEN == 9 + +/** + * Implement workarounds for preemption: + * - WaDisableMidObjectPreemptionForGSLineStripAdj + * - WaDisableMidObjectPreemptionForTrifanOrPolygon + */ +static void +gen9_emit_preempt_wa(struct brw_context *brw) +{ + /* WaDisableMidObjectPreemptionForGSLineStripAdj + * + * WA: Disable mid-draw preemption when draw-call is a linestrip_adj and + * GS is enabled. + */ + bool object_preemption = + !(brw->primitive == _3DPRIM_LINESTRIP_ADJ && brw->gs.enabled); + + /* WaDisableMidObjectPreemptionForTrifanOrPolygon + * + * TriFan miscompare in Execlist Preemption test. Cut index that is on a + * previous context. End the previous, then resume another context with a + * tri-fan or polygon, and the vertex count is corrupted. If we preempt + * again we will cause corruption. + * + * WA: Disable mid-draw preemption when draw-call has a tri-fan.
+ */ + object_preemption = + object_preemption && !(brw->primitive == _3DPRIM_TRIFAN); + + brw_enable_obj_preemption(brw, object_preemption); +} + +static const struct brw_tracked_state gen9_preempt_wa = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_PRIMITIVE | BRW_NEW_GEOMETRY_PROGRAM, + }, + .emit = gen9_emit_preempt_wa, +}; +#endif + +/* ---------------------------------------------------------------------- */ + void genX(init_atoms)(struct brw_context *brw) { @@ -5852,6 +5935,9 @@ genX(init_atoms)(struct brw_context *brw) &genX(cut_index), &gen8_pma_fix, +#if GEN_GEN == 9 + &gen9_preempt_wa, +#endif }; #endif diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 65d2c64e319..82f0a89a61a 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -55,6 +55,8 @@ static void intel_batchbuffer_reset(struct brw_context *brw); +static void +brw_new_batch(struct brw_context *brw); static void dump_validation_list(struct intel_batchbuffer *batch) @@ -311,6 +313,8 @@ intel_batchbuffer_reset_to_saved(struct brw_context *brw) brw->batch.exec_count = brw->batch.saved.exec_count; brw->batch.map_next = brw->batch.saved.map_next; + if (USED_BATCH(brw->batch) == 0) + brw_new_batch(brw); } void diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index e6825955b0e..4da540dee94 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -327,6 +327,35 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer return intel_alloc_private_renderbuffer_storage(ctx, rb, internalFormat, width, height); } +static mesa_format +fallback_rgbx_to_rgba(struct intel_screen *screen, struct gl_renderbuffer *rb, + mesa_format original_format) +{ + mesa_format format = original_format; + + /* The base format and internal format must be derived from the user-visible + * format (that is, 
the gl_config's format), even if we internally use + * choose a different format for the renderbuffer. Otherwise, rendering may + * use incorrect channel write masks. + */ + rb->_BaseFormat = _mesa_get_format_base_format(original_format); + rb->InternalFormat = rb->_BaseFormat; + + if (!screen->mesa_format_supports_render[original_format]) { + /* The glRenderbufferStorage paths in core Mesa detect if the driver + * does not support the user-requested format, and then searches for + * a fallback format. The DRI code bypasses core Mesa, though. So we do + * the fallbacks here. + * + * We must support MESA_FORMAT_R8G8B8X8 on Android because the Android + * framework requires HAL_PIXEL_FORMAT_RGBX8888 winsys surfaces. + */ + format = _mesa_format_fallback_rgbx_to_rgba(original_format); + assert(screen->mesa_format_supports_render[format]); + } + return format; +} + static void intel_image_target_renderbuffer_storage(struct gl_context *ctx, struct gl_renderbuffer *rb, @@ -349,8 +378,13 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, return; } + rb->Format = fallback_rgbx_to_rgba(brw->screen, rb, image->format); + + mesa_format chosen_format = rb->Format == image->format ? + image->format : rb->Format; + /* __DRIimage is opaque to the core so it has to be checked here */ - if (!brw->mesa_format_supports_render[image->format]) { + if (!brw->mesa_format_supports_render[chosen_format]) { _mesa_error(ctx, GL_INVALID_OPERATION, "glEGLImageTargetRenderbufferStorage(unsupported image format)"); return; @@ -365,15 +399,12 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, * content. 
*/ irb->mt = intel_miptree_create_for_dri_image(brw, image, GL_TEXTURE_2D, - image->format, false); + rb->Format, false); if (!irb->mt) return; - rb->InternalFormat = image->internal_format; rb->Width = image->width; rb->Height = image->height; - rb->Format = image->format; - rb->_BaseFormat = _mesa_get_format_base_format(image->format); rb->NeedsFinishRenderTexture = true; irb->layer_count = 1; } @@ -434,27 +465,7 @@ intel_create_winsys_renderbuffer(struct intel_screen *screen, rb->ClassID = INTEL_RB_CLASS; rb->NumSamples = num_samples; - /* The base format and internal format must be derived from the user-visible - * format (that is, the gl_config's format), even if we internally use - * choose a different format for the renderbuffer. Otherwise, rendering may - * use incorrect channel write masks. - */ - rb->_BaseFormat = _mesa_get_format_base_format(format); - rb->InternalFormat = rb->_BaseFormat; - - rb->Format = format; - if (!screen->mesa_format_supports_render[rb->Format]) { - /* The glRenderbufferStorage paths in core Mesa detect if the driver - * does not support the user-requested format, and then searches for - * a falback format. The DRI code bypasses core Mesa, though. So we do - * the fallbacks here. - * - * We must support MESA_FORMAT_R8G8B8X8 on Android because the Android - * framework requires HAL_PIXEL_FORMAT_RGBX8888 winsys surfaces. 
- */ - rb->Format = _mesa_format_fallback_rgbx_to_rgba(rb->Format); - assert(screen->mesa_format_supports_render[rb->Format]); - } + rb->Format = fallback_rgbx_to_rgba(screen, rb, format); /* intel-specific methods */ rb->Delete = intel_delete_renderbuffer; diff --git a/src/mesa/drivers/dri/i965/intel_image.h b/src/mesa/drivers/dri/i965/intel_image.h index a8193c6def9..ca604159dc2 100644 --- a/src/mesa/drivers/dri/i965/intel_image.h +++ b/src/mesa/drivers/dri/i965/intel_image.h @@ -89,9 +89,6 @@ struct __DRIimageRec { GLuint tile_y; bool has_depthstencil; - /** The image was created with EGL_EXT_image_dma_buf_import. */ - bool dma_buf_imported; - /** Offset of the auxiliary compression surface in the bo. */ uint32_t aux_offset; diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index a18d5ac3624..36681352ba7 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -2542,8 +2542,19 @@ can_texture_with_ccs(struct brw_context *brw, enum isl_aux_usage intel_miptree_texture_aux_usage(struct brw_context *brw, struct intel_mipmap_tree *mt, - enum isl_format view_format) + enum isl_format view_format, + enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits) { + assert(brw->screen->devinfo.gen == 9 || astc5x5_wa_bits == 0); + + /* On gen9, ASTC 5x5 textures cannot live in the sampler cache along side + * CCS or HiZ compressed textures. See gen9_apply_astc5x5_wa_flush() for + * details. 
+ */ + if ((astc5x5_wa_bits & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) && + mt->aux_usage != ISL_AUX_USAGE_MCS) + return ISL_AUX_USAGE_NONE; + switch (mt->aux_usage) { case ISL_AUX_USAGE_HIZ: if (intel_miptree_sample_with_hiz(brw, mt)) @@ -2601,10 +2612,12 @@ intel_miptree_prepare_texture(struct brw_context *brw, struct intel_mipmap_tree *mt, enum isl_format view_format, uint32_t start_level, uint32_t num_levels, - uint32_t start_layer, uint32_t num_layers) + uint32_t start_layer, uint32_t num_layers, + enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits) { enum isl_aux_usage aux_usage = - intel_miptree_texture_aux_usage(brw, mt, view_format); + intel_miptree_texture_aux_usage(brw, mt, view_format, astc5x5_wa_bits); + bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE; /* Clear color is specified as ints or floats and the conversion is done by @@ -3542,21 +3555,18 @@ intel_miptree_release_map(struct intel_mipmap_tree *mt, static bool can_blit_slice(struct intel_mipmap_tree *mt, - unsigned int level, unsigned int slice) + const struct intel_miptree_map *map) { /* See intel_miptree_blit() for details on the 32k pitch limit. */ - if (intel_miptree_blt_pitch(mt) >= 32768) - return false; - - return true; + const unsigned src_blt_pitch = intel_miptree_blt_pitch(mt); + const unsigned dst_blt_pitch = ALIGN(map->w * mt->cpp, 64); + return src_blt_pitch < 32768 && dst_blt_pitch < 32768; } static bool use_intel_mipree_map_blit(struct brw_context *brw, struct intel_mipmap_tree *mt, - GLbitfield mode, - unsigned int level, - unsigned int slice) + const struct intel_miptree_map *map) { const struct gen_device_info *devinfo = &brw->screen->devinfo; @@ -3564,19 +3574,19 @@ use_intel_mipree_map_blit(struct brw_context *brw, /* It's probably not worth swapping to the blit ring because of * all the overhead involved. 
*/ - !(mode & GL_MAP_WRITE_BIT) && + !(map->mode & GL_MAP_WRITE_BIT) && !mt->compressed && (mt->surf.tiling == ISL_TILING_X || /* Prior to Sandybridge, the blitter can't handle Y tiling */ (devinfo->gen >= 6 && mt->surf.tiling == ISL_TILING_Y0) || /* Fast copy blit on skl+ supports all tiling formats. */ devinfo->gen >= 9) && - can_blit_slice(mt, level, slice)) + can_blit_slice(mt, map)) return true; if (mt->surf.tiling != ISL_TILING_LINEAR && mt->bo->size >= brw->max_gtt_map_object_size) { - assert(can_blit_slice(mt, level, slice)); + assert(can_blit_slice(mt, map)); return true; } @@ -3625,7 +3635,7 @@ intel_miptree_map(struct brw_context *brw, intel_miptree_map_etc(brw, mt, map, level, slice); } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) { intel_miptree_map_depthstencil(brw, mt, map, level, slice); - } else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) { + } else if (use_intel_mipree_map_blit(brw, mt, map)) { intel_miptree_map_blit(brw, mt, map, level, slice); #if defined(USE_SSE41) } else if (!(mode & GL_MAP_WRITE_BIT) && diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index bb7df7ad235..08c129a4b8b 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -621,13 +621,15 @@ intel_miptree_access_raw(struct brw_context *brw, enum isl_aux_usage intel_miptree_texture_aux_usage(struct brw_context *brw, struct intel_mipmap_tree *mt, - enum isl_format view_format); + enum isl_format view_format, + enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits); void intel_miptree_prepare_texture(struct brw_context *brw, struct intel_mipmap_tree *mt, enum isl_format view_format, uint32_t start_level, uint32_t num_levels, - uint32_t start_layer, uint32_t num_layers); + uint32_t start_layer, uint32_t num_layers, + enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits); void intel_miptree_prepare_image(struct brw_context *brw, struct intel_mipmap_tree 
*mt); diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c index 6ed7895bc76..8a90b207add 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -181,7 +181,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, * tiled_to_linear a negative pitch so that it walks through the * client's data backwards as it walks through the renderbufer forwards. */ - if (rb->Name == 0) { + if (ctx->ReadBuffer->FlipY) { yoffset = rb->Height - yoffset - height; pixels += (ptrdiff_t) (height - 1) * dst_pitch; dst_pitch = -dst_pitch; @@ -249,7 +249,7 @@ intel_readpixels_blorp(struct gl_context *ctx, return brw_blorp_download_miptree(brw, irb->mt, rb->Format, swizzle, irb->mt_level, x, y, irb->mt_layer, w, h, 1, GL_TEXTURE_2D, format, type, - rb->Name == 0, pixels, packing); + ctx->ReadBuffer->FlipY, pixels, packing); } void diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index cb357419a77..2ceadd005b0 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -61,6 +61,33 @@ DRI_CONF_BEGIN DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects") DRI_CONF_DESC_END DRI_CONF_OPT_END + + DRI_CONF_OPT_BEGIN_B(simd32_heuristic_grouped_check, "true") + DRI_CONF_DESC(en, "Enable/disable grouped texture fetch " + "check in the SIMD32 selection heuristic.") + DRI_CONF_OPT_END + DRI_CONF_OPT_BEGIN_V(simd32_heuristic_grouped_sends, int, 6, "1:999") + DRI_CONF_DESC(en, "How many grouped texture fetches should " + "the SIMD32 selection heuristic allow.") + DRI_CONF_OPT_END + DRI_CONF_OPT_BEGIN_B(simd32_heuristic_inst_check, "true") + DRI_CONF_DESC(en, "Enable/disable SIMD32/SIMD16 instruction " + "count ratio check in the SIMD32 selection " + "heuristic.") + DRI_CONF_OPT_END + DRI_CONF_OPT_BEGIN_V(simd32_heuristic_inst_ratio, float, 2.3, "1:999") + DRI_CONF_DESC(en, "SIMD32/SIMD16 
instruction count ratio " + "the SIMD32 selection heuristic should allow.") + DRI_CONF_OPT_END + DRI_CONF_OPT_BEGIN_B(simd32_heuristic_mrt_check, "true") + DRI_CONF_DESC(en, "Enable/disable MRT write check in the " + "SIMD32 selection heuristic.") + DRI_CONF_OPT_END + DRI_CONF_OPT_BEGIN_V(simd32_heuristic_max_mrts, int, 1, "1:8") + DRI_CONF_DESC(en, "How many MRT writes should the SIMD32 " + "selection heuristic allow.") + DRI_CONF_OPT_END + DRI_CONF_MESA_NO_ERROR("false") DRI_CONF_SECTION_END @@ -283,6 +310,18 @@ static const struct intel_image_format intel_image_formats[] = { { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, { 1, 1, 1, __DRI_IMAGE_FORMAT_GR88, 2 } } }, + { __DRI_IMAGE_FOURCC_P010, __DRI_IMAGE_COMPONENTS_Y_UV, 2, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 }, + { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } }, + + { __DRI_IMAGE_FOURCC_P012, __DRI_IMAGE_COMPONENTS_Y_UV, 2, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 }, + { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } }, + + { __DRI_IMAGE_FOURCC_P016, __DRI_IMAGE_COMPONENTS_Y_UV, 2, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 2 }, + { 1, 1, 1, __DRI_IMAGE_FORMAT_GR1616, 4 } } }, + { __DRI_IMAGE_FOURCC_NV16, __DRI_IMAGE_COMPONENTS_Y_UV, 2, { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, { 1, 1, 0, __DRI_IMAGE_FORMAT_GR88, 2 } } }, @@ -958,7 +997,6 @@ intel_dup_image(__DRIimage *orig_image, void *loaderPrivate) image->tile_y = orig_image->tile_y; image->has_depthstencil = orig_image->has_depthstencil; image->data = loaderPrivate; - image->dma_buf_imported = orig_image->dma_buf_imported; image->aux_offset = orig_image->aux_offset; image->aux_pitch = orig_image->aux_pitch; @@ -1238,7 +1276,6 @@ intel_create_image_from_dma_bufs2(__DRIscreen *dri_screen, return NULL; } - image->dma_buf_imported = true; image->yuv_color_space = yuv_color_space; image->sample_range = sample_range; image->horizontal_siting = horizontal_siting; @@ -1275,9 +1312,9 @@ static bool intel_image_format_is_supported(const struct gen_device_info *devinfo, const 
struct intel_image_format *fmt) { - if (fmt->fourcc == __DRI_IMAGE_FOURCC_SARGB8888 || - fmt->fourcc == __DRI_IMAGE_FOURCC_SABGR8888) - return false; + /* Currently, all formats with an intel_image_format are available on all + * platforms so there's really nothing to check there. + */ #ifndef NDEBUG if (fmt->nplanes == 1) { @@ -1303,6 +1340,14 @@ intel_query_dma_buf_formats(__DRIscreen *_screen, int max, int num_formats = 0, i; for (i = 0; i < ARRAY_SIZE(intel_image_formats); i++) { + /* These two formats are valid DRI formats but do not exist in + * drm_fourcc.h in the Linux kernel. We don't want to accidentally + * advertise them through the EGL layer. + */ + if (intel_image_formats[i].fourcc == __DRI_IMAGE_FOURCC_SARGB8888 || + intel_image_formats[i].fourcc == __DRI_IMAGE_FOURCC_SABGR8888) + continue; + if (!intel_image_format_is_supported(&screen->devinfo, &intel_image_formats[i])) continue; diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index 3d948381f4a..98c92ccfba7 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -613,16 +613,6 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, if (image == NULL) return; - /* We support external textures only for EGLImages created with - * EGL_EXT_image_dma_buf_import. We may lift that restriction in the future. - */ - if (target == GL_TEXTURE_EXTERNAL_OES && !image->dma_buf_imported) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glEGLImageTargetTexture2DOES(external target is enabled only " - "for images created with EGL_EXT_image_dma_buf_import"); - return; - } - /* Disallow depth/stencil textures: we don't have a way to pass the * separate stencil miptree of a GL_DEPTH_STENCIL texture through. 
*/ diff --git a/src/mesa/drivers/dri/i965/meson.build b/src/mesa/drivers/dri/i965/meson.build index 761bb51d6fa..fe6a5ad55d1 100644 --- a/src/mesa/drivers/dri/i965/meson.build +++ b/src/mesa/drivers/dri/i965/meson.build @@ -142,7 +142,7 @@ foreach v : ['40', '45', '50', '60', '70', '75', '80', '90', '100', '110'] ['genX_blorp_exec.c', 'genX_state_upload.c', gen_xml_pack], include_directories : [inc_common, inc_intel, inc_dri_common], c_args : [ - c_vis_args, no_override_init_args, '-msse2', + c_vis_args, no_override_init_args, c_sse2_args, '-DGEN_VERSIONx10=@0@'.format(v), ], dependencies : [dep_libdrm, idep_nir_headers], @@ -183,8 +183,8 @@ libi965 = static_library( include_directories : [ inc_common, inc_intel, inc_dri_common, inc_util, inc_drm_uapi, ], - c_args : [c_vis_args, no_override_init_args, '-msse2'], - cpp_args : [cpp_vis_args, '-msse2'], + c_args : [c_vis_args, no_override_init_args, c_sse2_args], + cpp_args : [cpp_vis_args, c_sse2_args], link_with : [ i965_gen_libs, libintel_common, libintel_dev, libisl, libintel_compiler, libblorp, diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index d53225d63ab..b4cff8c2592 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -688,10 +688,10 @@ static void r200ColorMask( struct gl_context *ctx, if (!rrb) return; mask = radeonPackColor( rrb->cpp, - GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 0), - GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 1), - GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 2), - GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 3) ); + GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 0)*0xFF, + GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 1)*0xFF, + GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 2)*0xFF, + GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 3)*0xFF ); if (!(r && g && b && a)) diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index 8b72c98a3b2..410a78fc084 100644 --- 
a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -503,10 +503,10 @@ static void radeonColorMask( struct gl_context *ctx, return; mask = radeonPackColor( rrb->cpp, - GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 0), - GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 1), - GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 2), - GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 3) ); + GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 0)*0xFF, + GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 1)*0xFF, + GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 2)*0xFF, + GET_COLORMASK_BIT(ctx->Color.ColorMask, 0, 3)*0xFF ); if ( rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] != mask ) { RADEON_STATECHANGE( rmesa, msk ); diff --git a/src/mesa/gl.pc.in b/src/mesa/gl.pc.in index 181724b97bf..680f7427768 100644 --- a/src/mesa/gl.pc.in +++ b/src/mesa/gl.pc.in @@ -7,7 +7,7 @@ Name: gl Description: Mesa OpenGL library Requires.private: @GL_PC_REQ_PRIV@ Version: @PACKAGE_VERSION@ -Libs: -L${libdir} -l@GL_LIB@ +Libs: -L${libdir} -l@GL_PKGCONF_LIB@ Libs.private: @GL_PC_LIB_PRIV@ Cflags: -I${includedir} @GL_PC_CFLAGS@ glx_tls: @GLX_TLS@ diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index 25c3161f7d0..4d95a072793 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -335,6 +335,30 @@ _mesa_extension_supported(const struct gl_context *ctx, extension_index i) return (ctx->Version >= ext->version[ctx->API]) && base[ext->offset]; } +/** + * Compare two entries of the extensions table. Sorts first by year, + * then by name. + * + * Arguments are indices into _mesa_extension_table. 
+ */ +static int +extension_compare(const void *p1, const void *p2) +{ + extension_index i1 = * (const extension_index *) p1; + extension_index i2 = * (const extension_index *) p2; + const struct mesa_extension *e1 = &_mesa_extension_table[i1]; + const struct mesa_extension *e2 = &_mesa_extension_table[i2]; + int res; + + res = (int)e1->year - (int)e2->year; + + if (res == 0) { + res = strcmp(e1->name, e2->name); + } + + return res; +} + /** * Construct the GL_EXTENSIONS string. Called the first time that @@ -372,8 +396,8 @@ _mesa_make_extension_string(struct gl_context *ctx) if (i->year <= maxYear && _mesa_extension_supported(ctx, k)) { - length += strlen(i->name) + 1; /* +1 for space */ - extension_indices[count++] = k; + length += strlen(i->name) + 1; /* +1 for space */ + ++count; } } for (k = 0; k < MAX_UNRECOGNIZED_EXTENSIONS; k++) @@ -385,6 +409,24 @@ _mesa_make_extension_string(struct gl_context *ctx) return NULL; } + /* Sort extensions in chronological order because idTech 2/3 games + * (e.g., Quake3 demo) store the extension list in a fixed size buffer. + * Some cases truncate, while others overflow the buffer. Resulting in + * misrendering and crashes, respectively. + * Address the former here, while the latter will be addressed by setting + * the MESA_EXTENSION_MAX_YEAR environment variable. 
+ */ + j = 0; + for (k = 0; k < MESA_EXTENSION_COUNT; ++k) { + if (_mesa_extension_table[k].year <= maxYear && + _mesa_extension_supported(ctx, k)) { + extension_indices[j++] = k; + } + } + assert(j == count); + qsort(extension_indices, count, + sizeof *extension_indices, extension_compare); + /* Build the extension string.*/ for (j = 0; j < count; ++j) { const struct mesa_extension *i = &_mesa_extension_table[extension_indices[j]]; diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index cfe2174ef12..a9400d5d5ad 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1434,6 +1434,7 @@ framebuffer_parameteri(struct gl_context *ctx, struct gl_framebuffer *fb, if (!ctx->Extensions.MESA_framebuffer_flip_y) goto invalid_pname_enum; cannot_be_winsys_fbo = true; + break; default: goto invalid_pname_enum; } diff --git a/src/mesa/main/multisample.c b/src/mesa/main/multisample.c index f93a18832da..d30e5c92442 100644 --- a/src/mesa/main/multisample.c +++ b/src/mesa/main/multisample.c @@ -94,8 +94,8 @@ _mesa_GetMultisamplefv(GLenum pname, GLuint index, GLfloat * val) ctx->Driver.GetSamplePosition(ctx, ctx->DrawBuffer, index, val); - /* winsys FBOs are upside down */ - if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) + /* FBOs can be upside down (winsys always are)*/ + if (ctx->DrawBuffer->FlipY) val[1] = 1.0f - val[1]; return; diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index e8c28d86162..a2c0d2c3eca 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -922,6 +922,8 @@ read_pixels_es3_error_check(struct gl_context *ctx, GLenum format, GLenum type, case GL_RGBA: if (type == GL_FLOAT && data_type == GL_FLOAT) return GL_NO_ERROR; /* EXT_color_buffer_float */ + if (type == GL_HALF_FLOAT && data_type == GL_FLOAT) + return GL_NO_ERROR; if (type == GL_UNSIGNED_BYTE && data_type == GL_UNSIGNED_NORMALIZED) return GL_NO_ERROR; if (internalFormat == GL_RGB10_A2 && diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c 
index b5d86d64d5b..a3ec7241986 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -1426,6 +1426,11 @@ get_tex_level_parameter_image(struct gl_context *ctx, _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE)); } + if (*params == 0 && pname == GL_TEXTURE_INTENSITY_SIZE) { + /* Gallium may store intensity as LA */ + *params = _mesa_get_format_bits(texFormat, + GL_TEXTURE_ALPHA_SIZE); + } } else { *params = 0; diff --git a/src/mesa/program/Android.mk b/src/mesa/program/Android.mk index c6470e6289e..13d0da85882 100644 --- a/src/mesa/program/Android.mk +++ b/src/mesa/program/Android.mk @@ -41,7 +41,7 @@ endef include $(MESA_TOP)/src/mesa/Makefile.sources include $(CLEAR_VARS) - +LOCAL_CFLAGS += -Wno-error LOCAL_MODULE := libmesa_program LOCAL_MODULE_CLASS := STATIC_LIBRARIES LOCAL_STATIC_LIBRARIES := libmesa_nir \ diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c index 4d7f388cfb0..3bbe451399f 100644 --- a/src/mesa/program/prog_statevars.c +++ b/src/mesa/program/prog_statevars.c @@ -571,7 +571,7 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index16 state[], case STATE_FB_WPOS_Y_TRANSFORM: /* A driver may negate this conditional by using ZW swizzle * instead of XY (based on e.g. some other state). */ - if (_mesa_is_user_fbo(ctx->DrawBuffer)) { + if (!ctx->DrawBuffer->FlipY) { /* Identity (XY) followed by flipping Y upside down (ZW). 
*/ value[0] = 1.0F; value[1] = 0.0F; diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c index 6455e612e4e..fa147b89688 100644 --- a/src/mesa/state_tracker/st_atom_constbuf.c +++ b/src/mesa/state_tracker/st_atom_constbuf.c @@ -92,7 +92,7 @@ st_upload_constants(struct st_context *st, struct gl_program *prog) /* update constants */ if (params && params->NumParameters) { struct pipe_constant_buffer cb; - const uint paramBytes = params->NumParameters * sizeof(GLfloat) * 4; + const uint paramBytes = params->NumParameterValues * sizeof(GLfloat); /* Update the constants which come from fixed-function state, such as * transformation matrices, fog factors, etc. The rest of the values in diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c index 289856cd72d..27e4da31581 100644 --- a/src/mesa/state_tracker/st_atom_sampler.c +++ b/src/mesa/state_tracker/st_atom_sampler.c @@ -163,6 +163,9 @@ st_convert_sampler(const struct st_context *st, const GLboolean is_integer = texobj->_IsIntegerFormat; GLenum texBaseFormat = _mesa_base_tex_image(texobj)->_BaseFormat; + if (texobj->StencilSampling) + texBaseFormat = GL_STENCIL_INDEX; + if (st->apply_texture_swizzle_to_border_color) { const struct st_texture_object *stobj = st_texture_object_const(texobj); /* XXX: clean that up to not use the sampler view at all */ diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 5a331f841de..a05e264476a 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -566,7 +566,11 @@ make_texture(struct st_context *st, dest = pipe_transfer_map(pipe, pt, 0, 0, PIPE_TRANSFER_WRITE, 0, 0, width, height, &transfer); - + if (!dest) { + pipe_resource_reference(&pt, NULL); + _mesa_unmap_pbo_source(ctx, unpack); + return NULL; + } /* Put image into texture transfer. 
* Note that the image is actually going to be upside down in @@ -1173,6 +1177,13 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, return; } + /* Put glDrawPixels image into a texture */ + pt = make_texture(st, width, height, format, type, unpack, pixels); + if (!pt) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels"); + return; + } + /* * Get vertex/fragment shaders */ @@ -1199,13 +1210,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, st_upload_constants(st, &st->fp->Base); } - /* Put glDrawPixels image into a texture */ - pt = make_texture(st, width, height, format, type, unpack, pixels); - if (!pt) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels"); - return; - } - /* create sampler view for the image */ sv[0] = st_create_texture_sampler_view(st->pipe, pt); if (!sv[0]) { diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index dbaf7f6f8fe..c6d9731bb82 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -1229,7 +1229,7 @@ void st_init_extensions(struct pipe_screen *screen, screen->is_format_supported(screen, PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_TEXTURE_2D, 0, 0, PIPE_BIND_SAMPLER_VIEW) && - screen->is_format_supported(screen, PIPE_FORMAT_B8G8R8A8_SRGB, + screen->is_format_supported(screen, PIPE_FORMAT_R8G8B8A8_SRGB, PIPE_TEXTURE_2D, 0, 0, PIPE_BIND_SAMPLER_VIEW) && screen->is_format_supported(screen, PIPE_FORMAT_R16_UNORM, diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index ae2c49960c9..83620fb3f83 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -388,7 +388,7 @@ st_glsl_to_nir(struct st_context *st, struct gl_program *prog, ~prev_stages & shader_program->data->linked_stages; nir->info.next_stage = stages_mask ? 
- (gl_shader_stage) ffs(stages_mask) : MESA_SHADER_FRAGMENT; + (gl_shader_stage) u_bit_scan(&stages_mask) : MESA_SHADER_FRAGMENT; } else { nir->info.next_stage = MESA_SHADER_FRAGMENT; } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index aec53309172..44a08901b81 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1227,6 +1227,10 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) st_src_reg a, b, c; st_dst_reg result_dst; + // there is no TGSI opcode for this + if (ir->type->is_integer_64()) + return false; + ir_expression *expr = ir->operands[mul_operand]->as_expression(); if (!expr || expr->operation != ir_binop_mul) return false; @@ -6072,6 +6076,34 @@ compile_tgsi_instruction(struct st_translate *t, } } +/* Invert SamplePos.y when rendering to the default framebuffer. */ +static void +emit_samplepos_adjustment(struct st_translate *t, int wpos_y_transform) +{ + struct ureg_program *ureg = t->ureg; + + assert(wpos_y_transform >= 0); + struct ureg_src trans_const = ureg_DECL_constant(ureg, wpos_y_transform); + struct ureg_src samplepos_sysval = t->systemValues[SYSTEM_VALUE_SAMPLE_POS]; + struct ureg_dst samplepos_flipped = ureg_DECL_temporary(ureg); + struct ureg_dst is_fbo = ureg_DECL_temporary(ureg); + + ureg_ADD(ureg, ureg_writemask(samplepos_flipped, TGSI_WRITEMASK_Y), + ureg_imm1f(ureg, 1), ureg_negate(samplepos_sysval)); + + /* If trans.x == 1, use samplepos.y, else use 1 - samplepos.y. */ + ureg_FSEQ(ureg, ureg_writemask(is_fbo, TGSI_WRITEMASK_Y), + ureg_scalar(trans_const, TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1)); + ureg_UCMP(ureg, ureg_writemask(samplepos_flipped, TGSI_WRITEMASK_Y), + ureg_src(is_fbo), samplepos_sysval, ureg_src(samplepos_flipped)); + ureg_MOV(ureg, ureg_writemask(samplepos_flipped, TGSI_WRITEMASK_X), + samplepos_sysval); + + /* Use the result in place of the system value. 
*/ + t->systemValues[SYSTEM_VALUE_SAMPLE_POS] = ureg_src(samplepos_flipped); +} + + /** * Emit the TGSI instructions for inverting and adjusting WPOS. * This code is unavoidable because it also depends on whether @@ -6639,6 +6671,10 @@ st_translate_program( emit_wpos(st_context(ctx), t, proginfo, ureg, program->wpos_transform_const); + if (procType == PIPE_SHADER_FRAGMENT && + semName == TGSI_SEMANTIC_SAMPLEPOS) + emit_samplepos_adjustment(t, program->wpos_transform_const); + sysInputs &= ~(1ull << i); } } @@ -6940,7 +6976,8 @@ get_mesa_program_tgsi(struct gl_context *ctx, /* This must be done before the uniform storage is associated. */ if (shader->Stage == MESA_SHADER_FRAGMENT && (prog->info.inputs_read & VARYING_BIT_POS || - prog->info.system_values_read & (1ull << SYSTEM_VALUE_FRAG_COORD))) { + prog->info.system_values_read & (1ull << SYSTEM_VALUE_FRAG_COORD) || + prog->info.system_values_read & (1ull << SYSTEM_VALUE_SAMPLE_POS))) { static const gl_state_index16 wposTransformState[STATE_LENGTH] = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM }; diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index 69286b57916..9ed316b0f78 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -834,6 +834,7 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi, struct st_context *shared_ctx = (struct st_context *) shared_stctxi; struct st_context *st; struct pipe_context *pipe; + struct gl_config* mode_ptr; struct gl_config mode; gl_api api; bool no_error = false; @@ -893,7 +894,13 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi, } st_visual_to_context_mode(&attribs->visual, &mode); - st = st_create_context(api, pipe, &mode, shared_ctx, + + if (attribs->visual.no_config) + mode_ptr = NULL; + else + mode_ptr = &mode; + + st = st_create_context(api, pipe, mode_ptr, shared_ctx, &attribs->options, no_error); if (!st) { *error = ST_CONTEXT_ERROR_NO_MEMORY; diff --git 
a/src/mesa/tnl/t_split_copy.c b/src/mesa/tnl/t_split_copy.c index cbb7eb409f9..085ae9a28c9 100644 --- a/src/mesa/tnl/t_split_copy.c +++ b/src/mesa/tnl/t_split_copy.c @@ -531,7 +531,7 @@ replay_init(struct copy_context *copy) for (offset = 0, i = 0; i < copy->nr_varying; i++) { const struct tnl_vertex_array *src = copy->varying[i].array; const struct gl_array_attributes *srcattr = src->VertexAttrib; - struct tnl_vertex_array *dst = &copy->dstarray[i]; + struct tnl_vertex_array *dst = &copy->dstarray[copy->varying[i].attr]; struct gl_vertex_buffer_binding *dstbind = &copy->varying[i].dstbinding; struct gl_array_attributes *dstattr = &copy->varying[i].dstattribs; diff --git a/src/util/Android.mk b/src/util/Android.mk index 9b6144268e6..7525ea1f5d9 100644 --- a/src/util/Android.mk +++ b/src/util/Android.mk @@ -41,8 +41,14 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/gallium/include \ $(MESA_TOP)/src/gallium/auxiliary +# If Android version >=8 MESA should static link libexpat else should dynamic link +ifeq ($(shell test $(PLATFORM_SDK_VERSION) -ge 27; echo $$?), 0) +LOCAL_STATIC_LIBRARIES := \ + libexpat +else LOCAL_SHARED_LIBRARIES := \ libexpat +endif LOCAL_MODULE := libmesa_util diff --git a/src/util/Makefile.am b/src/util/Makefile.am index bafb57439ab..1e143083374 100644 --- a/src/util/Makefile.am +++ b/src/util/Makefile.am @@ -59,7 +59,8 @@ libmesautil_la_LIBADD = \ $(PTHREAD_LIBS) \ $(CLOCK_LIB) \ $(ZLIB_LIBS) \ - $(LIBATOMIC_LIBS) + $(LIBATOMIC_LIBS) \ + -lm libxmlconfig_la_SOURCES = $(XMLCONFIG_FILES) libxmlconfig_la_CFLAGS = \ diff --git a/src/util/bitscan.h b/src/util/bitscan.h index dc89ac93f28..cdfecafaf01 100644 --- a/src/util/bitscan.h +++ b/src/util/bitscan.h @@ -112,6 +112,31 @@ u_bit_scan64(uint64_t *mask) return i; } +/* Count bits set in mask */ +static inline int +u_count_bits(unsigned *mask) +{ + unsigned v = *mask; + int c; + v = v - ((v >> 1) & 0x55555555); + v = (v & 0x33333333) + ((v >> 2) & 0x33333333); + v = (v + (v >> 4)) & 0xF0F0F0F; + c = (int)((v * 0x1010101)
>> 24); + return c; +} + +static inline int +u_count_bits64(uint64_t *mask) +{ + uint64_t v = *mask; + int c; + v = v - ((v >> 1) & 0x5555555555555555ull); + v = (v & 0x3333333333333333ull) + ((v >> 2) & 0x3333333333333333ull); + v = (v + (v >> 4)) & 0xF0F0F0F0F0F0F0Full; + c = (int)((v * 0x101010101010101ull) >> 56); + return c; +} + /* Determine if an unsigned value is a power of two. * * \note diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c index 87ddfb86b27..368ec417927 100644 --- a/src/util/disk_cache.c +++ b/src/util/disk_cache.c @@ -189,7 +189,7 @@ do { \ } while (0); struct disk_cache * -disk_cache_create(const char *gpu_name, const char *timestamp, +disk_cache_create(const char *gpu_name, const char *driver_id, uint64_t driver_flags) { void *local; @@ -387,9 +387,9 @@ disk_cache_create(const char *gpu_name, const char *timestamp, cache->driver_keys_blob_size = cv_size; /* Create driver id keys */ - size_t ts_size = strlen(timestamp) + 1; + size_t id_size = strlen(driver_id) + 1; size_t gpu_name_size = strlen(gpu_name) + 1; - cache->driver_keys_blob_size += ts_size; + cache->driver_keys_blob_size += id_size; cache->driver_keys_blob_size += gpu_name_size; /* We sometimes store entire structs that contains a pointers in the cache, @@ -409,7 +409,7 @@ disk_cache_create(const char *gpu_name, const char *timestamp, uint8_t *drv_key_blob = cache->driver_keys_blob; DRV_KEY_CPY(drv_key_blob, &cache_version, cv_size) - DRV_KEY_CPY(drv_key_blob, timestamp, ts_size) + DRV_KEY_CPY(drv_key_blob, driver_id, id_size) DRV_KEY_CPY(drv_key_blob, gpu_name, gpu_name_size) DRV_KEY_CPY(drv_key_blob, &ptr_size, ptr_size_size) DRV_KEY_CPY(drv_key_blob, &driver_flags, driver_flags_size) diff --git a/src/util/disk_cache.h b/src/util/disk_cache.h index 50bd9f41ac4..2a147cba615 100644 --- a/src/util/disk_cache.h +++ b/src/util/disk_cache.h @@ -26,11 +26,14 @@ #ifdef HAVE_DLFCN_H #include <dlfcn.h> +#include <stdio.h> +#include "util/build_id.h" #endif #include <assert.h> #include <stdint.h> #include <stdbool.h> #include <sys/stat.h>
+#include "util/mesa-sha1.h" #ifdef __cplusplus extern "C" { @@ -100,7 +103,33 @@ disk_cache_get_function_timestamp(void *ptr, uint32_t* timestamp) if (stat(info.dli_fname, &st)) { return false; } + + if (!st.st_mtime) { + fprintf(stderr, "Mesa: The provided filesystem timestamp for the cache " + "is bogus! Disabling On-disk cache.\n"); + return false; + } + *timestamp = st.st_mtime; + + return true; +} + +static inline bool +disk_cache_get_function_identifier(void *ptr, struct mesa_sha1 *ctx) +{ + uint32_t timestamp; + +#ifdef HAVE_DL_ITERATE_PHDR + const struct build_id_note *note = NULL; + if ((note = build_id_find_nhdr_for_addr(ptr))) { + _mesa_sha1_update(ctx, build_id_data(note), build_id_length(note)); + } else +#endif + if (disk_cache_get_function_timestamp(ptr, &timestamp)) { + _mesa_sha1_update(ctx, &timestamp, sizeof(timestamp)); + } else + return false; return true; } #endif diff --git a/src/util/drirc b/src/util/drirc index 8ece875e34f..0cd04bcadd4 100644 --- a/src/util/drirc +++ b/src/util/drirc @@ -100,6 +100,14 @@ TODO: document the other workarounds.