diff --git a/.travis.yml b/.travis.yml index 18f114b7cc8..6214132289c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,198 +1,40 @@ language: c -dist: xenial +os: osx cache: - apt: true ccache: true env: global: - - XORG_RELEASES=https://xorg.freedesktop.org/releases/individual - - XCB_RELEASES=https://xcb.freedesktop.org/dist - - WAYLAND_RELEASES=https://wayland.freedesktop.org/releases - - XORGMACROS_VERSION=util-macros-1.19.0 - - GLPROTO_VERSION=glproto-1.4.17 - - DRI2PROTO_VERSION=dri2proto-2.8 - - LIBPCIACCESS_VERSION=libpciaccess-0.13.4 - - LIBDRM_VERSION=libdrm-2.4.97 - - XCBPROTO_VERSION=xcb-proto-1.13 - - RANDRPROTO_VERSION=randrproto-1.3.0 - - LIBXRANDR_VERSION=libXrandr-1.3.0 - - LIBXCB_VERSION=libxcb-1.13 - - LIBXSHMFENCE_VERSION=libxshmfence-1.2 - - LIBVDPAU_VERSION=libvdpau-1.1 - - LIBVA_VERSION=libva-1.7.0 - - LIBWAYLAND_VERSION=wayland-1.15.0 - - WAYLAND_PROTOCOLS_VERSION=wayland-protocols-1.8 - - PKG_CONFIG_PATH=$HOME/prefix/lib/pkgconfig:$HOME/prefix/share/pkgconfig - - LD_LIBRARY_PATH="$HOME/prefix/lib:$LD_LIBRARY_PATH" - - PATH="$HOME/prefix/bin:$PATH" - -matrix: - include: - - env: - - LABEL="macOS meson" - - BUILD=meson - - DRI_LOADERS="-Dplatforms=x11" - - GALLIUM_DRIVERS=swrast - os: osx + - PKG_CONFIG_PATH="" before_install: - - | - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then - HOMEBREW_NO_AUTO_UPDATE=1 brew install python3 ninja expat gettext - # Set PATH for homebrew pip3 installs - PATH="$HOME/Library/Python/3.6/bin:${PATH}" - # Set PKG_CONFIG_PATH for keg-only expat - PKG_CONFIG_PATH="/usr/local/opt/expat/lib/pkgconfig:${PKG_CONFIG_PATH}" - # Set PATH for keg-only gettext - PATH="/usr/local/opt/gettext/bin:${PATH}" - - # Install xquartz for prereqs ... - XQUARTZ_VERSION="2.7.11" - wget -nv https://dl.bintray.com/xquartz/downloads/XQuartz-${XQUARTZ_VERSION}.dmg - hdiutil attach XQuartz-${XQUARTZ_VERSION}.dmg - sudo installer -pkg /Volumes/XQuartz-${XQUARTZ_VERSION}/XQuartz.pkg -target / - hdiutil detach /Volumes/XQuartz-${XQUARTZ_VERSION} - # ... and set paths - PATH="/opt/X11/bin:${PATH}" - PKG_CONFIG_PATH="/opt/X11/share/pkgconfig:/opt/X11/lib/pkgconfig:${PKG_CONFIG_PATH}" - ACLOCAL="aclocal -I /opt/X11/share/aclocal -I /usr/local/share/aclocal" - fi + - HOMEBREW_NO_AUTO_UPDATE=1 brew install python3 ninja expat gettext + # Set PATH for homebrew pip3 installs + - PATH="$HOME/Library/Python/3.6/bin:${PATH}" + # Set PKG_CONFIG_PATH for keg-only expat + - PKG_CONFIG_PATH="/usr/local/opt/expat/lib/pkgconfig:${PKG_CONFIG_PATH}" + # Set PATH for keg-only gettext + - PATH="/usr/local/opt/gettext/bin:${PATH}" + + # Install xquartz for prereqs ... + - XQUARTZ_VERSION="2.7.11" + - wget -nv https://dl.bintray.com/xquartz/downloads/XQuartz-${XQUARTZ_VERSION}.dmg + - hdiutil attach XQuartz-${XQUARTZ_VERSION}.dmg + - sudo installer -pkg /Volumes/XQuartz-${XQUARTZ_VERSION}/XQuartz.pkg -target / + - hdiutil detach /Volumes/XQuartz-${XQUARTZ_VERSION} + # ... and set paths + - PKG_CONFIG_PATH="/opt/X11/share/pkgconfig:/opt/X11/lib/pkgconfig:${PKG_CONFIG_PATH}" install: - # Install a more modern meson from pip, since the version in the - # ubuntu repos is often quite old. - - if test "x$BUILD" = xmeson; then - pip3 install --user meson; - pip3 install --user mako; - fi - - # Install dependencies where we require specific versions (or where - # disallowed by Travis CI's package whitelisting). 
- - - | - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then - wget $XORG_RELEASES/util/$XORGMACROS_VERSION.tar.bz2 - tar -jxvf $XORGMACROS_VERSION.tar.bz2 - (cd $XORGMACROS_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget $XORG_RELEASES/proto/$GLPROTO_VERSION.tar.bz2 - tar -jxvf $GLPROTO_VERSION.tar.bz2 - (cd $GLPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget $XORG_RELEASES/proto/$DRI2PROTO_VERSION.tar.bz2 - tar -jxvf $DRI2PROTO_VERSION.tar.bz2 - (cd $DRI2PROTO_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget $XCB_RELEASES/$XCBPROTO_VERSION.tar.bz2 - tar -jxvf $XCBPROTO_VERSION.tar.bz2 - (cd $XCBPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget $XCB_RELEASES/$LIBXCB_VERSION.tar.bz2 - tar -jxvf $LIBXCB_VERSION.tar.bz2 - (cd $LIBXCB_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget $XORG_RELEASES/lib/$LIBPCIACCESS_VERSION.tar.bz2 - tar -jxvf $LIBPCIACCESS_VERSION.tar.bz2 - (cd $LIBPCIACCESS_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget https://dri.freedesktop.org/libdrm/$LIBDRM_VERSION.tar.bz2 - tar -jxvf $LIBDRM_VERSION.tar.bz2 - (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 --enable-freedreno --enable-etnaviv-experimental-api && make install) - - wget $XORG_RELEASES/proto/$RANDRPROTO_VERSION.tar.bz2 - tar -jxvf $RANDRPROTO_VERSION.tar.bz2 - (cd $RANDRPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget $XORG_RELEASES/lib/$LIBXRANDR_VERSION.tar.bz2 - tar -jxvf $LIBXRANDR_VERSION.tar.bz2 - (cd $LIBXRANDR_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget $XORG_RELEASES/lib/$LIBXSHMFENCE_VERSION.tar.bz2 - tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2 - (cd $LIBXSHMFENCE_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget https://people.freedesktop.org/~aplattner/vdpau/$LIBVDPAU_VERSION.tar.bz2 - tar -jxvf $LIBVDPAU_VERSION.tar.bz2 - (cd $LIBVDPAU_VERSION && ./configure --prefix=$HOME/prefix && make install) - - wget https://www.freedesktop.org/software/vaapi/releases/libva/$LIBVA_VERSION.tar.bz2 - tar -jxvf $LIBVA_VERSION.tar.bz2 - (cd $LIBVA_VERSION && ./configure --prefix=$HOME/prefix --disable-wayland --disable-dummy-driver && make install) - - wget $WAYLAND_RELEASES/$LIBWAYLAND_VERSION.tar.xz - tar -axvf $LIBWAYLAND_VERSION.tar.xz - (cd $LIBWAYLAND_VERSION && ./configure --prefix=$HOME/prefix --enable-libraries --without-host-scanner --disable-documentation --disable-dtd-validation && make install) - - wget $WAYLAND_RELEASES/$WAYLAND_PROTOCOLS_VERSION.tar.xz - tar -axvf $WAYLAND_PROTOCOLS_VERSION.tar.xz - (cd $WAYLAND_PROTOCOLS_VERSION && ./configure --prefix=$HOME/prefix && make install) - - # Meson requires ninja >= 1.6, but xenial has 1.3.x - wget https://github.com/ninja-build/ninja/releases/download/v1.6.0/ninja-linux.zip - unzip ninja-linux.zip - mv ninja $HOME/prefix/bin/ - - # Generate this header since one is missing on the Travis instance - mkdir -p linux - printf "%s\n" \ - "#ifndef _LINUX_MEMFD_H" \ - "#define _LINUX_MEMFD_H" \ - "" \ - "#define MFD_CLOEXEC 0x0001U" \ - "#define MFD_ALLOW_SEALING 0x0002U" \ - "" \ - "#endif /* _LINUX_MEMFD_H */" > linux/memfd.h - - # Generate this header, including the missing SYS_memfd_create - # macro, which is not provided by the header in the Travis - # instance - mkdir -p sys - printf "%s\n" \ - "#ifndef _SYSCALL_H" \ - "#define _SYSCALL_H 1" \ - "" \ - "#include " \ - "" \ - "#ifndef _LIBC" \ - "# include " \ - "#endif" 
\ - "" \ - "#ifndef __NR_memfd_create" \ - "# define __NR_memfd_create 319 /* Taken from */" \ - "#endif" \ - "" \ - "#ifndef SYS_memfd_create" \ - "# define SYS_memfd_create __NR_memfd_create" \ - "#endif" \ - "" \ - "#endif" > sys/syscall.h - fi + - pip3 install --user meson + - pip3 install --user mako script: - if test "x$BUILD" = xmeson; then - if test -n "$LLVM_CONFIG"; then - # We need to control the version of llvm-config we're using, so we'll - # generate a native file to do so. This requires meson >=0.49 - # - echo -e "[binaries]\nllvm-config = '`which $LLVM_CONFIG`'" > native.file - - $LLVM_CONFIG --version - else - : > native.file - fi - - export CFLAGS="$CFLAGS -isystem`pwd`" - meson _build \ - --native-file=native.file \ - -Dbuild-tests=true \ - ${DRI_LOADERS} \ - -Ddri-drivers=${DRI_DRIVERS:-[]} \ - -Dgallium-drivers=${GALLIUM_DRIVERS:-[]} \ - -Dvulkan-drivers=${VULKAN_DRIVERS:-[]} - meson configure _build - ninja -C _build - ninja -C _build test - fi + - meson _build + -Dbuild-tests=true + -Dplatforms=x11 + -Dgallium-drivers=swrast + - ninja -C _build + - ninja -C _build test diff --git a/Android.common.mk b/Android.common.mk index 36d97c52dd1..ae4a9fc98dd 100644 --- a/Android.common.mk +++ b/Android.common.mk @@ -27,10 +27,13 @@ endif LOCAL_C_INCLUDES += \ $(MESA_TOP)/src \ - $(MESA_TOP)/include + $(MESA_TOP)/include \ + system/core/include/cutils \ + system/core/liblog/include MESA_VERSION := $(shell cat $(MESA_TOP)/VERSION) LOCAL_CFLAGS += \ + -O3 \ -Wno-error \ -Werror=incompatible-pointer-types \ -Wno-unused-parameter \ @@ -78,14 +81,23 @@ LOCAL_CFLAGS += \ -fvisibility=hidden \ -fno-math-errno \ -fno-trapping-math \ - -Wno-sign-compare + -Wno-sign-compare \ + -Wno-self-assign \ + -Wno-constant-logical-operand \ + -Wno-format \ + -Wno-incompatible-pointer-types \ + -Wno-enum-conversion LOCAL_CPPFLAGS += \ -D__STDC_CONSTANT_MACROS \ -D__STDC_FORMAT_MACROS \ -D__STDC_LIMIT_MACROS \ -Wno-error=non-virtual-dtor \ - -Wno-non-virtual-dtor + -Wno-non-virtual-dtor \ + -Wno-delete-non-virtual-dtor \ + -Wno-overloaded-virtual \ + -Wno-missing-braces \ + -Wno-deprecated-register # mesa requires at least c99 compiler LOCAL_CONLYFLAGS += \ @@ -115,6 +127,9 @@ LOCAL_CFLAGS += -DHAVE_LIBDRM LOCAL_SHARED_LIBRARIES += libdrm endif +LOCAL_SHARED_LIBRARIES += libcutils \ + liblog + LOCAL_CFLAGS_32 += -DDEFAULT_DRIVER_DIR=\"/vendor/lib/$(MESA_DRI_MODULE_REL_PATH)\" LOCAL_CFLAGS_64 += -DDEFAULT_DRIVER_DIR=\"/vendor/lib64/$(MESA_DRI_MODULE_REL_PATH)\" LOCAL_PROPRIETARY_MODULE := true diff --git a/Android.mk b/Android.mk index d2b12ea4473..5fe028c6d19 100644 --- a/Android.mk +++ b/Android.mk @@ -110,6 +110,7 @@ endef # add subdirectories SUBDIRS := \ + src/freedreno \ src/gbm \ src/loader \ src/mapi \ diff --git a/Readme.md b/Readme.md new file mode 100644 index 00000000000..5df295abc3a --- /dev/null +++ b/Readme.md @@ -0,0 +1,2 @@ +Any security related issues should be reported by following the instructions here: +https://01.org/security diff --git a/VERSION b/VERSION index f5cbc1e7406..87c0f53ffeb 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -19.1.0-devel +19.1.6 diff --git a/bin/.cherry-ignore b/bin/.cherry-ignore new file mode 100644 index 00000000000..0df54310da5 --- /dev/null +++ b/bin/.cherry-ignore @@ -0,0 +1,15 @@ +# fixes: The following commits do not apply cleanly on 19.1 branch, as they +# depend on other commits not present in the branch. 
+20b00e1ff24f974bc99e7ca9a720518da0ce5b89 panfrost: Make ctx->job useful
+f6c44549ee2dd0f218deea1feba3965523609406 iris: Replace devinfo->gen with GEN_GEN
+1cd13ccee7bc2733e7a56284dc02bdb1b1c40081 iris: Update fast clear colors on Gen9 with direct immediate writes.
+# fixes: The following commit depends on commits 77a1070d366a and df4c2ec5e19b
+# in order to compile, which did not land in the branch.
+2d799250346331a93b21678dc5605cff74dfa3a1 iris: Avoid unnecessary resolves on transfer maps
+# stable: Explicit 19.2 only nominations.
+e73d863a66caac796ed5fb543a77f0b892df8573 radv: allow to enable VK_AMD_shader_ballot only on GFX8+
+f202ac27a99caf9009aa9d60e2e0d7f3b528e99f radv: add a new debug option called RADV_DEBUG=noshaderballot
+a6ad9e8ccf970a0da68508eb2ce26b316045b9f0 radv: force enable VK_AMD_shader_ballot for Wolfenstein Youngblood
+0813c27d8d4a7e9372a8a86d970b598fc4e3bfd1 radv/gfx10: don't initialize VGT_INSTANCE_STEP_RATE_0
+a4e6e59db82e61b47ef905f28dde80ae36a67d35 radv/gfx10: do not use NGG with NAVI14
+fe0ec41c4d36fd5a82e7579d89e34cce7423c4e5 radv: Change memory type order for GPUs without dedicated VRAM
diff --git a/docs/relnotes/19.1.0.html b/docs/relnotes/19.1.0.html
index c27e65ea096..7e65dd6db8d 100644
--- a/docs/relnotes/19.1.0.html
+++ b/docs/relnotes/19.1.0.html
@@ -14,7 +14,7 @@
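
For reference, the new macOS CI job in the .travis.yml change above reduces to a short meson build. A rough local equivalent of what the job runs, assuming Homebrew, XQuartz and the PATH/PKG_CONFIG_PATH adjustments from before_install are already in place, is:

    HOMEBREW_NO_AUTO_UPDATE=1 brew install python3 ninja expat gettext
    pip3 install --user meson
    pip3 install --user mako
    meson _build -Dbuild-tests=true -Dplatforms=x11 -Dgallium-drivers=swrast
    ninja -C _build
    ninja -C _build test
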

The Mesa 3D Graphics Library

-Mesa 19.1.0 Release Notes / TBD
+Mesa 19.1.0 Release Notes / June 11, 2019

Mesa 19.1.0 is a new development release. People who are concerned
@@ -32,7 +32,7 @@

Mesa 19.1.0 Release Notes / TBD

SHA256 checksums

-TBD.
+2a6c3af3a803389183168e449c536304cf03e0f82c4c9333077933543b9d02f3  mesa-19.1.0.tar.xz
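
A downloaded tarball can be checked against this checksum with GNU coreutils (or shasum -a 256 on macOS), for example:

    sha256sum mesa-19.1.0.tar.xz
    # expected: 2a6c3af3a803389183168e449c536304cf03e0f82c4c9333077933543b9d02f3  mesa-19.1.0.tar.xz
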
 
@@ -69,15 +69,4542 @@

New features

Bug fixes

    -
  • TBD
  • + +
  • Bug 81843 - [SNB IVB HSW] ETC2 textures are not returned as compressed images
  • + +
  • Bug 99781 - Some Unity games fail assertion on startup in glXCreateContextAttribsARB
  • + +
  • Bug 100239 - Incorrect rendering in CS:GO
  • + +
  • Bug 100316 - Linking GLSL 1.30 shaders with invariant and deprecated variables triggers an 'mismatching invariant qualifiers' error
  • + +
  • Bug 104272 - [OpenGL CTS] [HSW] KHR-GL46.direct_state_access.textures_compressed_subimage assert fails
  • + +
  • Bug 104355 - Ivy Bridge ignores component mappings in texture views
  • + +
  • Bug 104602 - [apitrace] Graphical artifacts in Civilization VI on RX Vega
  • + +
  • Bug 107052 - [Regression][bisected]. Crookz - The Big Heist Demo can't be launched despite the "true" flag in "drirc"
  • + +
  • Bug 107505 - [lars] dEQP-GLES31.functional.geometry_shading.layered#render_with_default_layer_3d failure
  • + +
  • Bug 107510 - [GEN8+] up to 10% perf drop on several 3D benchmarks
  • + +
  • Bug 107563 - [RADV] Broken rendering in Unity demos
  • + +
  • Bug 107987 - [Debug mesa only]. Crash happens when calling drawArrays
  • + +
  • Bug 108250 - [GLSL] layout-location-struct.shader_test fails to link
  • + +
  • Bug 108457 - [OpenGL CTS] KHR-GL46.tessellation_shader.single.xfb_captures_data_from_correct_stage fails
  • + +
  • Bug 108540 - vkAcquireNextImageKHR blocks when timeout=0 in Wayland
  • + +
  • Bug 108766 - Mesa built with meson has RPATH entries
  • + +
  • Bug 108824 - Invalid handling when GL buffer is bound on one context and invalidated on another
  • + +
  • Bug 108841 - [RADV] SPIRV's control flow attributes do not propagate to LLVM
  • + +
  • Bug 108879 - [CIK] [regression] All opencl apps hangs indefinitely in si_create_context
  • + +
  • Bug 108999 - Calculating the scissors fields when the y is flipped (0 on top) can generate negative numbers that will cause assertion failure later on.
  • + +
  • Bug 109057 - texelFetch from GL_TEXTURE_2D_MULTISAMPLE with integer format fails
  • + +
  • Bug 109107 - gallium/st/va: change va max_profiles when using Radeon VCN Hardware
  • + +
  • Bug 109216 - 4-27% performance drop in Vulkan benchmarks
  • + +
  • Bug 109326 - mesa: Meson configuration summary should be printed
  • + +
  • Bug 109328 - [BSW BXT GLK] dEQP-VK.subgroups.arithmetic.subgroup regressions
  • + +
  • Bug 109391 - LTO Build fails
  • + +
  • Bug 109401 - [DXVK] Project Cars rendering problems
  • + +
  • Bug 109404 - [ANV] The Witcher 3 shadows flickering
  • + +
  • Bug 109443 - Build failure with MSVC when using Scons >= 3.0.2
  • + +
  • Bug 109451 - [IVB,SNB] LINE_STRIPs following a TRIANGLE_FAN fail to use primitive restart
  • + +
  • Bug 109543 - After upgrade mesa to 19.0.0~rc1 all vulkan based application stop working ["vulkan-cube" received SIGSEGV in radv_pipeline_init_blend_state at ../src/amd/vulkan/radv_pipeline.c:699]
  • + +
  • Bug 109561 - [regression, bisected] code re-factor causing games to stutter or lock-up system
  • + +
  • Bug 109573 - dEQP-VK.spirv_assembly.instruction.graphics.module.same_module
  • + +
  • Bug 109575 - Mesa-19.0.0-rc1 : Computer Crashes trying to run anything Vulkan
  • + +
  • Bug 109581 - [BISECTED] Nothing is Rendered on Sascha Willem's "subpasses" demo
  • + +
  • Bug 109594 - totem assert failure: totem: src/intel/genxml/gen9_pack.h:72: __gen_uint: La declaración `v <= max' no se cumple.
  • + +
  • Bug 109597 - wreckfest issues with transparent objects & skybox
  • + +
  • Bug 109601 - [Regression] RuneLite GPU rendering broken on 18.3.x
  • + +
  • Bug 109603 - nir_instr_as_deref: Assertion `parent && parent->type == nir_instr_type_deref' failed.
  • + +
  • Bug 109645 - build error on arm64: tegra_screen.c:33: /usr/include/xf86drm.h:41:10: fatal error: drm.h: No such file or directory
  • + +
  • Bug 109646 - New video compositor compute shader render glitches mpv
  • + +
  • Bug 109647 - /usr/include/xf86drm.h:40:10: fatal error: drm.h: No such file or directory
  • + +
  • Bug 109648 - AMD Raven hang during va-api decoding
  • + +
  • Bug 109659 - Missing OpenGL symbols in OSMesa Gallium when building with meson
  • + +
  • Bug 109698 - dri.pc contents invalid when built with meson
  • + +
  • Bug 109717 - [regression] Cull distance tests asserting
  • + +
  • Bug 109735 - [Regression] broken font with mesa_vulkan_overlay
  • + +
  • Bug 109738 - Child of Light shows only a black screen
  • + +
  • Bug 109739 - Mesa build fails when vulkan-overlay-layer option is enabled
  • + +
  • Bug 109742 - vdpau state tracker on nv92 started to hit assert after vl compute work
  • + +
  • Bug 109743 - Test fails: piglit.spec.arb_sample_shading.arb_sample_shading-builtin-gl-sample-mask-mrt-alpha
  • + +
  • Bug 109747 - Add framerate to vulkan-overlay-layer
  • + +
  • Bug 109759 - [BISECTED][REGRESSION][IVB, HSW] Font rendering problem in OpenGL
  • + +
  • Bug 109788 - vulkan-overlay-layer: Only installs 64bit version
  • + +
  • Bug 109810 - nir_opt_copy_prop_vars.c:454: error: unknown field ‘ssa’ specified in initializer
  • + +
  • Bug 109929 - tgsi_to_nir.c:2111: undefined reference to `gl_nir_lower_samplers_as_deref'
  • + +
  • Bug 109944 - [bisected] Android build test fails with: utils.c: error: use of undeclared identifier 'PACKAGE_VERSION'
  • + +
  • Bug 109945 - pan_assemble.c:51:46: error: passing argument 2 of ‘tgsi_to_nir’ from incompatible pointer type [-Werror=incompatible-pointer-types]
  • + +
  • Bug 109980 - [i915 CI][HSW] spec@arb_fragment_shader_interlock@arb_fragment_shader_interlock-image-load-store - fail
  • + +
  • Bug 109984 - unhandled VkStructureType VK_STRUCTURE_TYPE_RENDER_PASS_INPUT_ATTACHMENT_ASPECT_CREATE_INFO
  • + +
  • Bug 110134 - SIGSEGV while playing large hevc video in mpv
  • + +
  • Bug 110143 - Doom 3: BFG Edition - Steam and GOG.com - white flickering screen
  • + +
  • Bug 110201 - [ivb] mesa 19.0.0 breaks rendering in kitty
  • + +
  • Bug 110211 - If DESTDIR is set to an empty string, the dri drivers are not installed
  • + +
  • Bug 110216 - radv: Segfault when compiling compute shaders from Assassin's Creed Odyssey (regression, bisected)
  • + +
  • Bug 110221 - build error with meson
  • + +
  • Bug 110239 - Mesa SIGABRT: src/intel/genxml/gen9_pack.h:72: __gen_uint: Assertion `v <= max' failed
  • + +
  • Bug 110257 - Major artifacts in mpeg2 vaapi hw decoding
  • + +
  • Bug 110259 - radv: Sampling depth-stencil image in GENERAL layout returns nothing but zero (regression, bisected)
  • + +
  • Bug 110291 - Vega 64 GPU hang running Space Engineers
  • + +
  • Bug 110302 - [bisected][regression] piglit egl-create-pbuffer-surface and egl-gl-colorspace regressions
  • + +
  • Bug 110305 - Iris driver fails ext_packed_depth_stencil-getteximage test
  • + +
  • Bug 110311 - [IVB HSW SNB][regression][bisected] regressions on vec4 deqp/gl{es}cts tests
  • + +
  • Bug 110349 - radv: Dragon Quest XI (DXVK) has a graphical glitch (regression, bisected)
  • + +
  • Bug 110353 - weird colors seen in valley
  • + +
  • Bug 110355 - radeonsi: GTK elements become invisible in some applications (GIMP, LibreOffice)
  • + +
  • Bug 110356 - install_megadrivers.py creates new dangling symlink [bisected]
  • + +
  • Bug 110404 - Iris fails piglit.spec.ext_transform_feedback.immediate-reuse test
  • + +
  • Bug 110422 - AMD_DEBUG=forcedma will crash OpenGL aps with SIGFAULT on VegaM 8706G
  • + +
  • Bug 110441 - [llvmpipe] complex-loop-analysis-bug regression
  • + +
  • Bug 110443 - vaapi/vpp: wrong output for non 64-bytes align width (ex: 1200)
  • + +
  • Bug 110454 - [llvmpipe] piglit arb_color_buffer_float-render GL_RGBA8_SNORM failure with llvm-9
  • + +
  • Bug 110462 - Epic Games Launcher renders nothing with "-opengl" option
  • + +
  • Bug 110474 - [bisected][regression] vk cts fp16 arithmetic failures
  • + +
  • Bug 110497 - [DXVK][Regression][Bisected][SKL] Project Cars 2 crashes with Bug Splat when loading finishes
  • + +
  • Bug 110526 - [CTS] dEQP-VK.ycbcr.{conversion,format}.* fail
  • + +
  • Bug 110530 - [CTS] dEQP-VK.ycbcr.format.g8_b8_r8_3plane_420* reports VM faults on Vega10
  • + +
  • Bug 110535 - [bisected] [icl] GPU hangs on crucible func.miptree.r8g8b8a8-unorm.aspect-color.view-2d.levels01.array01.extent-512x512.upload-copy-with-draw tests
  • + +
  • Bug 110540 - [AMD TAHITI XT] valve artifact broken
  • + +
  • Bug 110573 - Mesa vulkan-radeon 19.0.3 system freeze and visual artifacts (RADV)
  • + +
  • Bug 110590 - [Regression][Bisected] GTAⅣ under wine fails with GLXBadFBConfig
  • + +
  • Bug 110632 - "glx: Fix synthetic error generation in __glXSendError" broke wine games on 32-bit
  • + +
  • Bug 110648 - Dota2 will not open using vulkan since 19.0 series
  • + +
  • Bug 110655 - VK_LAYER_MESA_OVERLAY_CONFIG=draw,fps renders sporadically
  • + +
  • Bug 110698 - tu_device.c:900:4: error: initializer element is not constant
  • + +
  • Bug 110701 - GPU faults in in Unigine Valley 1.0
  • + +
  • Bug 110721 - graphics corruption on steam client with mesa 19.1.0 rc3 on polaris
  • + +
  • Bug 110761 - Huge problems between Mesa and Electron engine apps
  • + +
  • Bug 110784 - [regression][bisected] Reverting 'expose 0 shader binary formats for compat profiles for Qt' causes get_program_binary failures on Iris
  • +
+

Changes

+

Adam Jackson (1):

+
    +
  • drisw: Try harder to probe whether MIT-SHM works
  • +
+ +

Albert Pal (1):

+
    +
  • Fix link release notes for 19.0.0.
  • +
+ +

Alejandro Piñeiro (12):

+
    +
  • blorp: introduce helper method blorp_nir_init_shader
  • +
  • nir, glsl: move pixel_center_integer/origin_upper_left to shader_info.fs
  • +
  • nir/xfb: add component_offset at nir_xfb_info
  • +
  • nir_types: add glsl_varying_count helper
  • +
  • nir/xfb: adding varyings on nir_xfb_info and gather_info
  • +
  • nir/xfb: sort varyings too
  • +
  • nir_types: add glsl_type_is_struct helper
  • +
  • nir/xfb: handle arrays and AoA of basic types
  • +
  • nir/linker: use nir_gather_xfb_info
  • +
  • nir/linker: fix ARRAY_SIZE query with xfb varyings
  • +
  • nir/xfb: move varyings info out of nir_xfb_info
  • +
  • docs: document MESA_GLSL=errors keyword
  • +
+ +

Alexander von Gluck IV (1):

+
    +
  • haiku: Fix hgl dispatch build. Tested under meson/scons.
  • +
+ +

Alexandros Frantzis (1):

+
    +
  • virgl: Fake MSAA when max samples is 1
  • +
+ +

Alok Hota (32):

+
    +
  • swr/rast: update SWR rasterizer shader stats
  • +
  • gallium/swr: Param defaults for unhandled PIPE_CAPs
  • +
  • gallium/aux: add PIPE_CAP_MAX_VARYINGS to u_screen
  • +
  • swr/rast: Convert system memory pointers to gfxptr_t
  • +
  • swr/rast: Disable use of __forceinline by default
  • +
  • swr/rast: Correctly align 64-byte spills/fills
  • +
  • swr/rast: Flip BitScanReverse index calculation
  • +
  • swr/rast: Move knob defaults to generated cpp file
  • +
  • swr/rast: FP consistency between POSH/RENDER pipes
  • +
  • swr/rast: Refactor scratch space variable names
  • +
  • swr/rast: convert DWORD->uint32_t, QWORD->uint64_t
  • +
  • swr/rast: simdlib cleanup, clipper stack space fixes
  • +
  • swr/rast: Add translation support to streamout
  • +
  • swr/rast: bypass size limit for non-sampled textures
  • +
  • swr/rast: Cleanup and generalize gen_archrast
  • +
  • swr/rast: Add initial SWTag proto definitions
  • +
  • swr/rast: Add string handling to AR event framework
  • +
  • swr/rast: Add general SWTag statistics
  • +
  • swr/rast: Fix autotools and scons codegen
  • +
  • swr/rast: Remove deprecated 4x2 backend code
  • +
  • swr/rast: AVX512 support compiled in by default
  • +
  • swr/rast: enforce use of tile offsets
  • +
  • swr/rast: add more llvm intrinsics
  • +
  • swr/rast: update guardband rects at draw setup
  • +
  • swr/rast: add SWR_STATIC_ASSERT() macro
  • +
  • swr/rast: add flat shading
  • +
  • swr/rast: add guards for cpuid on Linux
  • +
  • swr/rast: early exit on empty triangle mask
  • +
  • swr/rast: Cleanup and generalize gen_archrast
  • +
  • swr/rast: Add initial SWTag proto definitions
  • +
  • swr/rast: Add string handling to AR event framework
  • +
  • swr/rast: Add general SWTag statistics
  • +
+ +

Alyssa Rosenzweig (192):

+
    +
  • panfrost: Initial stub for Panfrost driver
  • +
  • panfrost: Implement Midgard shader toolchain
  • +
  • meson: Remove panfrost from default driver list
  • +
  • kmsro: Move DRM entrypoints to shared block
  • +
  • panfrost: Use u_pipe_screen_get_param_defaults
  • +
  • panfrost: Check in sources for command stream
  • +
  • panfrost: Include glue for out-of-tree legacy code
  • +
  • kmsro: Silence warning if missing
  • +
  • panfrost: Clean-up one-argument passing quirk
  • +
  • panfrost: Don't hardcode number of nir_ssa_defs
  • +
  • panfrost: Add kernel-agnostic resource management
  • +
  • panfrost: Remove if 0'd dead code
  • +
  • panfrost: Remove speculative if 0'd format bit code
  • +
  • panfrost: Elucidate texture op scheduling comment
  • +
  • panfrost: Specify supported draw modes per-context
  • +
  • panfrost: Fix build; depend on libdrm
  • +
  • panfrost: Backport driver to Mali T600/T700
  • +
  • panfrost: Identify MALI_OCCLUSION_PRECISE bit
  • +
  • panfrost: Implement PIPE_QUERY_OCCLUSION_COUNTER
  • +
  • panfrost: Don't align framebuffer dims
  • +
  • panfrost: Improve logging and patch memory leaks
  • +
  • panfrost: Fix various leaks unmapping resources
  • +
  • panfrost: Free imported BOs
  • +
  • panfrost: Swap order of tiled texture (de)alloc
  • +
  • panfrost: Cleanup mali_viewport (clipping) code
  • +
  • panfrost: Preserve w sign in perspective division
  • +
  • panfrost: Fix clipping region
  • +
  • panfrost: Stub out separate stencil functions
  • +
  • panfrost: Add pandecode (command stream debugger)
  • +
  • panfrost: Implement pantrace (command stream dump)
  • +
  • panfrost/midgard: Refactor tag lookahead code
  • +
  • panfrost/midgard: Fix nested/chained if-else
  • +
  • panfrost: Rectify doubleplusungood extended branch
  • +
  • panfrost/midgard: Emit extended branches
  • +
  • panfrost: Dynamically set discard branch targets
  • +
  • panfrost: Verify and print brx condition in disasm
  • +
  • panfrost: Use tiler fast path (performance boost)
  • +
  • panfrost/meson: Remove subdir for nondrm
  • +
  • panfrost/nondrm: Flag CPU-invisible regions
  • +
  • panfrost/nondrm: Make COHERENT_LOCAL explicit
  • +
  • panfrost/nondrm: Split out dump_counters
  • +
  • panfrost/midgard: Add fround(_even), ftrunc, ffma
  • +
  • panfrost: Decode render target swizzle/channels
  • +
  • panfrost: Add RGB565, RGB5A1 texture formats
  • +
  • panfrost: Identify 4-bit channel texture formats
  • +
  • panfrost: Expose perf counters in environment
  • +
  • panfrost/midgard: Allow flt to run on most units
  • +
  • panfrost: Import job data structures from v3d
  • +
  • panfrost: Decouple Gallium clear from FBD clear
  • +
  • panfrost: Cleanup cruft related to clears
  • +
  • panfrost/midgard: Don't force constant on VLUT
  • +
  • panfrost: Flush with offscreen rendering
  • +
  • panfrost/midgard: Promote smul to vmul
  • +
  • panfrost/midgard: Preview for data hazards
  • +
  • panfrost: List primitive restart enable bit
  • +
  • panfrost/drm: Cast pointer to u64 to fix warning
  • +
  • panfrost: Cleanup needless if in create_bo
  • +
  • panfrost: Combine has_afbc/tiled in layout enum
  • +
  • panfrost: Delay color buffer setup
  • +
  • panfrost: Determine framebuffer format bits late
  • +
  • panfrost: Allocate dedicated slab for linear BOs
  • +
  • panfrost: Support linear depth textures
  • +
  • panfrost: Document "depth-buffer writeback" bit
  • +
  • panfrost: Identify fragment_extra flags
  • +
  • util: Add a drm_find_modifier helper
  • +
  • v3d: Use shared drm_find_modifier util
  • +
  • vc4: Use shared drm_find_modifier util
  • +
  • freedreno: Use shared drm_find_modifier util
  • +
  • panfrost: Break out fragment to SFBD/MFBD files
  • +
  • panfrost: Remove staging SFBD for pan_context
  • +
  • panfrost: Remove staging MFBD
  • +
  • panfrost: Minor comment cleanup (version detection)
  • +
  • panfrost/mfbd: Implement linear depth buffers
  • +
  • panfrost/mfbd: Respect per-job depth write flag
  • +
  • panfrost: Comment spelling fix
  • +
  • panfrost: Allocate extra data for depth buffer
  • +
  • panfrost; Disable AFBC for depth buffers
  • +
  • panfrost: Compute viewport state on the fly
  • +
  • panfrost/midgard: Implement fpow
  • +
  • panfrost: Workaround buffer overrun with mip level
  • +
  • panfrost: Fix primconvert check
  • +
  • panfrost: Disable PIPE_CAP_TGSI_TEXCOORD
  • +
  • panfrost/decode: Respect primitive size pointers
  • +
  • panfrost: Replay more varying buffers
  • +
  • panfrost: Rewrite varying assembly
  • +
  • panfrost/midgard: Fix b2f32 swizzle for vectors
  • +
  • panfrost: Fix viewports
  • +
  • panfrost: Implement scissor test
  • +
  • panfrost/midgard: Add fcsel_i opcode
  • +
  • panfrost/midgard: Schedule ball/bany to vectors
  • +
  • panfrost/midgard: Add more ball/bany, iabs ops
  • +
  • panfrost/midgard: Map more bany/ball opcodes
  • +
  • panfrost/midgard: Lower bool_to_int32
  • +
  • panfrost/midgard: Lower f2b32 to fne
  • +
  • panfrost/midgard: Lower i2b32
  • +
  • panfrost/midgard: Implement b2i; improve b2f/f2b
  • +
  • panfrost/midgard: Lower source modifiers for ints
  • +
  • panfrost/midgard: Cleanup midgard_nir_algebraic.py
  • +
  • panfrost: Stub out ES3 caps/callbacks
  • +
  • panfrost/midgard: Add ult/ule ops
  • +
  • panfrost/midgard: Expand fge lowering to more types
  • +
  • panfrost/midgard: Handle i2b constant
  • +
  • panfrost/midgard: fpow is a two-part operation
  • +
  • panfrost: Preliminary work for mipmaps
  • +
  • panfrost: Fix vertex buffer corruption
  • +
  • panfrost/midgard: Disassemble `cube` texture op
  • +
  • panfrost/midgard: Add L/S op for writing cubemap coordinates
  • +
  • panfrost: Preliminary work for cubemaps
  • +
  • panfrost/decode: Decode all cubemap faces
  • +
  • panfrost: Include all cubemap faces in bitmap list
  • +
  • panfrost/midgard: Emit cubemap coordinates
  • +
  • panfrost: Implement command stream for linear cubemaps
  • +
  • panfrost: Extend tiling for cubemaps
  • +
  • panfrost: Implement missing texture formats
  • +
  • panfrost/decode: Print negative_start
  • +
  • panfrost: Clean index state between indexed draws
  • +
  • panfrost: Fix index calculation types and asserts
  • +
  • panfrost: Implement FIXED formats
  • +
  • panfrost: Remove support for legacy kernels
  • +
  • nir: Add "viewport vector" system values
  • +
  • panfrost: Implement system values
  • +
  • panfrost: Cleanup some indirection in pan_resource
  • +
  • panfrost: Respect box->width in tiled stores
  • +
  • panfrost: Size tiled temp buffers correctly
  • +
  • panfrost/decode: Add flags for tilebuffer readback
  • +
  • panfrost: Add tilebuffer load? branch
  • +
  • panfrost/midgard: Add umin/umax opcodes
  • +
  • panfrost/midgard: Add ilzcnt op
  • +
  • panfrost/midgard: Add ibitcount8 op
  • +
  • panfrost/midgard: Enable lower_find_lsb
  • +
  • panfrost: Remove "mali_unknown6" nonsense
  • +
  • panfrost/midgard: Drop dependence on mesa/st
  • +
  • panfrost: Cleanup indexed draw handling
  • +
  • nir: Add nir_lower_viewport_transform
  • +
  • panfrost/midgard: Use shared nir_lower_viewport_transform
  • +
  • panfrost: Track BO lifetime with jobs and reference counts
  • +
  • panfrost: Fixup vertex offsets to prevent shadow copy
  • +
  • panfrost/mdg: Use shared fsign lowering
  • +
  • panfrost/mdg/disasm: Print raw varying_parameters
  • +
  • panfrost/midgard: Pipe through varying arrays
  • +
  • panfrost/midgard: Implement indirect loads of varyings/UBOs
  • +
  • panfrost/midgard: Respect component of bcsel condition
  • +
  • panfrost/midgard: Remove useless MIR dump
  • +
  • panfrost: Respect backwards branches in RA
  • +
  • panfrost/midgard: Don't try to inline constants on branches
  • +
  • panfrost/midgard: imul can only run on *mul
  • +
  • panfrost: Disable indirect outputs for now
  • +
  • panfrost: Use actual imov instruction
  • +
  • panfrost/midgard: Dead code eliminate MIR
  • +
  • panfrost/midgard: Track loop depth
  • +
  • panfrost/midgard: Fix off-by-one in successor analysis
  • +
  • panfrost/midgard: Remove unused mir_next_block
  • +
  • panfrost/midgard: Update integer op list
  • +
  • panfrost/midgard: Document sign-extension/zero-extension bits (vector)
  • +
  • panfrost/midgard: Set integer mods
  • +
  • panfrost/midgard: Implement copy propagation
  • +
  • panfrost/midgard: Optimize MIR in progress loop
  • +
  • panfrost/midgard: Refactor opcode tables
  • +
  • panfrost/midgard: Add "op commutes?" property
  • +
  • panfrost/midgard: Remove assembler
  • +
  • panfrost/midgard: Reduce fmax(a, 0.0) to fmov.pos
  • +
  • panfrost/midgard: Extend copy propagation pass
  • +
  • panfrost/midgard: Optimize csel involving 0
  • +
  • panfrost/midgard: Copy prop for texture registers
  • +
  • panfrost/midgard: Identify inand
  • +
  • panfrost/midgard: Add new bitwise ops
  • +
  • Revert "panfrost/midgard: Extend copy propagation pass"
  • +
  • panfrost/midgard: Only copyprop without an outmod
  • +
  • panfrost/midgard: Fix regressions in -bjellyfish
  • +
  • panfrost/midgard: Fix tex propogation
  • +
  • panfrost/midgard: imov workaround
  • +
  • panfrost: Use fp32 (not fp16) varyings
  • +
  • panfrost/midgard: Safety check immediate precision degradations
  • +
  • panfrost: Workaround -bshadow regression
  • +
  • panfrost: Remove shader dump
  • +
  • panfrost/decode: Hit MRT blend shader enable bits
  • +
  • panfrost: Fix blend shader upload
  • +
  • panfrost/midgard: reg_mode_full -> reg_mode_32, etc
  • +
  • panfrost/midgard/disasm: Catch mask errors
  • +
  • panfrost/midgard/disasm: Extend print_reg to 8-bit
  • +
  • panfrost/midgard/disasm: Fill in .int mod
  • +
  • panfrost/midgard: Fix crash on unknown op
  • +
  • panfrost/midgard: Rename ilzcnt8 -> iclz
  • +
  • panfrost/midgard/disasm: Support 8-bit destination
  • +
  • panfrost/midgard/disasm: Print 8-bit sources
  • +
  • panfrost/midgard/disasm: Stub out 64-bit
  • +
  • panfrost/midgard/disasm: Handle dest_override generalized
  • +
  • panfrost: Support RGB565 FBOs
  • +
  • panfrost/midgard: Fix integer selection
  • +
  • panfrost/midgard: Fix RA when temp_count = 0
  • +
  • panfrost/midgard: Lower mixed csel (NIR)
  • +
  • panfrost/midgard: iabs cannot run on mul
  • +
+ +

Alyssa Ross (1):

+
    +
  • get_reviewer.pl: improve portability
  • +
+ +

Amit Pundir (1):

+
    +
  • mesa: android: freedreno: build libfreedreno_{drm,ir3} static libs
  • +
+ +

Andre Heider (5):

    -
  • TBD
  • +
  • iris: fix build with gallium nine
  • +
  • iris: improve PIPE_CAP_VIDEO_MEMORY bogus value
  • +
  • iris: add support for tgsi_to_nir
  • +
  • st/nine: enable csmt per default on iris
  • +
  • st/nine: skip position checks in SetCursorPosition()
+

Andreas Baierl (2):

+
    +
  • nir: add rcp(w) lowering for gl_FragCoord
  • +
  • lima/ppir: Add gl_FragCoord handling
  • +
+ +

Andres Gomez (12):

+
    +
  • mesa: INVALID_VALUE for wrong type or format in Clear*Buffer*Data
  • +
  • gitlab-ci: install distro's ninja
  • +
  • glsl: correctly validate component layout qualifier for dvec{3,4}
  • +
  • glsl/linker: always validate explicit location among inputs
  • +
  • glsl/linker: don't fail non static used inputs without matching outputs
  • +
  • glsl/linker: simplify xfb_offset vs xfb_stride overflow check
  • +
  • Revert "glsl: relax input->output validation for SSO programs"
  • +
  • glsl/linker: location aliasing requires types to have the same width
  • +
  • docs: drop Andres Gomez from the release cycles
  • +
  • glsl/linker: always validate explicit locations for first and last interfaces
  • +
  • docs/relnotes: add support for VK_KHR_shader_float16_int8
  • +
  • glsl/linker: check for xfb_offset aliasing
  • +
+ +

Andrii Simiklit (5):

+
    +
  • i965: consider a 'base level' when calculating width0, height0, depth0
  • +
  • i965: re-emit index buffer state on a reset option change.
  • +
  • util: clean the 24-bit unused field to avoid an issues
  • +
  • iris: make the TFB result visible to others
  • +
  • egl: return correct error code for a case req ver < 3 with forward-compatible
  • +
+ +

Antia Puentes (1):

+
    +
  • nir/linker: Fix TRANSFORM_FEEDBACK_BUFFER_INDEX
  • +
+ +

Anuj Phogat (7):

+
    +
  • i965/icl: Add WA_2204188704 to disable pixel shader panic dispatch
  • +
  • anv/icl: Add WA_2204188704 to disable pixel shader panic dispatch
  • +
  • intel: Add Elkhart Lake device info
  • +
  • intel: Add Elkhart Lake PCI-IDs
  • +
  • iris/icl: Set Enabled Texel Offset Precision Fix bit
  • +
  • iris/icl: Add WA_2204188704 to disable pixel shader panic dispatch
  • +
  • intel: Add support for Comet Lake
  • +
+ +

Axel Davy (49):

+
    +
  • st/nine: Ignore window size if error
  • +
  • st/nine: Ignore multisample quality level if no ms
  • +
  • st/nine: Disable depth write when nothing gets updated
  • +
  • st/nine: Do not advertise support for D15S1 and D24X4S4
  • +
  • st/nine: Do not advertise CANMANAGERESOURCE
  • +
  • st/nine: Change a few advertised caps
  • +
  • Revert "d3dadapter9: Support software renderer on any DRI device"
  • +
  • st/nine: Fix D3DWindowBuffer_release for old wine nine support
  • +
  • st/nine: Use FLT_MAX/2 for RCP clamping
  • +
  • st/nine: Upload managed textures only at draw using them
  • +
  • st/nine: Upload managed buffers only at draw using them
  • +
  • st/nine: Fix buffer/texture unbinding in nine_state_clear
  • +
  • st/nine: Finish if nooverwrite after normal mapping
  • +
  • st/nine: Always return OK on SetSoftwareVertexProcessing
  • +
  • st/nine: Enable modifiers on ps 1.X texcoords
  • +
  • st/nine: Ignore nooverwrite for systemmem
  • +
  • st/nine: Fix SINCOS input
  • +
  • st/nine: Optimize surface upload with conversion
  • +
  • st/nine: Optimize volume upload with conversion
  • +
  • st/nine: rename *_conversion to *_internal
  • +
  • st/nine: Refactor surface GetSystemMemPointer
  • +
  • st/nine: Refactor volume GetSystemMemPointer
  • +
  • st/nine: Support internal compressed format for surfaces
  • +
  • st/nine: Support internal compressed format for volumes
  • +
  • st/nine: Add drirc option to use data_internal for dynamic textures
  • +
  • drirc: Add Gallium nine workaround for Rayman Legends
  • +
  • st/nine: Recompile optimized shaders based on b/i consts
  • +
  • st/nine: Control shader constant inlining with drirc
  • +
  • st/nine: Regroup param->rel tests
  • +
  • st/nine: Refactor param->rel
  • +
  • st/nine: Compact nine_ff_get_projected_key
  • +
  • st/nine: Compact pixel shader key
  • +
  • st/nine: use helper ureg_DECL_sampler everywhere
  • +
  • st/nine: Manually upload vs and ps constants
  • +
  • st/nine: Refactor shader constants ureg_src computation
  • +
  • st/nine: Make swvp_on imply IS_VS
  • +
  • st/nine: Refactor ct_ctor
  • +
  • st/nine: Track constant slots used
  • +
  • st/nine: Refactor counting of constants
  • +
  • st/nine: Prepare constant compaction in nine_shader
  • +
  • st/nine: Propagate const_range to context
  • +
  • st/nine: Cache constant buffer size
  • +
  • st/nine: Handle const_ranges in nine_state
  • +
  • st/nine: Enable computing const_ranges
  • +
  • st/nine: Use TGSI_SEMANTIC_GENERIC for fog
  • +
  • st/nine: Optimize a bit writeonly buffers
  • +
  • st/nine: Throttle rendering similarly for thread_submit
  • +
  • st/nine: Check discard_delayed_release is set before allocating more
  • +
  • d3dadapter9: Revert to old throttling limit value
  • +
+ +

Bart Oldeman (1):

+
    +
  • gallium-xlib: query MIT-SHM before using it.
  • +
+ +

Bas Nieuwenhuizen (105):

+
    +
  • radv: Only look at pImmutableSamples if the descriptor has a sampler.
  • +
  • amd/common: Add gep helper for pointer increment.
  • +
  • amd/common: Implement ptr->int casts in ac_to_integer.
  • +
  • radv: Fix the shader info pass for not having the variable.
  • +
  • amd/common: Use correct writemask for shared memory stores.
  • +
  • amd/common: Fix stores to derefs with unknown variable.
  • +
  • amd/common: Handle nir_deref_type_ptr_as_array for shared memory.
  • +
  • amd/common: handle nir_deref_cast for shared memory from integers.
  • +
  • amd/common: Do not use 32-bit loads for shared memory.
  • +
  • amd/common: Implement global memory accesses.
  • +
  • radv: Do not use the bo list for local buffers.
  • +
  • radv: Implement VK_EXT_buffer_device_address.
  • +
  • radv: Use correct num formats to detect whether we should be use 1.0 or 1.
  • +
  • radv: Sync ETC2 whitelisted devices.
  • +
  • radv: Clean up a bunch of compiler warnings.
  • +
  • radv: Handle clip+cull distances more generally as compact arrays.
  • +
  • radv: Implement VK_EXT_depth_clip_enable.
  • +
  • radv: Disable depth clamping even without EXT_depth_range_unrestricted.
  • +
  • radv: Fix float16 interpolation set up.
  • +
  • radv: Allow interpolation on non-float types.
  • +
  • radv: Interpolate less aggressively.
  • +
  • turnip: Add driver skeleton (v2)
  • +
  • turnip: Fix up detection of device.
  • +
  • turnip: Gather some device info.
  • +
  • turnip: Remove abort.
  • +
  • turnip: Fix newly introduced warning.
  • +
  • turnip: Add buffer allocation & mapping support.
  • +
  • turnip: Report a memory type and heap.
  • +
  • turnip: Cargo cult the Intel heap size functionality.
  • +
  • turnip: Initialize memory type in requirements.
  • +
  • turnip: Disable more features.
  • +
  • turnip: Add 630 to the list.
  • +
  • turnip: Fix bo allocation after we stopped using libdrm_freedreno ...
  • +
  • turnip: Fix memory mapping.
  • +
  • turnip: Add image layout calculations.
  • +
  • turnip: Stop hardcoding the msm version check.
  • +
  • turnip: move tu_gem.c to tu_drm.c
  • +
  • turnip: Implement pipe-less param query.
  • +
  • turnip: Implement some format properties for RGBA8.
  • +
  • turnip: Remove some radv leftovers.
  • +
  • turnip: clean up TODO.
  • +
  • turnip: Implement some UUIDs.
  • +
  • turnip: Implement a slow bo list
  • +
  • turnip: Add a command stream.
  • +
  • turnip: Add msm queue support.
  • +
  • turnip: Make bo_list functions not static
  • +
  • turnip: Implement submission.
  • +
  • turnip: Fill command buffer
  • +
  • turnip: Shorten primary_cmd_stream name.
  • +
  • turnip: Add emit functions in a header.
  • +
  • turnip: Move stream functions to tu_cs.c
  • +
  • turnip: Add buffer memory binding.
  • +
  • turnip: Make tu6_emit_event_write shared.
  • +
  • turnip: Add tu6_rb_fmt_to_ifmt.
  • +
  • turnip: Implement buffer->buffer DMA copies.
  • +
  • turnip: Add image->buffer DMA copies.
  • +
  • turnip: Add buffer->image DMA copies.
  • +
  • turnip: Add todo for copies.
  • +
  • turnip: Fix GCC compiles.
  • +
  • turnip: Deconflict vk_format_table regeneration
  • +
  • gitlab-ci: Build turnip.
  • +
  • radeonsi: Remove implicit const cast.
  • +
  • radv: Allow fast clears with concurrent queue mask for some layouts.
  • +
  • vulkan/util: Handle enums that are in platform-specific headers.
  • +
  • vulkan: Update the XML and headers to 1.1.104
  • +
  • radv: Implement VK_EXT_host_query_reset.
  • +
  • radv: Use correct image view comparison for fast clears.
  • +
  • radv: Implement VK_EXT_pipeline_creation_feedback.
  • +
  • ac/nir: Return frag_coord as integer.
  • +
  • nir: Add access qualifiers on load_ubo intrinsic.
  • +
  • radv: Add non-uniform indexing lowering.
  • +
  • radv: Add bolist RADV_PERFTEST flag.
  • +
  • ac: Move has_local_buffers disable to radeonsi.
  • +
  • radv: Use local buffers for the global bo list.
  • +
  • radv: Support VK_EXT_inline_uniform_block.
  • +
  • radv: Add support for driconf.
  • +
  • vulkan/wsi: Add X11 adaptive sync support based on dri options.
  • +
  • radv: Add adaptive_sync driconfig option and enable it by default.
  • +
  • radv: Add logic for subsampled format descriptions.
  • +
  • radv: Add logic for multisample format descriptions.
  • +
  • radv: Add multiple planes to images.
  • +
  • radv: Add single plane image views & meta operations.
  • +
  • radv: Support different source & dest aspects for planar images in blit2d.
  • +
  • radv: Add ycbcr conversion structs.
  • +
  • radv: Add support for image views with multiple planes.
  • +
  • radv: Allow mixed src/dst aspects in copies.
  • +
  • ac/nir: Add support for planes.
  • +
  • radv: Add ycbcr samplers in descriptor set layouts.
  • +
  • radv: Update descriptor sets for multiple planes.
  • +
  • radv: Add ycbcr lowering pass.
  • +
  • radv: Run the new ycbcr lowering pass.
  • +
  • radv: Add hashing for the ycbcr samplers.
  • +
  • radv: Add ycbcr format features.
  • +
  • radv: Add ycbcr subsampled & multiplane formats to csv.
  • +
  • radv: Enable YCBCR conversion feature.
  • +
  • radv: Expose VK_EXT_ycbcr_image_arrays.
  • +
  • radv: Expose Vulkan 1.1 for Android.
  • +
  • radv: Fix hang width YCBCR array textures.
  • +
  • radv: Set is_array in lowered ycbcr tex instructions.
  • +
  • radv: Restrict YUVY formats to 1 layer.
  • +
  • radv: Disable subsampled formats.
  • +
  • radv: Implement cosited_even sampling.
  • +
  • radv: Do not use extra descriptor space for the 3rd plane.
  • +
  • nir: Actually propagate progress in nir_opt_move_load_ubo.
  • +
  • radv: Prevent out of bound shift on 32-bit builds.
  • +
+ +

Benjamin Gordon (1):

+
    +
  • configure.ac/meson.build: Add options for library suffixes
  • +
+ +

Benjamin Tissoires (1):

+
    +
  • CI: use wayland ci-templates repo to create the base image
  • +
+ +

Boyan Ding (3):

+
    +
  • gk110/ir: Add rcp f64 implementation
  • +
  • gk110/ir: Add rsq f64 implementation
  • +
  • gk110/ir: Use the new rcp/rsq in library
  • +
+ +

Boyuan Zhang (1):

+
    +
  • st/va: reverse qt matrix back to its original order
  • +
+ +

Brian Paul (51):

+
    +
  • st/mesa: whitespace/formatting fixes in st_cb_texture.c
  • +
  • svga: assorted whitespace and formatting fixes
  • +
  • svga: fix dma.pending > 0 test
  • +
  • mesa: fix display list corner case assertion
  • +
  • st/mesa: whitespace fixes in st_sampler_view.c
  • +
  • st/mesa: line wrapping, whitespace fixes in st_cb_texture.c
  • +
  • st/mesa: whitespace fixes in st_texture.h
  • +
  • svga: init fill variable to avoid compiler warning
  • +
  • svga: silence array out of bounds warning
  • +
  • st/wgl: init a variable to silence MinGW warning
  • +
  • gallium/util: whitespace cleanups in u_bitmask.[ch]
  • +
  • gallium/util: add some const qualifiers in u_bitmask.c
  • +
  • pipebuffer: use new pb_usage_flags enum type
  • +
  • pipebuffer: whitespace fixes in pb_buffer.h
  • +
  • winsys/svga: use new pb_usage_flags enum type
  • +
  • st/mesa: move, clean-up shader variant key decls/inits
  • +
  • st/mesa: whitespace, formatting fixes in st_cb_flush.c
  • +
  • svga: refactor draw_vgpu10() function
  • +
  • svga: remove SVGA_RELOC_READ flag in SVGA3D_BindGBSurface()
  • +
  • pipebuffer: s/PB_ALL_USAGE_FLAGS/PB_USAGE_ALL/
  • +
  • st/mesa: init hash keys with memset(), not designated initializers
  • +
  • intel/decoders: silence uninitialized variable warnings in gen_print_batch()
  • +
  • intel/compiler: silence unitialized variable warning in opt_vector_float()
  • +
  • st/mesa: move utility functions, macros into new st_util.h file
  • +
  • st/mesa: move around some code in st_context.c
  • +
  • st/mesa: add/improve sampler view comments
  • +
  • st/mesa: rename st_texture_release_sampler_view()
  • +
  • st/mesa: minor refactoring of texture/sampler delete code
  • +
  • docs: try to improve the Meson documentation (v2)
  • +
  • drisw: fix incomplete type compilation failure
  • +
  • gallium/winsys/kms: fix incomplete type compilation failure
  • +
  • nir: silence a couple new compiler warnings
  • +
  • docs: separate information for compiler selection and compiler options
  • +
  • docs: link to the meson_options.txt file gitlab.freedesktop.org
  • +
  • st/mesa: implement "zombie" sampler views (v2)
  • +
  • st/mesa: implement "zombie" shaders list
  • +
  • st/mesa: stop using pipe_sampler_view_release()
  • +
  • svga: stop using pipe_sampler_view_release()
  • +
  • llvmpipe: stop using pipe_sampler_view_release()
  • +
  • swr: remove call to pipe_sampler_view_release()
  • +
  • i915g: remove calls to pipe_sampler_view_release()
  • +
  • gallium/util: remove pipe_sampler_view_release()
  • +
  • nir: fix a few signed/unsigned comparison warnings
  • +
  • st/mesa: fix texture deletion context mix-up issues (v2)
  • +
  • nir: use {0} initializer instead of {} to fix MSVC build
  • +
  • util: no-op __builtin_types_compatible_p() for non-GCC compilers
  • +
  • docs: s/Aptril/April/
  • +
  • llvmpipe: init some vars to NULL to silence MinGW compiler warnings
  • +
  • glsl: work around MinGW 7.x compiler bug
  • +
  • svga: add SVGA_NO_LOGGING env var (v2)
  • +
  • glsl: fix typo in #warning message
  • +
+ +

Caio Marcelo de Oliveira Filho (61):

+
    +
  • nir: keep the phi order when splitting blocks
  • +
  • i965: skip bit6 swizzle detection in Gen8+
  • +
  • anv: skip bit6 swizzle detection in Gen8+
  • +
  • isl: assert that Gen8+ don't have bit6_swizzling
  • +
  • intel/compiler: use 0 as sampler in emit_mcs_fetch
  • +
  • nir: fix example in opt_peel_loop_initial_if description
  • +
  • iris: Fix uses of gl_TessLevel*
  • +
  • iris: Add support for TCS passthrough
  • +
  • iris: always include an extra constbuf0 if using UBOs
  • +
  • nir/copy_prop_vars: don't get confused by array_deref of vectors
  • +
  • nir/copy_prop_vars: add debug helpers
  • +
  • nir/copy_prop_vars: keep track of components in copy_entry
  • +
  • nir/copy_prop_vars: change test helper to get intrinsics
  • +
  • nir: nir_build_deref_follower accept array derefs of vectors
  • +
  • nir/copy_prop_vars: add tests for load/store elements of vectors
  • +
  • nir: fix MSVC build
  • +
  • st/nir: count num_uniforms for FS bultin shader
  • +
  • nir/copy_prop_vars: rename/refactor store_to_entry helper
  • +
  • nir/copy_prop_vars: use NIR_MAX_VEC_COMPONENTS
  • +
  • nir/copy_prop_vars: handle load/store of vector elements
  • +
  • nir/copy_prop_vars: add tests for indirect array deref
  • +
  • nir/copy_prop_vars: prefer using entries from equal derefs
  • +
  • nir/copy_prop_vars: handle indirect vector elements
  • +
  • anv: Implement VK_EXT_external_memory_host
  • +
  • nir: Add a pass to combine store_derefs to same vector
  • +
  • intel/nir: Combine store_derefs after vectorizing IO
  • +
  • intel/nir: Combine store_derefs to improve code from SPIR-V
  • +
  • nir: Handle array-deref-of-vector case in loop analysis
  • +
  • spirv: Add an execution environment to the options
  • +
  • intel/compiler: handle GLSL_TYPE_INTERFACE as GLSL_TYPE_STRUCT
  • +
  • spirv: Use interface type for block and buffer block
  • +
  • iris: Clean up compiler warnings about unused
  • +
  • nir: Take if_uses into account when repairing SSA
  • +
  • mesa: Extension boilerplate for NV_compute_shader_derivatives
  • +
  • glsl: Remove redundant conditions when asserting in_qualifier
  • +
  • glsl: Enable derivative builtins for NV_compute_shader_derivatives
  • +
  • glsl: Enable texture builtins for NV_compute_shader_derivatives
  • +
  • glsl: Parse and propagate derivative_group to shader_info
  • +
  • nir/algebraic: Lower CS derivatives to zero when no group defined
  • +
  • nir: Don't set LOD=0 for compute shader that has derivative group
  • +
  • intel/fs: Use TEX_LOGICAL whenever implicit lod is supported
  • +
  • intel/fs: Add support for CS to group invocations in quads
  • +
  • intel/fs: Don't loop when lowering CS intrinsics
  • +
  • intel/fs: Use NIR_PASS_V when lowering CS intrinsics
  • +
  • i965: Advertise NV_compute_shader_derivatives
  • +
  • gallium: Add PIPE_CAP_COMPUTE_SHADER_DERIVATIVES
  • +
  • iris: Enable NV_compute_shader_derivatives
  • +
  • spirv: Add support for DerivativeGroup capabilities
  • +
  • anv: Implement VK_NV_compute_shader_derivatives
  • +
  • docs: Add NV_compute_shader_derivatives to 19.1.0 relnotes
  • +
  • spirv: Add more to_string helpers
  • +
  • spirv: Tell which opcode or value is unhandled when failing
  • +
  • spirv: Rename vtn_decoration literals to operands
  • +
  • spirv: Handle SpvOpDecorateId
  • +
  • nir: Add option to lower tex to txl when shader don't support implicit LOD
  • +
  • intel/fs: Don't handle texop_tex for shaders without implicit LOD
  • +
  • spirv: Properly handle SpvOpAtomicCompareExchangeWeak
  • +
  • intel/fs: Assert when brw_fs_nir sees a nir_deref_instr
  • +
  • anv: Fix limits when VK_EXT_descriptor_indexing is used
  • +
  • nir: Fix nir_opt_idiv_const when negatives are involved
  • +
  • nir: Fix clone of nir_variable state slots
  • +
+ +

Carlos Garnacho (1):

+
    +
  • wayland/egl: Ensure EGL surface is resized on DRI update_buffers()
  • +
+ +

Chad Versace (17):

+
    +
  • turnip: Drop Makefile.am and Android.mk
  • +
  • turnip: Fix indentation in function signatures
  • +
  • turnip: Fix result of vkEnumerate*LayerProperties
  • +
  • turnip: Fix result of vkEnumerate*ExtensionProperties
  • +
  • turnip: Use vk_outarray in all relevant public functions
  • +
  • turnip: Fix a real -Wmaybe-uninitialized
  • +
  • turnip: Fix indentation
  • +
  • turnip: Require DRM device version >= 1.3
  • +
  • turnip: Add TODO for Android logging
  • +
  • turnip: Use vk_errorf() for initialization error messages
  • +
  • turnip: Replace fd_bo with tu_bo
  • +
  • turnip: Add TODO file
  • +
  • turnip: Fix 'unused' warnings
  • +
  • turnip: Don't return from tu_stub funcs
  • +
  • turnip: Annotate vkGetImageSubresourceLayout with tu_stub
  • +
  • turnip: Fix error behavior for VkPhysicalDeviceExternalImageFormatInfo
  • +
  • turnip: Use Vulkan 1.1 names instead of KHR
  • +
+ +

Charmaine Lee (5):

+
    +
  • svga: add svga shader type in the shader variant
  • +
  • svga: move host logging to winsys
  • +
  • st/mesa: purge framebuffers with current context after unbinding winsys buffers
  • +
  • mesa: unreference current winsys buffers when unbinding winsys buffers
  • +
  • svga: Remove unnecessary check for the pre flush bit for setting vertex buffers
  • +
+ +

Chenglei Ren (1):

+
    +
  • anv/android: fix missing dependencies issue during parallel build
  • +
+ +

Chia-I Wu (78):

  • egl: fix KHR_partial_update without EXT_buffer_age
  • turnip: add .clang-format
  • turnip: use msm_drm.h from inc_freedreno
  • turnip: remove unnecessary libfreedreno_drm dep
  • turnip: add wrappers around DRM_MSM_GET_PARAM
  • turnip: add wrappers around DRM_MSM_SUBMITQUEUE_*
  • turnip: constify tu_device in tu_gem_*
  • turnip: preliminary support for tu_QueueWaitIdle
  • turnip: run sed and clang-format on tu_cs
  • turnip: document tu_cs
  • turnip: add tu_cs_add_bo
  • turnip: minor cleanup to tu_cs_end
  • turnip: update cs->start in tu_cs_end
  • turnip: inline tu_cs_check_space
  • turnip: add more tu_cs helpers
  • turnip: build drm_msm_gem_submit_bo array directly
  • turnip: add tu_bo_list_merge
  • turnip: add cmdbuf->bo_list to bo_list in queue submit
  • turnip: preliminary support for tu_BindImageMemory2
  • turnip: preliminary support for tu_image_view_init
  • turnip: preliminary support for tu_CmdBeginRenderPass
  • turnip: add tu_cs_reserve_space(_assert)
  • turnip: emit HW init in tu_BeginCommandBuffer
  • turnip: preliminary support for tu_GetRenderAreaGranularity
  • turnip: add tu_tiling_config
  • turnip: add internal helpers for tu_cs
  • turnip: add tu_cs_{reserve,add}_entry
  • turnip: specify initial size in tu_cs_init
  • turnip: never fail tu_cs_begin/tu_cs_end
  • turnip: add tu_cs_sanity_check
  • turnip: provide both emit_ib and emit_call
  • turnip: add tu_cs_mode
  • turnip: add TU_CS_MODE_SUB_STREAM
  • turnip: preliminary support for loadOp and storeOp
  • turnip: add a more complete format table
  • turnip: add functions to import/export prime fd
  • turnip: advertise VK_KHR_external_memory_capabilities
  • turnip: advertise VK_KHR_external_memory
  • turnip: add support for VK_KHR_external_memory_{fd,dma_buf}
  • turnip: fix VkClearValue packing
  • turnip: preliminary support for fences
  • turnip: respect color attachment formats
  • turnip: mark IBs for dumping
  • turnip: use 32-bit offset in tu_cs_entry
  • turnip: more/better asserts for tu_cs
  • turnip: add tu_cs_discard_entries
  • turnip: tu_cs_emit_array
  • turnip: fix tu_cs sub-streams
  • turnip: simplify tu_cs sub-streams usage
  • turnip: create a less dummy pipeline
  • turnip: parse VkPipelineDynamicStateCreateInfo
  • turnip: parse VkPipelineInputAssemblyStateCreateInfo
  • turnip: parse VkPipelineViewportStateCreateInfo
  • turnip: parse VkPipelineRasterizationStateCreateInfo
  • turnip: parse VkPipelineDepthStencilStateCreateInfo
  • turnip: parse VkPipeline{Multisample,ColorBlend}StateCreateInfo
  • turnip: preliminary support for shader modules
  • turnip: compile VkPipelineShaderStageCreateInfo
  • turnip: parse VkPipelineShaderStageCreateInfo
  • turnip: parse VkPipelineVertexInputStateCreateInfo
  • turnip: add draw_cs to tu_cmd_buffer
  • turnip: preliminary support for draw state binding
  • turnip: preliminary support for tu_CmdDraw
  • turnip: guard -Dvulkan-driver=freedreno
  • turnip: preliminary support for tu_GetImageSubresourceLayout
  • turnip: preliminary support for Wayland WSI
  • vulkan/wsi: move modifier array into wsi_wl_swapchain
  • vulkan/wsi: create wl_drm wrapper as needed
  • vulkan/wsi: refactor drm_handle_format
  • vulkan/wsi: add wsi_wl_display_drm
  • vulkan/wsi: add wsi_wl_display_dmabuf
  • vulkan/wsi: make wl_drm optional
  • virgl: handle fence_server_sync in winsys
  • virgl: hide fence internals from the driver
  • virgl: introduce virgl_drm_fence
  • virgl: fix fence fd version check
  • virgl: clear vertex_array_dirty
  • virgl: skip empty cmdbufs

Chris Forbes (3):

  • glsl: add scaffolding for EXT_gpu_shader4
  • glsl: enable noperspective|flat|centroid for EXT_gpu_shader4
  • glsl: enable types for EXT_gpu_shader4

Chris Wilson (19):

  • i965: Assert the execobject handles match for this device
  • iris: fix import from dri2/3
  • iris: IndexFormat = size/2
  • iris: Set resource modifier on handle
  • iris: Wrap userptr for creating bo
  • iris: AMD_pinned_memory
  • iris: Record reusability of bo on construction
  • iris: fix memzone_for_address since multibinder changes
  • iris: Tidy exporting the flink handle
  • iris: Fix assigning the output handle for exporting for KMS
  • iris: Merge two walks of the exec_bos list
  • iris: Tag each submitted batch with a syncobj
  • iris: Add fence support using drm_syncobj
  • iris: Wire up EGL_IMG_context_priority
  • iris: Use PIPE_BUFFER_STAGING for the query objects
  • iris: Use coherent allocation for PIPE_RESOURCE_STAGING
  • iris: Use streaming loads to read from tiled surfaces
  • iris: Push heavy memchecker code to DEBUG
  • iris: Adapt to variable ppGTT size

Christian Gmeiner (12):

  • etnaviv: rs: mark used src resource as read from
  • etnaviv: blt: mark used src resource as read from
  • etnaviv: implement ETC2 block patching for HALTI0
  • etnaviv: keep track of mapped bo address
  • etnaviv: hook-up etc2 patching
  • etnaviv: enable ETC2 texture compression support for HALTI0 GPUs
  • etnaviv: fix resource usage tracking across different pipe_context's
  • etnaviv: fix compile warnings
  • st/dri: allow direct UYVY import
  • etnaviv: shrink struct etna_3d_state
  • nir: add lower_ftrunc
  • etnaviv: use the correct uniform dirty bits

Chuck Atkins (1):

  • meson: Fix missing glproto dependency for gallium-glx

Connor Abbott (6):

  • nir/serialize: Prevent writing uninitialized state_slot data
  • nir: Add a stripping pass for improved cacheability
  • radeonsi/nir: Use nir stripping pass
  • nir/search: Add automaton-based pre-searching
  • nir/search: Add debugging code to dump the pattern matched
  • nir/algebraic: Don't emit empty initializers for MSVC

Daniel Schürmann (2):

  • nir: Define shifts according to SM5 specification.
  • nir: Use SM5 properties to optimize shift(a@32, iand(31, b))

Daniel Stone (2):

  • panfrost: Properly align stride
  • vulkan/wsi/wayland: Respect non-blocking AcquireNextImage

Danylo Piliaiev (13):

  • anv: Handle VK_ATTACHMENT_UNUSED in colorAttachment
  • radv: Handle VK_ATTACHMENT_UNUSED in CmdClearAttachment
  • anv: Fix VK_EXT_transform_feedback working with varyings packed in PSIZ
  • anv: Fix destroying descriptor sets when pool gets reset
  • anv: Treat zero size XFB buffer as disabled
  • glsl: Cross validate variable's invariance by explicit invariance only
  • i965,iris,anv: Make alpha to coverage work with sample mask
  • intel/fs: Make alpha test work with MRT and sample mask
  • st/mesa: Fix GL_MAP_COLOR with glDrawPixels GL_COLOR_INDEX
  • iris: Fix assert when using vertex attrib without buffer binding
  • intel/compiler: Do not reswizzle dst if instruction writes to flag register
  • drirc: Add workaround for Epic Games Launcher
  • anv: Do not emulate texture swizzle for INPUT_ATTACHMENT, STORAGE_IMAGE

Dave Airlie (63):

  • virgl: enable elapsed time queries
  • virgl: ARB_query_buffer_object support
  • docs: update qbo support for virgl
  • glsl: glsl to nir fix uninit class member.
  • radv/llvm: initialise passes member.
  • radv: remove alloc parameter from pipeline init
  • iris: fix some hangs around null framebuffers
  • iris: fix crash in sparse vertex array
  • iris: add initial transform feedback overflow query paths (V3)
  • iris: fix cube texture view
  • iris: execute compute related query on compute batch.
  • iris: iris add load register reg32/64
  • iris: add conditional render support
  • iris: fix gpu calcs for timestamp queries
  • iris/WIP: add broadwell support
  • iris: limit gen8 to 8 samples
  • iris: setup gen8 caps
  • iris: add fs invocations query workaround for broadwell
  • iris: handle qbo fragment shader invocation workaround
  • st/mesa: add support for lowering fp64/int64 for nir drivers
  • softpipe: fix texture view crashes
  • nir/spirv: don't use bare types, remove assert in split vars for testing
  • nir/deref: remove casts of casts which are likely redundant (v3)
  • softpipe: fix 32-bit bitfield extract
  • softpipe: handle 32-bit bitfield inserts
  • softpipe: remove shadow_ref assert.
  • softpipe: fix integer texture swizzling for 1 vs 1.0f
  • nir/split_vars: fixup some more explicit_stride related issues.
  • draw: bail instead of assert on instance count (v2)
  • draw/gs: fix point size outputs from geometry shader.
  • draw/vs: partly fix basevertex/vertex id
  • softpipe: fix clears to only clear specified color buffers.
  • softpipe/draw: fix vertex id in soft paths.
  • softpipe: add indirect store buffer/image unit
  • nir/deref: fix struct wrapper casts. (v3)
  • nir: use proper array sizing define for vectors
  • intel/compiler: use defined size for vector components
  • iris: avoid use after free in shader destruction
  • ddebug: add compute functions to help hang detection
  • draw: add stream member to stats callback
  • tgsi: add support for geometry shader streams.
  • softpipe: add support for indexed queries.
  • draw: add support to tgsi paths for geometry streams. (v2)
  • softpipe: add support for vertex streams (v2)
  • virgl: add support for missing command buffer binding.
  • virgl: add support for ARB_multi_draw_indirect
  • virgl: add support for ARB_indirect_parameters
  • draw: fix undefined shift of (1 << 31)
  • swrast: fix undefined shift of 1 << 31
  • llvmpipe: fix undefined shift 1 << 31.
  • virgl/drm: cleanup buffer from handle creation (v2)
  • virgl/drm: handle flink name better.
  • virgl/drm: insert correct handles into the table. (v3)
  • intel/compiler: fix uninit non-static variable. (v2)
  • nir: fix bit_size in lower indirect derefs.
  • r600: reset tex array override even when no view bound
  • spirv: fix SpvOpBitSize return value.
  • nir: fix lower vars to ssa for larger vector sizes.
  • util/tests: add basic unit tests for bitset
  • util/bitset: fix bitset range mask calculations.
  • kmsro: add _dri.so to two of the kmsro drivers.
  • glsl: init packed in more constructors.
  • Revert "mesa: unreference current winsys buffers when unbinding winsys buffers"

David Riley (3):

  • virgl: Store mapped hw resource with transfer object.
  • virgl: Allow transfer queue entries to be found and extended.
  • virgl: Re-use and extend queue transfers for intersecting buffer subdatas.

David Shao (1):

  • meson: ensure that xmlpool_options.h is generated for gallium targets that need it

Deepak Rawat (2):

  • winsys/drm: Fix out of scope variable usage
  • winsys/svga/drm: Fix 32-bit RPCI send message

Dominik Drees (1):

  • Add no_aos_sampling GALLIVM_PERF option

Drew Davenport (1):

  • util: Don't block SIGSYS for new threads

Dylan Baker (40):

  • bump version for 19.0 branch
  • docs: Add relnotes stub for 19.1
  • gallium: wrap u_screen in extern "C" for c++
  • automake: Add --enable-autotools to distcheck flags
  • android,autotools,i965: Fix location of float64_glsl.h
  • meson: remove build_by_default : true
  • meson: fix style in intel/tools
  • meson: remove -std=c++11 from intel/tools
  • get-pick-list: Add --pretty=medium to the arguments for Cc patches
  • meson: Add dependency on genxml to anvil
  • meson/iris: Use current coding style
  • docs: Add release notes for 19.0.0
  • docs: Add SHA256 sums for 19.0.0
  • docs: update calendar, add news item, and link release notes for 19.0.0
  • bin/install_megadrivers.py: Correctly handle DESTDIR=''
  • bin/install_megadrivers.py: Fix regression for set DESTDIR
  • docs: Add release notes for 19.0.1
  • docs: Add SHA256 sums for mesa 19.0.1
  • docs: update calendar, add news item and link release notes for 19.0.1
  • meson: Error if LLVM doesn't have rtti when building clover
  • meson: Error if LLVM is turned off but clover it turned on
  • docs: Add release notes for 19.0.2
  • docs: Add sha256 sums for 19.0.2
  • docs: update calendar, and news item and link release notes for 19.0.2
  • Delete autotools
  • docs: drop most autoconf references
  • ci: Delete autotools build jobs
  • docs: add relnotes for 19.0.3
  • docs: Add SHA256 sums for mesa 19.0.3
  • docs: update calendar, and news item and link release notes for 19.0.3
  • meson: always define libglapi
  • glsl: fix general_ir_test with mingw
  • meson: switch gles1 and gles2 to auto options
  • meson: Make shader-cache a trillean instead of boolean
  • meson: make nm binary optional
  • util/tests: Use define instead of VLA
  • glsl/tests: define ssize_t on windows
  • tests/vma: fix build with MSVC
  • meson: Don't build glsl cache_test when shader cache is disabled
  • meson: Force the use of config-tool for llvm

Eduardo Lima Mitev (5):

  • freedreno/a6xx: Silence compiler warnings
  • nir: Add ir3-specific version of most SSBO intrinsics
  • ir3/nir: Add a new pass 'ir3_nir_lower_io_offsets'
  • ir3/compiler: Enable lower_io_offsets pass and handle new SSBO intrinsics
  • ir3/lower_io_offsets: Try propagate SSBO's SHR into a previous shift instruction

El Christianito (1):

  • drirc: add Budgie WM to adaptive-sync blacklist

Eleni Maria Stea (6):

  • i965: Faking the ETC2 compression on Gen < 8 GPUs using two miptrees.
  • i965: Fixed the CopyImageSubData for ETC2 on Gen < 8
  • i965: Enabled the OES_copy_image extension on Gen 7 GPUs
  • i965: Removed the field etc_format from the struct intel_mipmap_tree
  • i965: fixed clamping in set_scissor_bits when the y is flipped
  • radv: consider MESA_VK_VERSION_OVERRIDE when setting the api version

Elie Tournier (3):

  • virgl: Add a caps to advertise GLES backend
  • virgl: Set PIPE_CAP_DOUBLES when running on GLES This is a lie but no known app use fp64.
  • virgl: Return an error if we use fp64 on top of GLES

Emil Velikov (30):

  • vc4: Declare the last cpu pointer as being modified in NEON asm.
  • docs: add release notes for 18.3.3
  • docs: add sha256 checksums for 18.3.3
  • docs: update calendar, add news item and link release notes for 18.3.3
  • anv: wire up the state_pool_padding test
  • docs: add release notes for 18.3.4
  • docs: add sha256 checksums for 18.3.4
  • docs: update calendar, add news item and link release notes for 18.3.4
  • egl/dri: de-duplicate dri2_load_driver*
  • meson: egl: correctly manage loader/xmlconfig
  • loader: use loader_open_device() to handle O_CLOEXEC
  • egl/android: bump the number of drmDevices to 64
  • docs: mention "Allow commits from members who can merge..."
  • egl/sl: split out swrast probe into separate function
  • egl/sl: use drmDevice API to enumerate available devices
  • egl/sl: use kms_swrast with vgem instead of a random GPU
  • docs: add release notes for 18.3.5
  • docs: add sha256 checksums for 18.3.5
  • docs: update calendar, add news item and link release notes for 18.3.5
  • docs: add release notes for 18.3.6
  • docs: add sha256 checksums for 18.3.6
  • docs: update calendar, add news item and link release notes for 18.3.6
  • turnip: drop dead close(master_fd)
  • vulkan/wsi: check if the display_fd given is master
  • vulkan/wsi: don't use DUMB_CLOSE for normal GEM handles
  • llvmpipe: add lp_fence_timedwait() helper
  • llvmpipe: correctly handle waiting in llvmpipe_fence_finish
  • egl/dri: flesh out and use dri2_create_drawable()
  • mapi: add static_date offset to MaxShaderCompilerThreadsKHR
  • mapi: correctly handle the full offset table

Emmanuel Gil Peyrot (1):

  • docs: make bugs.html easier to find

Eric Anholt (121):

  • v3d: Always enable the NEON utile load/store code.
  • v3d: Fix a release build set-but-unused compiler warning.
  • mesa: Skip partial InvalidateFramebuffer of packed depth/stencil.
  • v3d: Fix image_load_store clamping of signed integer stores.
  • nir: Move V3D's "the shader was TGSI, ignore FS output types" flag to NIR.
  • v3d: Fix precompile of FRAG_RESULT_DATA1 and higher outputs.
  • v3d: Store the actual mask of color buffers present in the key.
  • v3d: Fix dumping of shaders with alpha test.
  • v3d: Fix pack/unpack of VFPACK operand unpacks.
  • v3d: Fix input packing of .l for rounding/fdx/fdy.
  • v3d: Fix copy-propagation of input unpacks.
  • v3d: Whitespace consistency fix.
  • nir: Move panfrost's isign lowering to nir_opt_algebraic.
  • v3d: Use the NIR lowering for isign instead of rolling our own.
  • intel: Use the NIR lowering for isign.
  • freedreno: Use the NIR lowering for isign.
  • v3d: Clear the GMP on initialization of the simulator.
  • v3d: Sync indirect draws on the last rendering.
  • v3d: Use the early_fragment_tests flag for the shader's disable-EZ field.
  • v3d: Fix incorrect flagging of ldtmu as writing r4 on v3d 4.x.
  • v3d: Drop a perf note about merging unpack_half_*, which has been implemented.
  • v3d: Drop our hand-lowered nir_op_ffract.
  • v3d: Add a helper function for getting a nop register.
  • v3d: Refactor bcsel and if condition handling.
  • v3d: Do bool-to-cond for discard_if as well.
  • v3d: Kill off vir_PF(), which is hard to use right.
  • v3d: Fix f2b32 behavior.
  • v3d: Fix the check for "is the last thrsw inside control flow"
  • v3d: Add a function to describe what the c->execute.file check means.
  • v3d: Stop tracking num_inputs for VPM loads.
  • v3d: Delay emitting ldvpm on V3D 4.x until it's actually used.
  • v3d: Emit a simpler negate for the iabs implementation.
  • v3d: Move i2b and f2b support into emit_comparison.
  • kmsro: Add the rest of the current set of tinydrm drivers.
  • nir: Just return when asked to rewrite uses of an SSA def to itself.
  • v3d: Fix vir_is_raw_mov() for input unpacks.
  • v3d: Dump the VIR after register spilling if we were forced to.
  • v3d: Rematerialize MOVs of uniforms instead of spilling them.
  • v3d: Fix build of NEON code with Mesa's cflags not targeting NEON.
  • v3d: Restrict live intervals to the blocks reachable from any def.
  • v3d: Stop treating exec masking specially.
  • nir: Improve printing of load_input/store_output variable names.
  • v3d: Translate f2i(fround_even) as FTOIN.
  • v3d: Move the stores for fixed function VS output reads into NIR.
  • v3d: Fix temporary leaks of temp_registers and when spilling.
  • v3d: Do uniform rematerialization spilling before dropping threadcount
  • v3d: Switch implicit uniforms over to being any qinst->uniform != ~0.
  • v3d: Add support for vir-to-qpu of ldunif instructions to a temp.
  • v3d: Drop the old class bits splitting up the accumulators.
  • v3d: Add support for register-allocating a ldunif to a QFILE_TEMP.
  • v3d: Use ldunif instructions for uniforms.
  • v3d: Eliminate the TLB and TLBU files.
  • v3d: Drop the V3D 3.x vpm read dead code elimination.
  • v3d: Include a count of register pressure in the RA failure dumps.
  • st/dri: Set the PIPE_BIND_SHARED flag on create_image_with_modifiers.
  • util: Add a DAG datastructure.
  • vc4: Switch over to using the DAG datastructure for QIR scheduling.
  • v3d: Reuse list_for_each_entry_rev().
  • vc4: Reuse list_for_each_entry_rev().
  • v3d: Use the DAG datastructure for QPU instruction scheduling.
  • vc4: Switch the post-RA scheduler over to the DAG datastructure.
  • v3d: Disable PIPE_CAP_BLIT_BASED_TEXTURE_TRANSFER.
  • v3d: Fix leak of the mem_ctx after the DAG refactor.
  • v3d: Fix leak of the renderonly struct on screen destruction.
  • mesa/st: Make sure that prog_to_nir NIR gets freed.
  • mesa/st: Fix leaks of TGSI tokens in VP variants.
  • v3d: Always lay out shared tiled buffers with UIF_TOP set.
  • v3d: Allow the UIF modifier with renderonly.
  • v3d: Expose the dma-buf modifiers query.
  • v3d: Rename v3d_tmu_config_data to v3d_unit_data.
  • v3d: Move constant offsets to UBO addresses into the main uniform stream.
  • v3d: Upload all of UBO[0] if any indirect load occurs.
  • v3d: Remove some dead members of struct v3d_compile.
  • egl: Add a 565 pbuffer-only EGL config under X11.
  • dri3: Return the current swap interval from glXGetSwapIntervalMESA().
  • v3d: Add support for handling OOM signals from the simulator.
  • v3d: Bump the maximum texture size to 4k for V3D 4.x.
  • v3d: Don't try to use the TFU blit path if a scissor is enabled.
  • v3d: Add some more new packets for V3D 4.x.
  • st: Lower uniforms in st in the !PIPE_CAP_PACKED_UNIFORMS case as well.
  • vc4: Don't forget to set the range when scalarizing our uniforms.
  • vc4: Split UBO0 and UBO1 address uniform handling.
  • vc4: Upload CS/VS UBO uniforms together.
  • v3d: Add an optimization pass for redundant flags updates.
  • nir: Drop comments about the constant_index slots for load/stores.
  • nir: Drop remaining references to const_index in favor of the call to use.
  • nir: Add a comment about how intrinsic definitions work.
  • v3d: Add and use a define for the number of channels in a QPU invocation.
  • v3d: Drop a note for the future about PIPE_CAP_PACKED_UNIFORMS.
  • v3d: Include the number of max temps used in the shader-db output.
  • v3d: Replace the old shader-db env var output with the ARB_debug_output.
  • v3d: Add Compute Shader compilation support.
  • v3d: Add missing base offset to CS shared memory accesses.
  • v3d: Add missing dumping for the spill offset/size uniforms.
  • v3d: Detect the correct number of QPUs and use it to fix the spill size.
  • v3d: Use the new lower_to_scratch implementation for indirects on temps.
  • v3d: Only look up the 3rd texture gather offset for non-arrays.
  • v3d: Always set up the qregs for CSD payload.
  • v3d: Fix an invalid reuse of flags generation from before a thrsw.
  • v3d: Fix atomic cmpxchg in shaders on hardware.
  • nir: Fix deref offset calculation for structs.
  • nir: Use the nir_builder _imm helpers in setting up deref offsets.
  • gallium: Remove the pool pipebuffer manager.
  • gallium: Remove the ondemand pipebuffer manager.
  • gallium: Remove the "alt" pipebuffer manager interface.
  • gallium: Remove the malloc pipebuffer manager.
  • st/mesa: Don't set atomic counter size != 0 if MAX_SHADER_BUFFERS == 0.
  • v3d: Disable SSBOs and atomic counters on vertex shaders.
  • v3d: Fill in the ignored segment size fields to appease new simulator.
  • v3d: Apply the GFXH-930 workaround to the case where the VS loads attrs.
  • v3d: Assert that we do request the normal texturing return data.
  • v3d: Use _mesa_hash_table_remove_key() where appropriate.
  • vc4: Use _mesa_hash_table_remove_key() where appropriate.
  • v3d: Add a note about i/o indirection for future performance work.
  • v3d: Don't try to update the shadow texture for separate stencil.
  • Revert "v3d: Disable PIPE_CAP_BLIT_BASED_TEXTURE_TRANSFER."
  • v3d: Re-add support for memory_barrier_shared.
  • v3d: Fix detection of the last ldtmu before a new TMU op.
  • v3d: Fix detection of TMU write sequences in register spilling.
  • kmsro: Add support for V3D.
  • vc4: Fall back to renderonly if the vc4 driver doesn't have v3d.

Eric Engestrom (142):

  • wsi/display: add comment
  • egl: use coherent variable names
  • gitlab-ci: add ubuntu container
  • gitlab-ci: add a meson vulkan build
  • gitlab-ci: add a make vulkan build
  • gitlab-ci: add a scons no-llvm build
  • gitlab-ci: add scons llvm 3.5 build
  • gitlab-ci: add scons SWR build
  • gitlab-ci: add meson loader/classic DRI build
  • gitlab-ci: add meson gallium SWR build
  • gitlab-ci: add meson gallium RadeonSI build
  • gitlab-ci: add meson gallium "other drivers" build
  • gitlab-ci: add meson gallium ST Clover (LLVM 5.0) build
  • gitlab-ci: add meson gallium ST Clover (LLVM 6.0) build
  • gitlab-ci: add meson gallium ST Clover (LLVM 7.0) build
  • gitlab-ci: add meson gallium ST "Other" build
  • gitlab-ci: add make loaders/classic DRI build
  • gitlab-ci: add make Gallium Drivers SWR build
  • gitlab-ci: add make Gallium Drivers RadeonSI build
  • gitlab-ci: add make Gallium Drivers "Other" build
  • gitlab-ci: add make Gallium ST Clover LLVM-3.9 build
  • gitlab-ci: add make Gallium ST Clover LLVM-4.0 build
  • gitlab-ci: add make Gallium ST Clover LLVM-5.0 build
  • gitlab-ci: add make Gallium ST Clover LLVM-6.0 build
  • gitlab-ci: add make Gallium ST Clover LLVM-7 build
  • gitlab-ci: add make Gallium ST Other build
  • travis: remove unused linux code path
  • travis: remove unused scons code path
  • gitlab-ci: add meson glvnd build
  • xvmc: fix string comparison
  • xvmc: fix string comparison
  • meson: add script to print the options before configuring a builddir
  • driconf: drop unused macro
  • travis: fix osx make build
  • gitlab-ci: workaround docker bug for users with uppercase characters
  • wsi: query the ICD's max dimensions instead of hard-coding them
  • gitlab-ci: limit ninja to 4 threads max
  • drm-uapi/README: remove explicit list of driver names
  • drm-uapi: use local files, not system libdrm
  • gbm: drop duplicate #defines
  • st/dri: drop duplicate #define
  • etnaviv: drop duplicate #define
  • anv/tests: compile to something sensible in release builds
  • util/tests: compile to something sensible in release builds
  • gitlab-ci: use ccache to speed up builds
  • tegra/meson: add missing dep_libdrm
  • tegra/autotools: add missing libdrm cflags
  • gitlab-ci: limit the automatic CI to master and MRs
  • gitlab-ci: automatically run the CI on pushes to `ci/*` branches
  • anv: sort extensions alphabetically
  • anv: sort vendors extensions after KHR and EXT
  • anv: make sure the extensions stay sorted
  • anv: drop unused imports
  • anv: use anv_shader_bin_write_to_blob()'s return value
  • gitlab-ci: always run the containers build
  • dri_interface: add missing #include
  • driinfo: add DTD to allow the xml to be validated
  • meson/swr: replace hard-coded path with current_build_dir()
  • egl/android: replace magic 0=CbCr,1=CrCb with simple enum
  • vulkan: use VkBase{In,Out}Structure instead of a custom struct
  • driconf: add DTD to allow the drirc xml (00-mesa-defaults.conf) to be validated
  • gitlab-ci: install xmllint to validate 00-mesa-defaults.conf
  • anv: simplify chained comparison
  • anv: drop unused parameter
  • anv: remove spaces around kwargs assignment
  • anv: fix typo
  • Revert "swr/rast: Archrast codegen updates"
  • meson: avoid going back up the tree with include_directories()
  • anv: use the platform defines in vk.xml instead of hard-coding them
  • radv: use the platform defines in vk.xml instead of hard-coding them
  • util: #define PATH_MAX when undefined (eg. Hurd)
  • vulkan: import missing file from Khronos
  • egl: fix libdrm-less builds
  • vulkan: import vk_layer.h from Khronos
  • gitlab-ci: drop job prefixes
  • meson: fix with_dri2 definition for GNU Hurd
  • meson: remove unused include_directories(vulkan)
  • vulkan/util: use the platform defines in vk.xml instead of hard-coding them
  • vulkan/overlay: fix missing var rename in previous commit
  • meson: don't build libGLES*.so with GLVND
  • autotools: don't build libGLES*.so with GLVND
  • travis: fix meson build by letting `auto` do its job
  • travis: drop unused vars
  • travis: clean up
  • gitlab-ci: only build the default (=latest) and oldest llvm versions
  • gitlab-ci: autotools needs to be told which llvm version to use
  • r600: cast pointer to expected type
  • build: make passing an incorrect pointer type a hard error
  • gitlab-ci: fix llvm version (7 doesn't have a ".0")
  • hgl/meson: drop unused include directory
  • glx/meson: use full include path for dri_interface.h
  • android: fix missing backspace for line continuation
  • panfrost: fix tgsi_to_nir() call
  • panfrost: move #include to fix compilation
  • gitlab-ci: add panfrost to the gallium drivers build
  • wsi: deduplicate get_current_time() functions between display and x11
  • wsi/display: s/#if/#ifdef/ to fix -Wundef
  • wsi/wayland: fix pointer casting warning on 32bit
  • wsi/x11: use WSI_FROM_HANDLE() instead of pointer casts
  • turnip: use the platform defines in vk.xml instead of hard-coding them
  • travis: fix osx meson build
  • nir: const `nir_call_instr::callee`
  • gitlab-ci: add clang build
  • gitlab-ci: drop most autotools builds
  • util/disk_cache: close fd in the fallback path
  • egl: hide entrypoints that shouldn't be exported when using glvnd
  • meson: strip rpath from megadrivers
  • gallium/hud: fix memory leaks
  • gallium/hud: prevent buffer overflow
  • gallium/hud: fix rounding error in nic bps computation
  • simplify LLVM version string printing
  • util/process: document memory leak
  • vk/util: remove unneeded array index
  • bin: drop unused import from install_megadrivers.py
  • meson: remove meson-created megadrivers symlinks
  • gitlab-ci: build gallium extra hud
  • gitlab-ci: add lima to the build
  • delete autotools .gitignore files
  • delete autotools input files
  • docs: remove unsupported GL function name mangling
  • docs: drop autotools python information
  • docs: replace autotools intructions with meson equivalent
  • docs: use past tense when talking about autotools
  • docs: haiku can be built using meson
  • egl: fixup autotools-specific wording
  • util: add os_read_file() helper
  • anv: add support for VK_EXT_memory_budget
  • radv: update to use the new features struct names
  • turnip: update to use the new features struct names
  • gitlab-ci: build vulkan drivers in clang build
  • util: move #include out of #if linux
  • wsi/wayland: document lack of vkAcquireNextImageKHR timeout support
  • egl: hard-code destroy function instead of passing it around as a pointer
  • gitlab-ci: add scons windows build using mingw
  • gitlab-ci: merge several meson jobs
  • gitlab-ci: meson-gallium-radeonsi was a subset of meson-gallium-clover-llvm
  • gitlab-ci: simplify meson job names
  • gitlab-ci: merge meson-glvnd into meson-swr
  • travis: fix syntax, and drop unused stuff
  • util/os_file: always use the 'grow' mechanism
  • meson: expose glapi through osmesa
  • util/os_file: actually return the error read() gave us

Erico Nunes (5):

  • lima/ppir: support ppir_op_ceil
  • nir/algebraic: add lowering for fsign
  • lima: enable nir fsign lowering in ppir
  • lima/gpir: add limit of max 512 instructions
  • lima/ppir: support nir_op_ftrunc

Erik Faye-Lund (79):

  • mesa: expose NV_conditional_render on GLES
  • st/mesa: remove unused header-file
  • swr/codegen: fix autotools build
  • virgl: remove unused variables
  • virgl: remove unused variable
  • virgl: remove unused variable
  • virgl: remove unused variable
  • virgl: do not allow compressed formats for buffers
  • virgl: stricter usage of compressed 3d textures
  • virgl: also destroy all read-transfers
  • virgl: use debug_printf instead of fprintf
  • virgl: unsigned int -> unsigned
  • virgl: only warn about unchecked flags
  • virgl: do not warn about display-target binding
  • virgl: use debug_printf instead of fprintf
  • virgl: remove pointless transfer-counter
  • virgl: tmp_resource -> templ
  • virgl: track full virgl_resource instead of just virgl_hw_res
  • virgl: simplify virgl_texture_transfer_unmap logic
  • virgl: make unmap queuing a bit more straight-forward
  • virgl: check for readback on correct resource
  • virgl: wait for the right resource
  • virgl: return error if allocating resolve_tmp fails
  • virgl: rewrite core of virgl_texture_transfer_map
  • virgl: use pipe_box for blit dst-rect
  • virgl: support write-back with staged transfers
  • virgl: make sure bind is set for non-buffers
  • gallium/util: support translating between uint and sint formats
  • virgl: get readback-formats from host
  • virgl: only blit if resource is read
  • virgl: do color-conversion during when mapping transfer
  • virgl: document potentially failing blit
  • mesa/st: remove impossible error-check
  • gallium/u_vbuf: support NULL-resources
  • i915: support NULL-resources
  • nouveau: support NULL-resources
  • swr: support NULL-resources
  • mesa/st: accept NULL and empty buffer objects
  • mesa/st: remove always-false state
  • softpipe: setup pixel_offset for all primitive types
  • docs: normaize css-indent style
  • docs: remove non-existent css attribute
  • docs: remove long commented out css
  • docs: add missing semicolon
  • docs: avoid repeating the font
  • docs: avoid repeating the color
  • docs: remove spurious newline
  • docs: use multiple background-images for header
  • docs: simplify css-centering
  • docs: do not hard-code header-height
  • docs: properly escape '>'
  • docs: properly escape ampersand
  • docs: remove stray paragraph-close
  • docs: use h2 instead of b-tag for headings
  • docs: use dl/dd instead of blockquote for freedesktop link
  • docs: open list-item before closing it
  • docs: close paragraphs before lists
  • docs: close lists
  • docs: remove stray paragraph-close
  • docs: close paragraphs before preformatted text
  • docs: start paragraph before closing it
  • docs: drop paragraph around preformatted text
  • docs: fix incorrectly closed paragraph
  • docs: don't pointlessly close and re-start definition lists
  • docs: remove stray list-start
  • docs: fixup bad paragraphing
  • docs: add missing lists
  • docs: fix closing of paragraphs
  • docs: fixup list-item tags
  • docs: fix closing of list-items
  • docs: replace empty list with a none-paragraph
  • docs: turn faq-index into an ordered list
  • docs: drop centered heading for faq
  • docs: reorder heading and notice
  • meson: lift driver-collection out into parent build-file
  • meson: give dri- and gallium-drivers separate vars
  • meson: add build-summary
  • docs: fixup mistake in contents
  • draw: flush when setting stream-out targets

Ernestas Kulik (2):

  • vc4: Fix leak in HW queries error path
  • v3d: Fix leak in resource setup error path

Francisco Jerez (6):

  • intel/dump_gpu: Disambiguate between BOs from different GEM handle spaces.
  • intel/fs: Exclude control sources from execution type and region alignment calculations.
  • intel/fs: Lower integer multiply correctly when destination stride equals 4.
  • intel/fs: Cap dst-aligned region stride to maximum representable hstride value.
  • intel/fs: Implement extended strides greater than 4 for IR source regions.
  • intel/fs: Rely on undocumented unrestricted regioning for 32x16-bit integer multiply.

Fritz Koenig (4):

  • freedreno: pass count to query_dmabuf_modifiers
  • freedreno/a6xx: UBWC support
  • freedreno: UBWC allocator
  • freedreno/a6xx: Enable UBWC modifier

Gert Wollny (35):

  • mesa/core: Enable EXT_texture_sRGB_R8 also for desktop GL
  • radeonsi: release tokens after creating the shader program
  • mesa: release references to image textures when a context is destroyed
  • virgl: Enable mixed color FBO attachemnets only when the host supports it
  • mesa/core: Enable EXT_depth_clamp for GLES >= 2.0
  • nir: Add posibility to not lower to source mod 'abs' for ops with three sources
  • mesa: Expose EXT_texture_query_lod and add support for its use shaders
  • softpipe: Enable PIPE_CAP_MIXED_COLORBUFFER_FORMATS It seems softpipe actually supports this. This change enables the following piglits as passing without regressions in the gpu test set:
  • virgl: Add a caps feature check version
  • softpipe: Implement ATOMFADD and enable cap TGSI_ATOMFADD
  • virgl: define MAX_VERTEX_STREAMS based on availability of TF3
  • softpipe: Use mag texture filter also for clamped lod == 0
  • softpipe: Don't use mag filter for gather op
  • softpipe: raise number of bits used for X coordinate texture lookup
  • softpipe: Add an extra code path for the buffer texel lookup
  • softpipe: Enable PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
  • Gallium: Add new CAP that indicated whether IO array definitions can be shriked
  • virgl: Enable passing arrays as input to fragment shaders
  • doc/features: Add a few extensions to the feature matrix
  • softpipe: Factor gradient evaluation out of the lambda evaluation
  • softpipe: Prepare handling explicit gradients
  • softpipe: Pipe gather_comp through from st_tgsi_get_samples
  • softpipe: Move selection of shadow values up and clean parameter list
  • softpipe: tie in new code path for lod evaluation
  • softpipe: keep input lod for explicite derivatives
  • softpipe: evaluate cube the faces on a per sample bases
  • softpipe: Factor out evaluation of the source indices
  • softpipe: Add an per-input array for interpolator correctors to machine
  • softpipe: Add (fake) support for TGSI_OPCODE_INTERP_SAMPLE
  • softpipe: Add support for TGSI_OPCODE_INTERP_OFFSET
  • softpipe: Add support for TGSI_OPCODE_INTERP_CENTROID
  • softpipe: Increase the GLSL feature level
  • doc: Update feature matrix
  • softpipe/buffer: load only as many components as the the buffer resource type provides
  • Revert "softpipe/buffer: load only as many components as the the buffer resource type provides"

Greg V (3):

  • util: emulate futex on FreeBSD using umtx
  • gallium/hud: add CPU usage support for FreeBSD
  • gallium: enable dmabuf on BSD as well

Grigori Goronzy (1):

  • glx: add support for GLX_ARB_create_context_no_error (v3)

Guido Günther (4):

  • docs: Fix 19.0.x version numbers
  • gallium: ddebug: Add missing fence related wrappers
  • gallium/u_dump: util_dump_sampler_view: Dump u.tex.first_level
  • gallium: trace: Add missing fence related wrappers

Gurchetan Singh (44):

  • mesa/main: Expose EXT_texture_compression_s3tc_srgb
  • i965: Set flag for EXT_texture_compression_s3tc_srgb
  • st/mesa: expose EXT_texture_compression_s3tc_srgb
  • docs: add GL_EXT_texture_compression_s3tc_srgb to release notes
  • virgl: add ability to do finer grain dirty tracking
  • virgl: use virgl_resource_dirty helper
  • virgl: don't mark unclean after a flush
  • virgl: track level cleanliness rather than resource cleanliness
  • virgl: make alignment smaller when uploading index user buffers
  • virgl: unmap uploader at flush time
  • virgl: when creating / freeing transfers, pass slab pool directly
  • virgl: add protocol for resource transfers
  • virgl: use virgl_transfer in inline write
  • virgl: limit command length to 16 bits
  • virgl: keep track of number of computations
  • virgl: pass virgl transfer to virgl_res_needs_flush_wait
  • virgl: add extra checks in virgl_res_needs_flush_wait
  • virgl: make winsys modifications for encoded transfers
  • virgl: add encoder functions for new protocol
  • virgl: introduce transfer queue
  • virgl: use transfer queue
  • virgl: use virgl_transfer_inline_write even less
  • virgl/vtest: deprecate protocol version 1
  • egl/sl: also allow virtgpu to fallback to kms_swrast
  • virgl: use uint16_t mask instead of separate booleans
  • configure.ac / meson: depend on libnativewindow when appropriate
  • anv: move anv_GetMemoryAndroidHardwareBufferANDROID up a bit
  • anv: fix build on Nougat
  • egl/android: move droid_image_loader_extension down a bit
  • egl/android: move droid_open_device_drm_gralloc down a bit
  • egl/android: droid_open_device_drm_gralloc --> droid_open_device
  • egl/android: refactor droid_load_driver a bit
  • egl/android: plumb swrast option
  • egl/android: use swrast option in droid_load_driver
  • egl/android: use software rendering when appropriate
  • egl/android: chose node type based on swrast and preprocessor flags
  • virgl: wait after a flush
  • virgl/vtest: execute a transfer_get when flushing the front buffer
  • virgl/vtest: add utilities for receiving fds
  • virgl/vtest: plumb support for shared memory
  • virgl/vtest: receive and handle shared memory fd
  • virgl/vtest: modify sending and receiving data for shared memory
  • virgl/vtest: wait after issuing a transfer get
  • virgl/vtest: bump up protocol version + support encoded transfers

Guttula, Suresh (1):

  • st/va:Add support for indirect manner by returning VA_STATUS_ERROR_OPERATION_FAILED

Hal Gentz (1):

  • glx: Fix synthetic error generation in __glXSendError

Heinrich (1):

  • gbm: Improve documentation of BO import

Iago Toral Quiroga (39):

  • compiler/nir: add an is_conversion field to nir_op_info
  • compiler/nir: add lowering option for 16-bit fmod
  • compiler/nir: add lowering for 16-bit flrp
  • compiler/nir: add lowering for 16-bit ldexp
  • intel/compiler: add a NIR pass to lower conversions
  • intel/compiler: split float to 64-bit opcodes from int to 64-bit
  • intel/compiler: handle b2i/b2f with other integer conversion opcodes
  • intel/compiler: assert restrictions on conversions to half-float
  • intel/compiler: lower some 16-bit float operations to 32-bit
  • intel/compiler: handle extended math restrictions for half-float
  • intel/compiler: implement 16-bit fsign
  • intel/compiler: drop unnecessary temporary from 32-bit fsign implementation
  • intel/compiler: add instruction setters for Src1Type and Src2Type.
  • intel/compiler: add new half-float register type for 3-src instructions
  • intel/compiler: don't compact 3-src instructions with Src1Type or Src2Type bits
  • intel/compiler: allow half-float on 3-source instructions since gen8
  • intel/compiler: set correct precision fields for 3-source float instructions
  • intel/compiler: fix ddx and ddy for 16-bit float
  • intel/compiler: fix ddy for half-float in Broadwell
  • intel/compiler: workaround for SIMD8 half-float MAD in gen8
  • intel/compiler: split is_partial_write() into two variants
  • intel/compiler: activate 16-bit bit-size lowerings also for 8-bit
  • intel/compiler: rework conversion opcodes
  • intel/compiler: ask for an integer type if requesting an 8-bit type
  • intel/eu: force stride of 2 on NULL register for Byte instructions
  • intel/compiler: generalize the combine constants pass
  • intel/compiler: implement is_zero, is_one, is_negative_one for 8-bit/16-bit
  • intel/compiler: add a brw_reg_type_is_integer helper
  • intel/compiler: fix cmod propagation for non 32-bit types
  • intel/compiler: remove inexact algebraic optimizations from the backend
  • intel/compiler: skip MAD algebraic optimization for half-float or mixed mode
  • intel/compiler: implement SIMD16 restrictions for mixed-float instructions
  • intel/compiler: also set F execution type for mixed float mode in BDW
  • intel/compiler: validate region restrictions for half-float conversions
  • intel/compiler: validate conversions between 64-bit and 8-bit types
  • intel/compiler: validate region restrictions for mixed float mode
  • compiler/spirv: move the check for Int8 capability
  • anv/pipeline: support Float16 and Int8 SPIR-V capabilities in gen8+
  • anv/device: expose VK_KHR_shader_float16_int8 in gen8+

Ian Romanick (55):

  • nir: Silence zillions of unused parameter warnings in release builds
  • intel/compiler: Silence warning about value that may be used uninitialized
  • nir: Document some fields of nir_loop_terminator
  • nir: Refactor code that checks phi nodes in opt_peel_loop_initial_if
  • nir: Select phi nodes using prev_block instead of continue_block
  • nir: Split ALU instructions in loops that read phis
  • nir: Convert a bcsel with only phi node sources to a phi node
  • spirv: Add missing break
  • nir/algebraic: Convert some f2u to f2i
  • nir/algebraic: Simplify comparison with sequential integers starting with 0
  • intel/vec4: Emit constants for some ALU sources as immediate values
  • nir/algebraic: Replace i2b used by bcsel or if-statement with comparison
  • intel/fs: Relax type matching rules in cmod propagation from MOV instructions
  • intel/fs: Handle OR source modifiers in algebraic optimization
  • intel/fs: Refactor ALU source and destination handling to a separate function
  • intel/fs: Emit logical-not of operands on Gen8+
  • intel/fs: Use De Morgan's laws to avoid logical-not of a logic result on Gen8+
  • intel/fs: Emit better code for b2f(inot(a)) and b2i(inot(a))
  • nir/algebraic: Replace a bcsel of a b2f sources with a b2f(!(a || b))
  • intel/fs: Generate if instructions with inverted conditions
  • nir/algebraic: Replace a-fract(a) with floor(a)
  • intel/fs: Don't assert on b2f with a saturate modifier
  • nir/algebraic: Optimize away an fsat of a b2f
  • intel/compiler: Silence many unused parameter warnings in brw_eu.h
  • intel/compiler: Silence unused parameter warning in brw_interpolation_map.c
  • intel/fs: nir_op_extract_i8 extracts a byte, not a word
  • intel/fs: Fix extract_u8 of an odd byte from a 64-bit integer
  • nir/algebraic: Fix up extract_[iu]8 after loop unrolling
  • nir/algebraic: Remove redundant extract_[iu]8 patterns
  • nir/algebraic: Add missing 64-bit extract_[iu]8 patterns
  • nir/algebraic: Add missing 16-bit extract_[iu]8 patterns
  • nir/algebraic: Fix up extract_[iu]8 after loop unrolling
  • nir/algebraic: Remove redundant extract_[iu]8 patterns
  • nir/algebraic: Add missing 64-bit extract_[iu]8 patterns
  • nir/algebraic: Add missing 16-bit extract_[iu]8 patterns
  • nir: Add nir_const_value_negative_equal
  • nir: Add nir_alu_srcs_negative_equal
  • nir: Add partial redundancy elimination for compares
  • intel/compiler: Use partial redundancy elimination for compares
  • intel/fs: Eliminate dead code first
  • intel/fs: Refactor code generation for nir_op_fsign to its own function
  • intel/fs: Add a scale factor to emit_fsign
  • intel/fs: Generate better code for fsign multiplied by a value
  • nir/algebraic: Recognize open-coded copysign(1.0, a)
  • nir/algebraic: Replace a pattern where iand with a Boolean is used as a bcsel
  • nir/algebraic: Fix some 1-bit Boolean weirdness
  • nir/algebraic: Strength reduce some compares of x and -x
  • intel/fs: Add support for float16 to the fsign optimizations
  • glsl: Silence may unused parameter warnings in glsl/ir.h
  • intel/compiler: Don't have sepearate, per-Gen nir_options
  • intel/compiler: Lower ffma on Gen4 and Gen5
  • intel/fs: Fix D to W conversion in opt_combine_constants
  • mesa: Add missing display list support for GL_FOG_COORDINATE_SOURCE
  • nir: Saturating integer arithmetic is not associative
  • Revert "nir: add late opt to turn inot/b2f combos back to bcsel"

Icenowy Zheng (5):

  • lima: add dummy set_sample_mask function
  • lima: make lima_context_framebuffer subtype of pipe_framebuffer_state
  • lima: implement blit with util_blitter
  • lima: lower bool to float when building shaders
  • lima: add Android build

Ilia Mirkin (14):

  • nv50,nvc0: add explicit settings for recent caps
  • nvc0: add support for handling indirect draws with attrib conversion
  • nvc0/ir: always use CG mode for loads from atomic-only buffers
  • nvc0/ir: fix second tex argument after levelZero optimization
  • nvc0: fix 3d images on kepler
  • nv50,nvc0: use condition for occlusion queries when already complete
  • nvc0: stick zero values for the compute invocation counts
  • nvc0: we have 16k-sized framebuffers, fix default scissors
  • swr: set PIPE_CAP_MAX_VARYINGS correctly
  • mesa: add explicit enable for EXT_float_blend, and error condition
  • st/mesa: enable GL_EXT_float_blend when possible
  • i965: always enable EXT_float_blend
  • nv50: disable compute
  • glsl: fix recording of variables for XFB in TCS shaders

Illia Iorin (1):

  • mesa/main: Fix multisample texture initialize

James Zhu (12):

  • gallium/auxiliary/vl: Move dirty define to header file
  • gallium/auxiliary/vl: Split vl_compositor graphic shaders from vl_compositor API
  • gallium/auxiliary/vl: Rename csc_matrix and increase its size.
  • gallium/auxiliary/vl: Add compute shader to support video compositor render
  • gallium/auxiliary/vl: Add video compositor compute shader render
  • gallium/auxiliary/vl: Fix transparent issue on compute shader with rgba
  • gallium/auxiliary/vl: Increase shader_params size
  • gallium/auxiliary/vl: Change grid setting
  • gallium/auxiliary/vl: Change weave compute shader implementation
  • gallium/auxiliary/vl: Fixed blur issue with weave compute shader
  • gallium/auxiliary/vl: Fixed blank issue with compute shader
  • gallium/auxiliary/vl: Add barrier/unbind after compute shader launch.

Jan Vesely (2):

  • Partially revert "gallium: fix autotools build of pipe_msm.la"
  • gallium/aux: Report error if loading of a pipe driver fails.

Jan Zielinski (1):

  • swr/rast: fix 32-bit compilation on Linux

Jason Ekstrand (212):

  • spirv: Replace vtn_constant_value with vtn_constant_uint
  • spirv: Rework handling of spec constant workgroup size built-ins
  • spirv: Handle constants and types before execution modes
  • spirv: Handle OpExecutionModeId
  • spirv: Support LocalSizeId and LocalSizeHintId execution modes
  • intel/nir: Add global support to lower_mem_access_bit_sizes
  • intel/fs/cse: Split create_copy_instr into three cases
  • intel/fs: Properly handle 64-bit types in LOAD_PAYLOAD
  • intel/fs: Do the grf127 hack on SIMD8 instructions in SIMD16 mode
  • intel/fs: Implement load/store_global with A64 untyped messages
  • intel/fs: Use SENDS for A64 writes on gen9+
  • intel/fs: Implement nir_intrinsic_global_atomic_*
  • anv: Implement VK_EXT_buffer_device_address
  • relnotes: Add VK_EXT_buffer_device_address
  • nir/deref: Drop zero ptr_as_array derefs
  • README: Drop the badges from the readme
  • intel/fs: Use enumerated array assignments in fb read TXF setup
  • nir/deref: Rematerialize parents in rematerialize_derefs_in_use_blocks
  • nir: Silence a couple of warnings in release builds
  • anv/blorp: Delete a pointless assert
  • anv: Silence some compiler warnings in release builds
  • intel/fs: Silence a compiler warning
  • intel/fs: Bail in optimize_extract_to_float if we have modifiers
  • nir/dead_cf: Inline cf_node_has_side_effects
  • nir/dead_cf: Stop relying on liveness analysis
  • compiler/types: Add a contains_64bit helper
  • nir/xfb: Properly align 64-bit values
  • nir: Rewrite lower_clip_cull_distance_arrays to do a lot less lowering
  • nir/xfb: Work in terms of components rather than slots
  • nir/xfb: Handle compact arrays in gather_xfb_info
  • nir: Fix a compile warning
  • nir/lower_clip_cull: Fix an incorrect assert
  • iris: Don't lower image formats for write-only images
  • iris/compute: Don't increment the grid size offset
  • iris/compute: Zero out the last grid size on indirect dispatches
  • iris: Configure the L3$ on the compute context
  • iris: Don't set constant read lengths at upload time
  • iris: Allocate buffer resources separately
  • iris: Copy anv's MI_MATH helpers for multiplication and division
  • nir/split_vars: Don't compact vectors unnecessarily
  • nir/builder: Don't emit no-op swizzles
  • intel/eu: Add an EOT parameter to send_indirect_[split]_message
  • intel/fs: Add an enum type for logical sampler inst sources
  • intel/fs: Re-order logical surface arguments
  • intel/fs: Drop the fs_surface_builder
  • intel/vec4: Drop dead code for handling typed surface messages
  • intel/fs: Get rid of the IMAGE_SIZE opcode
  • intel/compiler: Drop unused surface opcodes
  • intel/schedule_instructions: Move some comments
  • intel/compiler: Re-prefix non-logical surface opcodes with VEC4
  • anv: Count surfaces for non-YCbCr images in GetDescriptorSetLayoutSupport
  • spirv: OpImageQueryLod requires a sampler
  • intel,nir: Lower TXD with min_lod when the sampler index is not < 16
  • anv: Use an actual binding for gl_NumWorkgroups
  • anv/pipeline: Drop anv_fill_binding_table
  • anv/descriptor_set: Refactor alloc/free of descriptor sets
  • anv: Rework arguments to anv_descriptor_set_write_*
  • anv: Stop allocating buffer views for dynamic buffers
  • anv: Count image param entries rather than images
  • anv: Clean up descriptor set layouts
  • anv: drop add_var_binding from anv_nir_apply_pipeline_layout.c
  • anv: Refactor descriptor pushing a bit
  • anv: Take references to push descriptor set layouts
  • anv: Add a concept of a descriptor buffer
  • spirv: Pull offset/stride from the pointer for OpArrayLength
  • spirv: Use the generic dereference function for OpArrayLength
  • spirv: Use the same types for resource indices as pointers
  • anv: Implement VK_EXT_inline_uniform_block
  • nir: Expose double and int64 op_to_options_mask helpers
  • nir: Teach loop unrolling about 64-bit instruction lowering
  • i965: Compile the fp64 program based on nir options
  • intel/debug: Add a debug flag to force software fp64
  • intel/nir: Drop an unneeded lower_constant_initializers call
  • glsl/nir: Add a shared helper for building float64 shaders
  • glsl/nir: Inline functions in float64_funcs_to_nir
  • nir/inline_functions: Break inlining into a builder helper
  • nir/deref: Expose nir_opt_deref_impl
  • nir/lower_doubles: Inline functions directly in lower_doubles
  • intel/nir: Move 64-bit lowering later
  • st/nir: Move 64-bit lowering later
  • nir/builder: Emit better code for iadd/imul_imm
  • nir/builder: Cast array indices in build_deref_follower
  • nir/builder: Add a build_deref_array_imm helper
  • intel/nir: Move lower_mem_access_bit_sizes to postprocess_nir
  • anv/pipeline: Move lower_explicit_io much later
  • nir: Add a pass for lowering IO back to vector when possible
  • intel/nir: Vectorize all IO
  • anv: Ignore VkRenderPassInputAttachementAspectCreateInfo
  • nir/loop_unroll: Fix out-of-bounds access handling
  • glsl/list: Add a list variant of insert_after
  • glsl/lower_vector_derefs: Don't use a temporary for TCS outputs
  • anv: Stop using VK_TRUE/FALSE
  • anv/pass: Flag the need for a RT flush for resolve attachments
  • anv: Only set 3DSTATE_PS::VectorMaskEnable on gen8+
  • nir/algebraic: Add a couple optimizations for iabs and ishr
  • nir/validate: Only require bare types to match for copy_deref
  • nir/validate: Allow 32-bit boolean load/store intrinsics
  • compiler/types: Add a new is_interface C wrapper
  • compiler/types: Add a C wrapper to get full struct field data
  • compiler/types: Add helpers to get explicit types for standard layouts
  • nir/deref: Consider COHERENT decorated var derefs as aliasing
  • nir: Rename nir_address_format_vk_index_offset to not be vk
  • nir/lower_io: Add a new buffer_array_length intrinsic and lowering
  • glsl: Don't lower vector derefs for SSBOs, UBOs, and shared
  • glsl/nir: Set explicit types on UBO/SSBO variables
  • glsl/nir: Handle unlowered SSBO atomic and array_length intrinsics
  • glsl/nir: Add a pass to lower UBO and SSBO access
  • i965: Stop setting LowerBuferInterfaceBlocks
  • st/mesa: Let NIR lower UBO and SSBO access when we have it
  • nir/builder: Add a vector extract helper
  • nir: Add a new pass to lower array dereferences on vectors
  • intel/nir: Lower array-deref-of-vector UBO and SSBO loads
  • anv: Implement VK_EXT_host_query_reset
  • anv,radv: Implement VK_KHR_surface_capability_protected
  • Revert "nir: const `nir_call_instr::callee`"
  • anv: Bump maxComputeWorkgroupInvocations
  • nir: Constant values are per-column not per-component
  • anv,radv,turnip: Lower TG4 offsets with nir_lower_tex
  • spirv: Drop inline tg4 lowering
  • nir/lower_io: Add a bounds-checked 64-bit global address format
  • nir: Add a lowering pass for non-uniform resource access
  • nir: Add texture sources and intrinsics for bindless
  • nir: Add access flags to deref and SSBO atomics
  • spirv: Handle the NonUniformEXT decoration
  • Revert "anv/radv: release memory allocated by glsl types during spirv_to_nir"
  • nir: Lock around validation fail shader dumping
  • nir/algebraic: Drop some @bool specifiers
  • nir/algebraic: Add some logical OR and AND patterns
  • vc4: Prefer nir_src_comp_as_uint over nir_src_as_const_value
  • nir/search: Search for all combinations of commutative ops
  • nir: Get rid of nir_register::is_packed
  • nir: Get rid of global registers
  • intel/common: Add a MI command builder
  • intel/common: Add unit tests for gen_mi_builder
  • anv: Use gen_mi_builder for CmdDrawIndirectByteCount
  • anv: Use gen_mi_builder for computing resolve predicates
  • +
  • anv: Use gen_mi_builder for indirect draw parameters
  • +
  • anv: Use gen_mi_builder for indirect dispatch
  • +
  • anv: Use gen_mi_builder for conditional rendering
  • +
  • anv: Use gen_mi_builder for queries
  • +
  • anv: Move mi_memcpy and mi_memset to gen_mi_builder
  • +
  • anv/cmd_buffer: Use gen_mi_sub instead of gen_mi_add with a negative
  • +
  • intel/common: Support bigger right-shifts with mi_builder
  • +
  • anv/pipeline: Fix MEDIA_VFE_STATE::PerThreadScratchSpace on gen7
  • +
  • nir: Add a pass for selectively lowering variables to scratch space
  • +
  • intel/nir: Take a nir_tex_instr and src index in brw_texture_offset
  • +
  • nir/builder: Add a nir_imm_zero helper
  • +
  • nir/print: Use nir_src_as_int for array indices
  • +
  • nir/constant_folding: Get rid of a bit size switch statement
  • +
  • spirv: Drop some unneeded bit size switch statements
  • +
  • nir/load_const_to_scalar: Get rid of a bit size switch statement
  • +
  • nir/validate: Require unused bits of nir_const_value to be zero
  • +
  • vulkan: Update the XML and headers to 1.1.106
  • +
  • anv: Update to use the new features struct names
  • +
  • nir/algebraic: Move the template closer to the render function
  • +
  • nir/algebraic: Use a cache to avoid re-emitting structs
  • +
  • intel/mi_builder: Re-order an initializer
  • +
  • intel/mi_builder: Disable mem_mem tests on IVB
  • +
  • nir: Drop "struct" from some nir_* declarations
  • +
  • nir: Rework nir_src_as_alu_instr to not take a pointer
  • +
  • nir: Add a nir_src_as_intrinsic() helper
  • +
  • anv: Re-sort the GetPhysicalDeviceFeatures2 switch statement
  • +
  • anv: Drop some unneeded ANV_FROM_HANDLE for physical devices
  • +
  • intel/fs: Account for live range lengths in spill costs
  • +
  • anv: Make all VkDeviceMemory BOs resident permanently
  • +
  • anv: Put image params in the descriptor set buffer on gen8 and earlier
  • +
  • anv: Add a #define for the max binding table size
  • +
  • anv/pipeline: Sort bindings by most used first
  • +
  • anv/pipeline: Add skeleton support for spilling to bindless
  • +
  • nir/lower_io: Expose some explicit I/O lowering helpers
  • +
  • intel/nir: Re-run int64 lowering in postprocess_nir
  • +
  • anv: Add a has_a64_buffer_access to anv_physical_device
  • +
  • anv: Lower some SSBO operations in apply_pipeline_layout
  • +
  • anv: Implement SSBOs bindings with GPU addresses in the descriptor BO
  • +
  • anv: Implement VK_KHR_shader_atomic_int64
  • +
  • intel,nir: Lower TXD with a bindless sampler
  • +
  • intel/fs: Add support for bindless texture ops
  • +
  • anv: Count the number of planes in each descriptor binding
  • +
  • anv: Use write_image_view to initialize immutable samplers
  • +
  • anv: Pass the plane into lower_tex_deref
  • +
  • anv: Use bindless textures and samplers
  • +
  • intel/fs: Add support for bindless image load/store/atomic
  • +
  • anv: Use bindless handles for images
  • +
  • anv: Put binding flags in descriptor set layouts
  • +
  • anv: Implement VK_EXT_descriptor_indexing
  • +
  • nir: Add helpers for getting the type of an address format
  • +
  • anv/nir: Add a central helper for figuring out SSBO address formats
  • +
  • anv: Ignore descriptor binding flags if bindingCount == 0
  • +
  • anv: Rework the descriptor set layout create loop
  • +
  • anv,radv: Update release notes for newly implemented extensions
  • +
  • nir: Use the NIR_SRC_AS_ macro to define nir_src_as_deref
  • +
  • anv/descriptor_set: Unlink sets from the pool in set_destroy
  • +
  • anv/descriptor_set: Destroy sets before pool finalization
  • +
  • anv/descriptor_set: Only vma_heap_finish if we have a descriptor buffer
  • +
  • anv/descriptor_set: Properly align descriptor buffer to a page
  • +
  • anv: Better handle 32-byte alignment of descriptor set buffers
  • +
  • anv/descriptor_set: Don't fully destroy sets in pool destroy/reset
  • +
  • nir/algebraic: Optimize integer cast-of-cast
  • +
  • util/bitset: Return an actual bool from test macros
  • +
  • anv: Stop including POS in FS input limits
  • +
  • anv,i965: Stop warning about incomplete gen11 support
  • +
  • nir: Add a SSA type gathering pass
  • +
  • intel/fs/ra: Only add dest interference to sources that exist
  • +
  • intel/fs/ra: Stop adding RA interference to too many SENDS nodes
  • +
  • anv: Emulate texture swizzle in the shader when needed
  • +
  • anv: Stop forcing bindless for images
  • +
  • anv: Only consider minSampleShading when sampleShadingEnable is set
  • +
  • iris: Don't assume UBO indices are constant
  • +
  • intel/fs,vec4: Use g0 as the header for MFENCE
  • +
  • intel/fs: Do a stalling MFENCE in endInvocationInterlock()
  • +
  • nir/dead_cf: Call instructions aren't dead
  • +
  • nir/propagate_invariant: Don't add NULL vars to the hash table

Jian-Hong Pan (1):

  • intel: Fix the description of Coffeelake pci-id 0x3E98

Jiang, Sonny (1):

  • va: use a compute shader for the blit

John Stultz (3):

  • mesa: android: freedreno: Fix build failure due to path change
  • mesa: Makefile.sources: Add ir3_nir_lower_load_barycentric_at_sample/offset to Makefile.sources
  • mesa: Makefile.sources: Add nir_lower_fb_read.c to Makefile.sources list

Jon Turney (1):

  • meson: Force '.so' extension for DRI drivers

Jonathan Marek (22):

  • nir: add missing vec opcodes in lower_bool_to_float
  • freedreno: a2xx: fix fast clear
  • freedreno: a2xx: don't write 4th vertex in mem2gmem
  • freedreno: a2xx: add use_hw_binning function
  • freedreno: a2xx: fix fast clear for some gmem configurations
  • freedreno: a2xx: fix mipmapping for NPOT textures
  • freedreno: use renderonly path for buffers allocated with modifiers
  • freedreno: catch failing fd_blit and fallback to software blit
  • mesa: add GL_AMD_compressed_ATC_texture support
  • gallium: add ATC format support
  • llvmpipe, softpipe: no support for ATC textures
  • st/mesa: add ATC support
  • freedreno: a3xx: add GL_AMD_compressed_ATC_texture support
  • freedreno: a2xx: add GL_AMD_compressed_ATC_texture support
  • svga: add new ATC formats to the format conversion table
  • freedreno: a2xx: fix builtin blit program compilation
  • freedreno: a2xx: disable PIPE_CAP_PACKED_UNIFORMS
  • freedreno: a2xx: use nir_lower_io for TGSI shaders
  • freedreno: a2xx: enable batch reordering
  • freedreno: a2xx: same gmem2mem sequence for all tiles
  • nir: improve convert_yuv_to_rgb
  • freedreno/ir3: fix input ncomp for vertex shaders

Jordan Justen (22):

  • iris: Set num_uniforms in bytes
  • iris/compute: Set mask bits on PIPELINE_SELECT
  • iris: Add IRIS_DIRTY_CONSTANTS_CS
  • iris: Add iris_restore_compute_saved_bos
  • iris/compute: Add MEDIA_STATE_FLUSH following WALKER
  • iris/compute: Flush compute batches
  • iris/compute: Get group counts from grid->grid
  • iris/program: Don't try to push ubo ranges for compute
  • iris/compute: Wait on compute batch when mapping
  • iris/compute: Provide binding table entry for gl_NumWorkGroups
  • iris/compute: Flush compute batch on memory-barriers
  • iris/compute: Push subgroup-id
  • iris/compute: Support indirect compute dispatch
  • iris: Emit default L3 config for the render pipeline
  • genxml/gen_bits_header.py: Use regex to strip no alphanum chars
  • genxml: Remove extra space in gen4/45/5 field name
  • iris: Add gitlab-ci build testing
  • iris: Always use in-tree i915_drm.h
  • nir: Add int64/doubles options into nir_shader_compiler_options
  • intel/compiler: Move int64/doubles lowering options
  • scons: Generate float64_glsl.h for glsl_to_nir fp64 lowering
  • intel/genxml: Support base-16 in value & start fields in gen_sort_tags.py

Jose Maria Casanova Crespo (4):

  • iris: Enable ARB_shader_draw_parameters support
  • glsl: fix typos in comments "transfor" -> "transform"
  • glsl: TCS outputs can not be transform feedback candidates on GLES
  • iris: setup EdgeFlag Vertex Element when needed.

José Fonseca (1):

  • scons: Workaround failures with MSVC when using SCons 3.0.[2-4].

Juan A. Suarez Romero (22):

  • anv/cmd_buffer: check for NULL framebuffer
  • nir: move ALU instruction before the jump instruction
  • nir: remove jump from two merging jump-ending blocks
  • genxml: add missing field values for 3DSTATE_SF
  • anv: advertise 8 subpixel precision bits
  • nir/spirv: return after emitting a branch in block
  • anv: destroy descriptor sets when pool gets reset
  • nir: deref only for OpTypePointer
  • anv: advertise 8 subtexel/mipmap precision bits
  • nir/xfb: do not use bare interface type
  • meson: Add dependency on genxml to anvil genfiles
  • Revert "intel/compiler: split is_partial_write() into two variants"
  • spirv: add missing SPV_EXT_descriptor_indexing capabilities
  • radv: enable descriptor indexing capabilities
  • anv: enable descriptor indexing capabilities
  • Update version to 19.1.0-rc1
  • Update version to 19.1.0-rc2
  • cherry-ignore: radeonsi: update buffer descriptors in all contexts after buffer invalidation
  • Update version to 19.1.0-rc3
  • Update version to 19.1.0-rc4
  • Update version to 19.1.0-rc5
  • Update version to 19.1.0

Julien Isorce (5):

  • gallium: add resource_get_info to pipe_screen
  • radeonsi: implement resource_get_info
  • st/va: properly set stride and offset in vlVaDeriveImage
  • r600: implement resource_get_info
  • st/va: check resource_get_info nullity in vlVaDeriveImage

Józef Kucia (3):

  • mesa: Fix GL_NUM_DEVICE_UUIDS_EXT
  • radv: Fix driverUUID
  • radv: clear vertex bindings while resetting command buffer

Karol Herbst (82):

  • nvc0/ir: replace cvt instructions with add to improve shader performance
  • gk104/ir: Use the new rcp/rsq in library
  • gm107/ir: add fp64 rcp
  • gm107/ir: add fp64 rsq
  • gallium: add PIPE_CAP_MAX_VARYINGS
  • st/mesa: require RGBA2, RGB4, and RGBA4 to be renderable
  • glsl_type: initialize offset and location to -1 for glsl_struct_field
  • nir/opt_if: don't mark progress if nothing changes
  • clover: update ICD table to support everything up to 2.2
  • nir: replace magic numbers with M_PI
  • nir/spirv: improve parsing of the memory model
  • nir: add support for address bit sized system values
  • nir/vtn: add support for SpvBuiltInGlobalLinearId
  • nir/spirv: initial handling of OpenCL.std extension opcodes
  • prog_to_nir: fix write from vps to FOG
  • nvc0: print the shader type when dumping headers
  • nv50/ir: move common converter code in base class
  • nv50/ir: add lowering helper
  • nouveau: add support for nir
  • nouveau: fix nir and TGSI shader cache collision
  • nv50/ir/nir: run some passes to make the conversion easier
  • nv50/ir/nir: track defs and provide easy access functions
  • nv50/ir/nir: add nir type helper functions
  • nv50/ir/nir: run assignSlots
  • nv50/ir/nir: add loadFrom and storeTo helpler
  • nv50/ir/nir: parse NIR shader info
  • nv50/ir/nir: implement nir_load_const_instr
  • nv50/ir/nir: add skeleton for nir_intrinsic_instr
  • nv50/ir/nir: implement nir_alu_instr handling
  • nv50/ir/nir: implement nir_intrinsic_load_uniform
  • nv50/ir/nir: implement nir_intrinsic_store_(per_vertex_)output
  • nv50/ir/nir: implement load_(interpolated_)input/output
  • nv50/ir/nir: implement intrinsic_discard(_if)
  • nv50/ir/nir: implement loading system values
  • nv50/ir/nir: implement nir_ssa_undef_instr
  • nv50/ir/nir: implement nir_instr_type_tex
  • nv50/ir/nir: add skeleton getOperation for intrinsics
  • nv50/ir/nir: implement vote and ballot
  • nv50/ir/nir: implement variable indexing
  • nv50/ir/nir: implement geometry shader nir_intrinsics
  • nv50/ir/nir: implement nir_intrinsic_load_ubo
  • nv50/ir/nir: implement ssbo intrinsics
  • nv50/ir/nir: implement images
  • nv50/ir/nir: add memory barriers
  • nv50/ir/nir: implement load_per_vertex_output
  • nv50/ir/nir: implement intrinsic shader_clock
  • nv50/ir/nir: handle user clip planes for each emitted vertex
  • nv50ir/nir: move immediates before use
  • glsl: add packed for struct types
  • glsl: add cl_size and cl_alignment
  • nir/lower_locals_to_regs: cast array index to 32 bit
  • nir/spirv: handle kernel function parameters
  • nir/spirv: support physical pointers
  • nir: add support for gather offsets
  • nv50/ir/nir: support gather offsets
  • nir/lower_tex: Add support for tg4 offsets lowering
  • nir/print: fix printing the image_array intrinsic index
  • nir/validate: validate that tex deref sources are actually derefs
  • v3d: prefer using nir_src_comp_as_int over nir_src_as_const_value
  • panfrost/midgard: use nir_src_is_const and nir_src_as_uint
  • glsl/standalone: add GLES3.1 and GLES3.2 compatibility
  • nir: move brw_nir_rewrite_image_intrinsic into common code
  • glsl_to_nir: handle bindless textures
  • glsl/nir: fetch the type for images from the deref instruction
  • glsl/nir: add support for lowering bindless images_derefs
  • nv50/ir/nir: handle bindless texture
  • nv50/ir/nir: add support for bindless images
  • nvc0/nir: enable bindless texture
  • lima: add bool parameter to type_size function
  • amd/nir: some cleanups
  • radv: use nir constant helpers
  • intel/nir: use nir_src_is_const and nir_src_as_uint
  • freedreno/ir3: use nir_src_as_uint in a few places
  • lima: use nir_src_as_float
  • nir/builder: Move nir_imm_vec2 from blorp into the builder
  • nir/loop_analyze: use nir_const_value.b for boolean results, not u32
  • spirv: reduce array size in vtn_handle_constant
  • nir: make nir_const_value scalar
  • vtn: handle bitcast with pointer src/dest
  • nir: Add a nir_builder_alu variant which takes an array of components
  • nir: Add nir_op_vec helper
  • spirv/cl: support vload/vstore

Kasireddy, Vivek (3):

  • nir/lower_tex: Add support for XYUV lowering
  • dri: Add XYUV8888 format
  • i965: Add support for sampling from XYUV images

Kenneth Graunke (872):

  • st/mesa: Set pipe_image_view::shader_access in PBO readpixels.
  • +
  • st/nir: Move varying setup code to a helper function.
  • +
  • st/nir: Make new helpers for constructing built-in NIR shaders.
  • +
  • st/mesa: Add a NIR version of the drawpixels/bitmap VS copy shader.
  • +
  • st/mesa: Add NIR versions of the drawpixels Z/stencil fragment shaders.
  • +
  • st/mesa: Add NIR versions of the clear shaders.
  • +
  • st/mesa: Add a NIR version of the OES_draw_texture built-in shaders.
  • +
  • st/mesa: Add NIR versions of the PBO upload/download shaders.
  • +
  • program: Use u_bit_scan64 in prog_to_nir.
  • +
  • program: Extend prog_to_nir handle system values.
  • +
  • nir: Record info->fs.pixel_center_integer in lower_system_values
  • +
  • compiler: Mark clip/cull distance arrays as compact before lowering.
  • +
  • nir: Bail on clip/cull distance lowering if GLSL IR already did it.
  • +
  • nir: Avoid clip/cull distance lowering multiple times.
  • +
  • nir: Avoid splitting compact arrays into per-element variables.
  • +
  • st/nir: Call nir_lower_clip_cull_distance_arrays().
  • +
  • gallium: Add a PIPE_CAP_NIR_COMPACT_ARRAYS capability bit.
  • +
  • nouveau: Silence unhandled cap warnings
  • +
  • st/mesa: Limit GL_MAX_[NATIVE_]PROGRAM_PARAMETERS_ARB to 2048
  • +
  • glsl: Allow gl_nir_lower_samplers*() without a gl_shader_program
  • +
  • glsl: Don't look at sampler uniform storage for internal vars
  • +
  • i965: Call nir_lower_samplers for ARB programs.
  • +
  • st/nir: Pull sampler lowering into a helper function.
  • +
  • st/nir: Lower sampler derefs for builtin shaders.
  • +
  • st/nir: Use sampler derefs in built-in shaders.
  • +
  • program: Make prog_to_nir create texture/sampler derefs.
  • +
  • nir: Use sampler derefs in drawpixels and bitmap lowering.
  • +
  • nir: Gather texture bitmasks in gl_nir_lower_samplers_as_deref.
  • +
  • i965: Drop unnecessary 'and' with prog->SamplerUnits
  • +
  • i965: Use info->textures_used instead of prog->SamplersUsed.
  • +
  • mesa: Advertise EXT_float_blend in ES 3.0+ contexts.
  • +
  • anv: Put MOCS in the correct location
  • +
  • spirv: Eliminate dead input/output variables after translation.
  • +
  • nir: Don't reassociate add/mul chains containing only constants
  • +
  • compiler: Make is_64bit(GL_*) helper more broadly available
  • +
  • mesa: Align doubles to a 64-bit starting boundary, even if packing.
  • +
  • radeonsi: Go back to using llvm.pow intrinsic for nir_op_fpow
  • +
  • st/mesa: Copy VP TGSI tokens if they exist, even for NIR shaders.
  • +
  • nir: Don't forget if-uses in new nir_opt_dead_cf liveness check
  • +
  • iris: Initial commit of a new 'iris' driver for Intel Gen8+ GPUs.
  • +
  • iris: viewport state, sort of
  • +
  • iris: port over batchbuffer updates
  • +
  • iris: initial render state upload
  • +
  • iris: packing with valgrind.
  • +
  • iris: merge pack
  • +
  • iris: initial gpu state, merges
  • +
  • iris: RASTER + SF + some CLIP, fix DIRTY vs. NEW
  • +
  • iris: scissors
  • +
  • iris: SF_CLIP_VIEWPORT
  • +
  • iris: Surfaces!
  • +
  • iris: sampler views
  • +
  • iris: stipples and vertex elements
  • +
  • iris: framebuffers
  • +
  • iris: don't segfault on !old_cso
  • +
  • iris: fix SF_CL length
  • +
  • iris: a bit of depth
  • +
  • iris: some draw info, vbs, sample mask
  • +
  • iris: fix crash - CSO binding can be NULL (when destroying context)
  • +
  • iris: COLOR_CALC_STATE
  • +
  • iris: sampler states
  • +
  • iris: emit 3DSTATE_SAMPLER_STATE_POINTERS
  • +
  • iris: basic push constant alloc
  • +
  • iris: some program code
  • +
  • iris: linear resources
  • +
  • iris: maps
  • +
  • iris: shader debug log
  • +
  • iris: drop unused field
  • +
  • iris: make an ice->render_batch field
  • +
  • iris: disable execbuf for now
  • +
  • iris: delete iris_pipe.c, shuffle code around
  • +
  • iris: init the batch!
  • +
  • iris: fix/rework line stipple
  • +
  • iris: actually save VBs
  • +
  • iris: msaa sample count packing problems
  • +
  • iris: fix prim type
  • +
  • iris: fix bogus index buffer reference
  • +
  • iris: draw->restart_index is uninitialized if PR is not enabled
  • +
  • iris: parse INTEL_DEBUG
  • +
  • iris: reworks, FS compile pieces
  • +
  • iris: import program cache code
  • +
  • iris: do the FS...asserts because we don't lower uniforms yet
  • +
  • iris: lower io
  • +
  • iris: make iris_batch target a particular ring
  • +
  • iris: kill iris_new_batch
  • +
  • iris: move MAX defines to iris_batch.h
  • +
  • iris: bit of SBA code
  • +
  • iris: flag SBA updates when instruction BO changes
  • +
  • iris: try and have an iris address
  • +
  • iris: so, sba then.
  • +
  • iris: reference VB BOs
  • +
  • iris: VB addresses
  • +
  • iris: DEBUG=bat
  • +
  • iris: VB fixes
  • +
  • iris: actually APPEND commands, not stomp over the top and never incr
  • +
  • iris: actually flush the commands
  • +
  • iris: actually advance forward when emitting commands
  • +
  • iris: initialize dirty bits to ~0ull
  • +
  • iris: hack to stop crashing on samplers for now
  • +
  • iris: fix indentation
  • +
  • iris: fix assert
  • +
  • iris: fix VBs
  • +
  • iris: vertex packet fixes
  • +
  • iris: fix VF instancing length so we don't get garbage in batch
  • +
  • iris: 3DPRIMITIVE fields
  • +
  • iris: bind_state -> compute state
  • +
  • iris: scissor slots
  • +
  • iris: some shader bits
  • +
  • iris: promote iris_program_cache_item to iris_compiled_shader
  • +
  • iris: actually save derived state
  • +
  • iris: emit shader packets
  • +
  • iris: convert IRIS_DIRTY_* to #defines
  • +
  • iris: don't forget about TE
  • +
  • iris: reorganize commands to match brw
  • +
  • iris: initial gpu state
  • +
  • iris: WM.
  • +
  • iris: index buffer BO
  • +
  • iris: more comes from bits filled in
  • +
  • iris: drop const from prog data parameters
  • +
  • iris: softpin some things
  • +
  • iris: use vtbl to avoid multiple symbols, fix state base address
  • +
  • iris: fix SBA
  • +
  • iris: move key pop to state module
  • +
  • iris: bits of WM key
  • +
  • iris: shuffle comments
  • +
  • iris: no NEW_SBA
  • +
  • iris: rewrite program cache to use u_upload_mgr
  • +
  • iris: actually destroy the cache
  • +
  • iris: actually softpin at an address
  • +
  • iris: actually set KSP offsets
  • +
  • iris: URB configs.
  • +
  • iris: dummy constants
  • +
  • iris: blend state
  • +
  • iris: alpha testing in PSB
  • +
  • iris: basic SBE code
  • +
  • iris: warning fixes
  • +
  • iris: fix silly unused batch with addr macro
  • +
  • iris: render targets!
  • +
  • iris: don't do samplers for disabled stages
  • +
  • iris: smaller blend state
  • +
  • iris: actually pin the instruction cache buffers
  • +
  • iris: compctrl
  • +
  • iris: more sketchy SBE
  • +
  • iris: fix dmabuf retval comparisons
  • +
  • iris: more SF CL VPs
  • +
  • iris: catastrophic state pointer mistake
  • +
  • iris: fix extents
  • +
  • iris: write DISABLES are not write ENABLES...whoops
  • +
  • iris: sample mask...not 0.
  • +
  • iris: uniform bits...badly
  • +
  • iris: warn if execbuf fails
  • +
  • iris: NOOP pad batches correctly
  • +
  • iris: decode batches if they fail to submit
  • +
  • iris: enable a few more formats
  • +
  • iris: set strides on transfers
  • +
  • iris: stop adding 9 to our varyings
  • +
  • iris: bufmgr updates.
  • +
  • iris: some thinking about binding tables
  • +
  • iris: Soft-pin the universe
  • +
  • iris: fix icache memzone
  • +
  • iris: dump gtt offset in dump_validation_list
  • +
  • iris: Also set SUPPORTS_48B? Not sure if necessary.
  • +
  • iris: more uploaders
  • +
  • iris: rewrite to use memzones and not relocs
  • +
  • iris: set EXEC_OBJECT_WRITE
  • +
  • iris: include p_defines.h in iris_bufmgr.h
  • +
  • iris: binders
  • +
  • iris: hook up batch decoder
  • +
  • iris: binder fixes
  • +
  • iris: decoder fixes
  • +
  • iris: update vb BO handling now that we have softpin
  • +
  • iris: validation dumping improvements
  • +
  • iris: canonicalize addresses.
  • +
  • iris: delete more trash
  • +
  • iris: allocate SURFACE_STATEs up front and stop streaming them
  • +
  • iris: same treatment for sampler views
  • +
  • iris: assemble SAMPLER_STATE table at bind time
  • +
  • iris: fix a scissor bug
  • +
  • iris: SBA once at context creation, not per batch
  • +
  • iris: TES stash
  • +
  • iris: isv freeing fixes
  • +
  • iris: set sampler views
  • +
  • iris: decoder fixes
  • +
  • iris: better BT asserts
  • +
  • iris: increase allocator alignment
  • +
  • iris: fix index
  • +
  • iris: port bug fix from i965
  • +
  • iris: fixes from i965
  • +
  • iris: fixes
  • +
  • iris: crazy pipe control code
  • +
  • iris: bo reuse
  • +
  • iris: vma fixes - don't free binder address
  • +
  • iris: vma - fix assert
  • +
  • iris: better SBE
  • +
  • iris: fix texturing!
  • +
  • iris: Move get_command_space to iris_batch.c
  • +
  • iris: Defines for base addresses rather than numbers everywhere
  • +
  • iris: pull in newer comments
  • +
  • iris: copy over i965's cache tracking
  • +
  • iris: move bo_offset_from_sba
  • +
  • iris: bits of blorp code
  • +
  • iris: more blitting code to make readpixels work
  • +
  • iris: drop bogus binder free
  • +
  • iris: fix sampler view crashes
  • +
  • iris: more blorp
  • +
  • iris: fix blorp prog data crashes
  • +
  • iris: add INTEL_DEBUG=reemit
  • +
  • iris: drop the 48b printout, we never use anything else
  • +
  • iris: hacky flushing for now
  • +
  • iris: linear staging buffers - fast CPU access...
  • +
  • iris: make blorp pin the binder
  • +
  • iris: blorp URB
  • +
  • iris: no more drawing rectangle in blorp
  • +
  • iris: assert surf init
  • +
  • iris: some depth stuff :(
  • +
  • iris: bump GL version to 4.2
  • +
  • iris: uniforms for VS
  • +
  • iris: proper length for VE packet?
  • +
  • iris: proper # of uniforms
  • +
  • iris: properly reject formats, fixes RGB32 rendering with texture float
  • +
  • iris: blorp bug fixes
  • +
  • iris: delete growing code and just die for now
  • +
  • iris: just turn batch reset_and_clear_caches into reset
  • +
  • iris: chaining not growing
  • +
  • iris: caps
  • +
  • iris: fix batch chaining...
  • +
  • iris: fix decoding and undo testing code
  • +
  • iris: Lower the max number of decoded VBO lines
  • +
  • iris: fix whitespace
  • +
  • iris: fix 3DSTATE_VERTEX_ELEMENTS length
  • +
  • iris: more depth stuffs...
  • +
  • iris: fix VF INSTANCING length
  • +
  • iris: util_copy_framebuffer_state (ported from Rob's v3d patches)
  • +
  • iris: transfers
  • +
  • iris: flush always
  • +
  • iris: maybe slightly less boats uniforms
  • +
  • iris: fix constant packet length to match i965
  • +
  • iris: better ubo handling
  • +
  • iris: completely rewrite binder
  • +
  • iris: have more than one const_offset
  • +
  • iris: make surface states for cbufs
  • +
  • iris: fill out pull constant buffers
  • +
  • iris: fix pull bufs that aren't the first user upload
  • +
  • iris: use u_transfer helpers for now
  • +
  • iris: better VFI
  • +
  • iris: fix release builds
  • +
  • iris: drop assert for now
  • +
  • iris: disable __gen_validate_value in release mode
  • +
  • iris: allow mapped buffers during execution (faster)
  • +
  • iris: comment about reemitting and flushing
  • +
  • iris: state cleaning
  • +
  • iris: untested index buffer upload
  • +
  • iris: delete some pointless STATIC_ASSERTS
  • +
  • iris: untested SAMPLER_STATE pin BO fix
  • +
  • iris: put back the always flush - fixes some things :(
  • +
  • iris: save pointers to streamed state resources
  • +
  • iris: fix the validation list on new batches
  • +
  • iris: flag DIRTY_WM properly
  • +
  • iris: bindings dirty tracking
  • +
  • iris: some dirty fixes
  • +
  • iris: clear dirty
  • +
  • iris: plug leaks
  • +
  • iris: more leak fixes
  • +
  • iris: pc fixes
  • +
  • iris: remove 4 bytes of padding in iris_compiled_shader
  • +
  • iris: rzalloc iris_compiled_shader so memcmp works even if padding creeps in
  • +
  • iris: don't leak sampler state table resources
  • +
  • iris: don't leak keyboxes when searching for an existing program
  • +
  • iris: indentation
  • +
  • iris: use pipe resources not direct BOs
  • +
  • iris: clean up some warnings so I can see through the noise
  • +
  • iris: print binder utilization in INTEL_DEBUG=submit
  • +
  • iris: redo VB CSO a bit
  • +
  • iris: print refcounts in INTEL_DEBUG=submit
  • +
  • iris: support signed vertex buffer offsets
  • +
  • iris: fix major refcounting bug with resources
  • +
  • iris: fix caps so tests run again
  • +
  • iris: avoid crashing on unbound constant resources
  • +
  • iris: emit 3DSTATE_SBE_SWIZ
  • +
  • iris: max VP index
  • +
  • iris: fix viewport counts and settings
  • +
  • iris: fix num viewports to be based on programs
  • +
  • iris: fix VP iteration
  • +
  • iris: scissor count fixes
  • +
  • iris: actually init num_viewports
  • +
  • iris: print second batch size separately
  • +
  • iris: don't always flush
  • +
  • iris: Handle batch submission failure "better"
  • +
  • iris: bad inherited comments
  • +
  • iris: colorize batchbuffer failures to make them stand out
  • +
  • iris: iris - fix QWord aligned endings after batch chaining rework
  • +
  • iris: tidy comments about mirroring modes
  • +
  • iris: Disable unsupported mirror clamp modes
  • +
  • iris: fix fragcoord ytransform
  • +
  • iris: better boxing on maps
  • +
  • iris: clears
  • +
  • iris: rework DEBUG_REEMIT
  • +
  • iris: shader dirty bits
  • +
  • iris: clear fix
  • +
  • iris: fall back to u_generate_mipmap
  • +
  • iris: implement copy image
  • +
  • iris: lightmodel flat
  • +
  • iris: maybe-flush before blorp operations
  • +
  • iris: fix provoking vertex ordering
  • +
  • iris: larger polygon offset
  • +
  • iris: TES uniform fixes
  • +
  • iris: geometry shader support
  • +
  • iris: don't emit garbage 3DSTATE_VERTEX_BUFFERS when there aren't any
  • +
  • iris: fix 3DSTATE_VERTEX_ELEMENTS / VF_INSTANCING for 0 elements
  • +
  • iris: fix GS dispatch mode
  • +
  • iris: depth clears
  • +
  • iris: null surface for unbound textures
  • +
  • iris: state ref tuple
  • +
  • iris: don't include binder in surface VMA range
  • +
  • iris: border color memory zone :(
  • +
  • iris: implement border color, fix other sampler nonsense
  • +
  • iris: dead pointer
  • +
  • iris: just malloc one iris_genx_state instead of a bunch of oddball pieces
  • +
  • iris: SBE change stash
  • +
  • iris: fix zoffset asserts with 2DArray/Cube
  • +
  • iris: rename map->stride
  • +
  • iris: actually set cube bit properly
  • +
  • iris: keep DISCARD_RANGE
  • +
  • iris: actually handle array layers in blits
  • +
  • iris: comment out l/a/i/la
  • +
  • iris: fix clip flagging on fb changes
  • +
  • iris: fix depth bounds clamp enables
  • +
  • iris: don't crash on shader perf logs
  • +
  • iris: slab allocate transfers
  • +
  • iris: rearrange iris_resource.h
  • +
  • iris: Implement 3DSTATE_SO_DECL_LIST
  • +
  • iris: SO buffers
  • +
  • iris: streamout
  • +
  • iris: set even if no outputs
  • +
  • iris: bother setting program_string_id...
  • +
  • iris: fix SO_DECL_LIST
  • +
  • iris: actually pin the buffers
  • +
  • iris: fix sample mask for MSAA-off
  • +
  • iris: disable 6x MSAA support
  • +
  • iris: multislice transfer maps
  • +
  • iris: fix CC_VIEWPORT
  • +
  • iris: draw indirect support?
  • +
  • iris: save query type
  • +
  • iris: bits of multisample program key
  • +
  • iris: s/hwcso/state/g
  • +
  • iris: bind state helper function
  • +
  • iris: NOS mechanics
  • +
  • iris: record FS NOS
  • +
  • iris: fix crash
  • +
  • iris: fix sampler views of TBOs
  • +
  • iris: fix texture buffer stride
  • +
  • iris: TES program key inputs
  • +
  • iris: compile a TCS...don't bother with passthrough yet
  • +
  • iris: don't emit SO_BUFFERS and SO_DECL_LIST unless streamout is enabled
  • +
  • iris: vertex ID, instance ID
  • +
  • iris: fix SGVS when there are no valid vertex elements
  • +
  • iris: fill out MAX_PATCH_VERTICES
  • +
  • iris: assert about passthrough shaders to make this easier to detect
  • +
  • iris: fix EmitNoIndirect
  • +
  • iris: fix Z24
  • +
  • iris: reemit blend state for alpha test function changes
  • +
  • iris: point sprite enables
  • +
  • iris: hack around samples confusion
  • +
  • iris: fix blorp filters
  • +
  • iris: expose more things that we already support
  • +
  • iris: fix msaa flipping filters
  • +
  • iris: export get_shader_info
  • +
  • iris: implement set_shader_buffers
  • +
  • iris: emit binding table for atomic counters and SSBOs
  • +
  • iris: shorten loop
  • +
  • iris: unbind compiled shaders if none are present
  • +
  • iris: fix TBO alignment to match 965
  • +
  • iris: enable SSBOs
  • +
  • iris: fix SSBO indexing
  • +
  • iris: fix for disabling ssbos
  • +
  • iris: update bindings when changing programs
  • +
  • iris: drop unused bo parameter
  • +
  • iris: implement texture/memory barriers
  • +
  • iris: Don't reserve new binding table section unless things are dirty
  • +
  • iris: update a todo comment
  • +
  • iris: BIG OL' HACK for UBO updates
  • +
  • iris: enable texture gather
  • +
  • iris: Avoid croaking when trying to create FBO surfaces with bad formats
  • +
  • iris: fix GS output component limit
  • +
  • iris: drop pipe_shader_state
  • +
  • iris: fix sample mask
  • +
  • iris: cube arrays are cubes too
  • +
  • iris: we don't support textureGatherOffsets, need it lowered
  • +
  • iris: add minor comments
  • +
  • iris: comment everything
  • +
  • iris: sync bugfixes from brw_bufmgr
  • +
  • iris: remember to set bo->userptr
  • +
  • iris: rename ring to engine
  • +
  • iris: simplify batch len qword alignment
  • +
  • iris: get angry about execbuf failures
  • +
  • iris: fill out more caps
  • +
  • iris: depth or stencil fixes
  • +
  • iris: clear stencil
  • +
  • iris: actually emit stencil packets
  • +
  • iris: allow S8 as a stencil format
  • +
  • iris: WTF transfers
  • +
  • iris: use u_transfer_helper for depth stencil packing/unpacking
  • +
  • iris: drop stencil handling now that u_transfer_helper does it
  • +
  • iris: refcounting, who needs it?
  • +
  • iris: actually do stencil blits
  • +
  • iris: say no to more formats
  • +
  • iris: deal with Marek's new MSAA caps
  • +
  • iris: we can do multisample Z resolves
  • +
  • iris: Convert RGBX to RGBA for rendering.
  • +
  • iris: disallow RGB32 formats too
  • +
  • iris: Fix tiled memcpy for cubes...and for array slices
  • +
  • iris: blorp blit multiple slices
  • +
  • iris: assert depth is 1 in resource_copy_region
  • +
  • iris: call maybe_flush for each blorp operation
  • +
  • iris: implement ARB_clear_texture
  • +
  • iris: last VUE map NOS, handle > 16 FS inputs
  • +
  • iris: drop dead assignments
  • +
  • iris: drop pwrite
  • +
  • iris: port non-bucket alignment bugfix
  • +
  • iris: don't emit SBE all the time
  • +
  • iris: rename pipe to base
  • +
  • iris: Drop bogus sampler state saving
  • +
  • iris: move iris_shader_state from ice->shaders.state to ice->state.shaders
  • +
  • iris: Move things to iris_shader_state
  • +
  • iris: Move iris_sampler_view declaration to iris_resource.h
  • +
  • iris: track depth/stencil writes enabled
  • +
  • iris: use consistent copyright formatting
  • +
  • iris: Move cache tracking to iris_resolve.c
  • +
  • iris: proper cache tracking
  • +
  • iris: precompute hashes for cache tracking
  • +
  • iris: Reduce binder alignment from 64 to 32
  • +
  • iris: reenable R32G32B32 texture buffers
  • +
  • iris: z_res -> s_res
  • +
  • iris: implement get_sample_position
  • +
  • iris: fix line-aa-width
  • +
  • iris: try to hack around binder issue
  • +
  • iris: fix sampler state setting
  • +
  • iris: big old hack for tex-miplevel-selection
  • +
  • iris: use linear for 1D textures
  • +
  • iris: handle level/layer in direct maps
  • +
  • iris: fix crash when binding optional shader for the first time
  • +
  • iris: Skip primitive ID overrides if the shader wrote a custom value
  • +
  • iris: fix blend state memcpy
  • +
  • iris: new caps
  • +
  • iris: use Eric's new caps helper
  • +
  • iris: Allow inlining of require/get_command_space
  • +
  • iris: skip over whole function if dirty == 0
  • +
  • iris: don't unconditionally emit 3DSTATE_VF / 3DSTATE_VF_TOPOLOGY
  • +
  • iris: fix constant buffer 0 to be absolute
  • +
  • iris: set EXEC_OBJECT_CAPTURE on all driver internal buffers
  • +
  • iris: fix null FB and unbound tex surface state addresses
  • +
  • iris: Support multiple binder BOs, update Surface State Base Address
  • +
  • iris: fix SO offset writes for multiple streams
  • +
  • iris: update comments for multibinder
  • +
  • iris: move binder pinning outside the dirty == 0 check
  • +
  • iris: re-pin binding table contents if we didn't re-emit them
  • +
  • iris: enable ARB_enhanced_layouts
  • +
  • iris: refactor LRIs in context setup
  • +
  • iris: initialize "don't suck" bits, as Ben likes to call them
  • +
  • iris: totally untested icelake support
  • +
  • iris: refactor program CSO stuff
  • +
  • iris: silence const warning
  • +
  • iris: fix context restore of 3DSTATE_CONSTANT ranges
  • +
  • iris: properly re-pin stencil buffers
  • +
  • iris: delete bogus comment
  • +
  • iris: inherit the index buffer properly
  • +
  • iris: use 0 for TCS passthrough program string ID
  • +
  • iris: rw_bo for pipe controls
  • +
  • iris: LRM/SRM/SDI hooks
  • +
  • iris: initial query code
  • +
  • iris: gen10+ workarounds and break fix
  • +
  • iris: results write
  • +
  • iris: flush batch when asking for result via QBO
  • +
  • iris: fix random failures via CS stall...but why?
  • +
  • iris: gpr0 to bool
  • +
  • iris: play chicken with timer queries for now
  • +
  • iris: pipeline stats
  • +
  • iris: primitives generated query support
  • +
  • iris: drop explicit pinning
  • +
  • iris: timestamps
  • +
  • iris: ...and SO prims emitted queries
  • +
  • iris: glGet timestamps, more correct timestamps
  • +
  • iris: Need to | 1 when asking for timestamps
  • +
  • iris: 36-bit overflow fixes
  • +
  • iris: early return properly
  • +
  • iris: better query file comment
  • +
  • iris: magic number 36 -> #define
  • +
  • iris: Enable ARB_shader_vote
  • +
  • iris: just mark snapshots_landed from the CPU
  • +
  • iris: drop a bunch of pipe_sampler_state stuff we don't need
  • +
  • iris: vma_free bo->size, not bo_size
  • +
  • iris: don't mark contains_draw = false when chaining batches
  • +
  • iris: fix Z32_S8 depth sampling
  • +
  • iris: stencil texturing
  • +
  • iris: force persample interp cap
  • +
  • iris: pipe to scs -> iris_pipe.h
  • +
  • iris: inline stage_from_pipe to avoid unused warnings
  • +
  • iris: add gen11 to genX_call
  • +
  • iris: Allow PIPE_CONTROL with Stall at Scoreboard and RT flush
  • +
  • iris: rework format translation apis
  • +
  • iris: Use R/RG instead of I/L/A when sampling
  • +
  • iris: enable I/L formats
  • +
  • iris: X32_S8X24 :/
  • +
  • iris: set the binding table size
  • +
  • iris: lower storage image derefs
  • +
  • iris: implement set_shader_images hook
  • +
  • iris: bother with BTIs
  • +
  • iris: set image access correctly
  • +
  • iris: actually set image access
  • +
  • iris: null for non-existent cbufs
  • +
  • iris: move images next to textures in binding table
  • +
  • iris: advertise GL_ARB_shader_texture_image_samples
  • +
  • iris: Enable fb fetch
  • +
  • iris: initial compute caps
  • +
  • iris: yes
  • +
  • iris: drop dead format //'s
  • +
  • iris: drop XXX's about swizzling
  • +
  • iris: little bits of compute basics
  • +
  • iris: drop XXX that Jordan handled
  • +
  • iris: drop unnecessary #ifdefs
  • +
  • iris: leave XXX about unnecessary binding table uploads
  • +
  • iris: bail if SLM is needed
  • +
  • iris: fix whitespace
  • +
  • iris: XXX for compute state tracking :/
  • +
  • iris: rewrite grid surface handling
  • +
  • iris: better dirty checking
  • +
  • iris: don't let render/compute contexts stomp each other's dirty bits
  • +
  • iris: hack to avoid memorybarriers out the wazoo
  • +
  • iris: do PIPELINE_SELECT for render engine, add flushes, GLK hacks
  • +
  • iris: fix SBA flushing by refactoring code
  • +
  • iris: try and avoid pointless compute submissions
  • +
  • iris: fix UBOs with bindings that have an offset
  • +
  • iris: flag CC_VIEWPORT when changing num viewports
  • +
  • iris: fix SF_CLIP_VIEWPORT array indexing with multiple VPs
  • +
  • iris: Fix texture buffer / image buffer sizes.
  • +
  • iris: Clamp UBO and SSBO access to the actual BO size, for safety
  • +
  • iris: Move snapshots_landed to the front.
  • +
  • iris: Fix off by one in scissoring, empty scissors, default scissors
  • +
  • iris: Fall back to 1x1x1 null surface if no framebuffer supplied
  • +
  • iris: SO_DECL_LIST fix
  • +
  • iris: Fix refcounting of grid surface
  • +
  • iris: delete dead code
  • +
  • iris: fix overhead regression from "don't stomp each other's dirty bits"
  • +
  • iris: allow binding a null vertex buffer
  • +
  • iris: Flag constants dirty on program changes
  • +
  • iris: Disable a PIPE_CONTROL workaround on Icelake
  • +
  • iris: Enable ARB_shader_stencil_export
  • +
  • iris: Enable A8/A16_UNORM in an inefficient manner
  • +
  • iris: Drop B5G5R5X1 support
  • +
  • iris: Use at least 1x1 size for null FB surface state.
  • +
  • iris: Cross-link iris_batches so they can potentially flush each other
  • +
  • iris: cross batch flushing
  • +
  • iris: Don't leak the compute batch
  • +
  • iris: Actually create/destroy HW contexts
  • +
  • iris: Enable msaa_map transfer helpers
  • +
  • iris: tidy more warnings
  • +
  • iris: implement scratch space!
  • +
  • iris: Fix MSAA smooth points
  • +
  • iris: Fix TextureBarrier
  • +
  • iris: Fix multiple RTs with non-independent blending
  • +
  • iris: partial set_query_active_state
  • +
  • iris: Print the batch name when decoding
  • +
  • iris: Clone the NIR
  • +
  • iris: Defer cbuf0 upload to draw time
  • +
  • iris: drop unnecessary param[] setup from iris_setup_uniforms
  • +
  • iris: add param domain defines
  • +
  • iris: fill out params array with built-ins, like clip planes
  • +
  • iris: only bother with params if there are any...
  • +
  • iris: lower user clip planes
  • +
  • iris: hook up key stuff for clip plane lowering
  • +
  • iris: fix system value remapping
  • +
  • iris: dodge backend UCP lowering
  • +
  • iris: bypass params and do it ourselves
  • +
  • iris: actually upload clip planes.
  • +
  • iris: fix num clip plane consts
  • +
  • iris: fix more uniform setup
  • +
  • iris: drop iris_setup_push_uniform_range
  • +
  • iris: enable push constants if we have sysvals but no uniforms
  • +
  • iris: regather info so we get CLIP_DIST slots, not CLIP_VERTEX
  • +
  • iris: don't support pull constants.
  • +
  • iris: don't trip on param asserts
  • +
  • iris: drop param stuffs
  • +
  • iris: don't forget to upload CS consts
  • +
  • iris: fix sysval only binding tables
  • +
  • iris: only clip lower if there's something to clip against
  • +
  • iris: leave another TODO
  • +
  • iris: Fix SourceAlphaBlendFactor
  • +
  • iris: "Fix" transfer maps of buffers
  • +
  • iris: Fix independent alpha blending.
  • +
  • iris: more TODO
  • +
  • iris: scissored and mirrored blits
  • +
  • iris: more todo notes
  • +
  • iris: Fix TCS/TES slot unification
  • +
  • iris: properly pin stencil buffers
  • +
  • iris: Fix SLM
  • +
  • iris: Use iris_use_pinned_bo rather than add_exec_bo directly
  • +
  • iris: Combine iris_use_pinned_bo and add_exec_bo
  • +
  • iris: Avoid cross-batch synchronization on read/reads
  • +
  • iris: Avoid synchronizing due to the workaround BO
  • +
  • iris: replace vestiges of fence fds with newer exec_fence API
  • +
  • iris: Drop vestiges of throttling code
  • +
  • iris: Hang on to the last batch's sync-point, so we can wait on it
  • +
  • iris: Add wait fences to properly sync between render/compute
  • +
  • iris: leave a TODO
  • +
  • iris: flush the compute batch too if border pool is redone
  • +
  • iris: put render batch first in fence code
  • +
  • iris: Put batches in an array
  • +
  • iris: PIPE_CONTROL workarounds for GPGPU mode
  • +
  • iris: RT flush for memorybarrier with texture bit
  • +
  • iris: update comment
  • +
  • iris: Enable ctx->Const.UseSTD430AsDefaultPacking
  • +
  • iris: Lie about indirects
  • +
  • iris: Fix buffer -> buffer copy_region
  • +
  • iris: Fix VIEWPORT/LAYER in stream output info
  • +
  • iris: Do the 48-bit vertex buffer address invalidation workaround
  • +
  • iris: drop long dead XXX comment
  • +
  • iris: Track a binding history for buffer resources
  • +
  • iris: add iris_flush_and_dirty_for_history
  • +
  • iris: Flush for history at various moments
  • +
  • iris: Re-pin even if nothing is dirty
  • +
  • iris: fix prototype warning
  • +
  • iris: export iris_upload_shader
  • +
  • iris: fix comment location
  • +
  • iris: Use wrappers for create_xs_state rather than a switch statement
  • +
  • iris: rework program cache interface
  • +
  • iris: Enable precompiles
  • +
  • iris: Use program's num textures not the state tracker's bound
  • +
  • iris: drop pull constant binding table entry
  • +
  • iris: add assertions about binding table starts
  • +
  • iris: add an extra BT assert from Chris Wilson
  • +
  • iris: actually flush for storage images
  • +
  • iris: fix some SO overflow query bugs and tidy the code a bit
  • +
  • iris: drop key_size_for_cache
  • +
  • iris: for BLORP, only use the predicate enable bit when USE_BIT
  • +
  • iris: check query first
  • +
  • iris: fix conditional compute, don't stomp predicate for pipelined queries
  • +
  • iris: Rework tiling/modifiers handling
  • +
  • iris: Fix failed to compile TCS message
  • +
  • iris: Destroy transfer helper on screen teardown
  • +
  • iris: Destroy the border color pool
  • +
  • iris: Unref unbound_tex resource
  • +
  • iris: Fix IRIS_MEMZONE_COUNT to exclude the border color pool
  • +
  • iris: Destroy the bufmgr
  • +
  • iris: Stop leaking iris_uncompiled_shaders like mad
  • +
  • iris: move some non-buffer case code in a bit
  • +
  • iris: Don't bother considering if the underlying surface is a cube
  • +
  • iris: fix alpha channel for RGB BC1 formats
  • +
  • iris: fix dma buf import strides
  • +
  • iris: CS stall for stream out -> VB
  • +
  • iris: make clipper statistics dynamic
  • +
  • iris: reject all clipping when we can't use streamout render disabled
  • +
  • iris: omask can kill
  • +
  • iris: reemit SBE when sprite coord origin changes
  • +
  • iris: re-pin inherited streamout buffers
  • +
  • iris: Fix NOS mechanism
  • +
  • iris: fix overhead regression from flushing for storage images
  • +
  • iris: fix set_sampler_views to not unbind, be better about bounds
  • +
  • iris: Fix set_sampler_views with start > 0
  • +
  • iris: Replace num_textures etc with a bitmask we can scan
  • +
  • iris: Drop continues in resolve
  • +
  • iris: Fix clear dimensions
  • +
  • iris: Clamp viewport extents to the framebuffer dimensions
  • +
  • iris: Enable guardband clipping
  • +
  • iris: Fix primitive generated query active flag
  • +
  • iris: Always do rasterizer discard in clipper
  • +
  • iris: override alpha to one src1 blend factors
  • +
  • iris: handle PatchVerticesIn as a system value.
  • +
  • iris: rewrite set_vertex_buffer and VB handling
  • +
  • iris: Reorder LRR parameters to have dst first.
  • +
  • iris: Add _MI_ALU helpers that don't paste
  • +
  • iris: Don't bother packing 3DSTATE_SO_BUFFER at create time
  • +
  • iris: Move iris_stream_output_target def to iris_context.h
  • +
  • iris: only get space for one offset in stream output targets
  • +
  • iris: Implement DrawTransformFeedback()
  • +
  • iris: drop unnecessary genx->streamout field
  • +
  • iris: Fix for PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET
  • +
  • iris: Fix the prototype for iris_bo_alloc_tiled
  • +
  • iris: don't print the pointer in INTEL_DEBUG=submit
  • +
  • iris: Use a surface state fill helper
  • +
  • iris: Make a alloc_surface_state helper
  • +
  • iris: whitespace fixes
  • +
  • iris: Track blend enables, save outbound for resolve code
  • +
  • iris: always pin the binder...in the compute context, too.
  • +
  • iris: delete finished comments
  • +
  • iris: pin and re-pin the scratch BO
  • +
  • iris: more dead comments
  • +
  • iris: only mark depth/stencil as writable if writes are actually enabled
  • +
  • iris: better MOCS
  • +
  • iris: Fix scratch space allocation on Icelake.
  • +
  • iris: Only resolve inputs for actual shader stages
  • +
  • iris: Add a more long term TODO about timebase scaling
  • +
  • iris: Fix compute scratch pinning
  • +
  • iris: Delete bogus comment about cube array counting.
  • +
  • iris: Fix framebuffer layer count
  • +
  • iris: Don't enable push constants just because there are system values
  • +
  • iris: Don't make duplicate system values
  • +
  • iris: Fill out brw_image_params for storage images on Broadwell
  • +
  • iris: Fix surface states for Gen8 lowered-to-untype images
  • +
  • iris: Leave a comment about why Broadwell images are broken
  • +
  • iris: Implement multi-slice copy_region
  • +
  • iris: Flush the render cache in flush_and_dirty_for_history
  • +
  • iris: Handle PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE somewhat
  • +
  • iris: Don't check other batches for our batch BO
  • +
  • iris: Drop a dead comment
  • +
  • iris: Delete genx->bound_vertex_buffers
  • +
  • iris: Fix Broadwell WaDividePSInvocationCountBy4
  • +
  • iris: Use new PIPE_STAT_QUERY enums rather than hardcoded numbers.
  • +
  • iris: Switch to the new PIPELINE_STATISTICS_QUERY_SINGLE capability
  • +
  • iris: fail to create screen for older unsupported HW
  • +
  • iris: Allow sample mask of 0
  • +
  • iris: Don't enable smooth points when point sprites are enabled
  • +
  • iris: Assert about blits with color masking
  • +
  • iris: Pay attention to blit masks
  • +
  • iris: CS stall on VF cache invalidate workarounds
  • iris: Fix SO issue with INTEL_DEBUG=reemit, set fewer bits
  • iris: Don't whack SO dirty bits when finishing a BLORP op
  • iris: Fix memzone_for_address for the surface and binder zones
  • iris: Do binder address allocations per-context, not globally.
  • iris: Zero the compute predicate when changing the render condition
  • iris: Remap stream output indexes back to VARYING_SLOT_*.
  • iris: Enable PIPE_CAP_COMPACT_ARRAYS
  • iris: Drop comment about ISP_DIS
  • iris: Drop dead state_size hash table
  • iris: Unreference some more things on state module teardown
  • iris: minor tidying
  • iris: Fix bug in bound vertex buffer tracking
  • iris: Implement ALT mode for ARB_{vertex,fragment}_shader
  • iris: Add a timeout_nsec parameter, rename check_syncpt to wait_syncpt
  • iris: Fix accidental busy-looping in query waits
  • iris: Use READ_ONCE and WRITE_ONCE for snapshots_landed
  • iris: Make a iris_batch_reference_signal_syncpt helper function.
  • iris: Add PIPE_CAP_MAX_VARYINGS
  • iris: rework num textures to util_lastbit
  • iris: Stop chopping off the first nine characters of the renderer string
  • iris: Drop XXX about alpha testing
  • iris: Set 3DSTATE_WM::ForceThreadDispatchEnable
  • iris: Set HasWriteableRT correctly
  • iris: Drop XXX about checking for swizzling
  • iris: Move create and bind driver hooks to the end of iris_program.c
  • iris: Make an IRIS_MAX_MIPLEVELS define
  • iris: Simplify iris_get_depth_stencil_resources
  • iris: Add missing depth cache flushes
  • iris: Always emit at least one BLEND_STATE
  • iris: Add iris_resource fields for aux surfaces
  • iris: Fill out res->aux.possible_usages
  • iris: Fill out SURFACE_STATE entries for each possible aux usage
  • iris: create aux surface if needed
  • iris: Initial import of resolve code
  • iris: blorp using resolve hooks
  • iris: add some draw resolve hooks
  • iris: actually use the multiple surf states for aux modes
  • iris: try to fix copyimage vs copybuffers
  • iris: be sure to skip buffers in resolve code
  • iris: resolve before transfer maps
  • iris: pin the buffers
  • iris: store modifier info in res
  • iris: Make blit code use actual aux usages
  • iris: consider framebuffer parameter for aux usages
  • iris: Resolves for compute
  • iris: disable aux for external things
  • iris: some initial HiZ bits
  • iris: don't use hiz for MSAA buffers
  • iris: Set program key fields for MCS
  • iris: make surface states for CCS_D too
  • iris: do flush for buffers still
  • iris: Allow disabling aux via INTEL_DEBUG options
  • iris: Fix aux usage in render resolve code
  • iris: Only resolve compute resources for compute shaders
  • iris: Enable auxiliary buffer support
  • iris: Enable -msse2 and -mstackrealign
  • Revert "iris: Enable auxiliary buffer support"
  • vulkan: Fix 32-bit build for the new overlay layer
  • mesa: Fix RGBBuffers for renderbuffers with sized internal formats
  • iris: Drop RGBX -> RGBA for storage image usages
  • iris: Properly allow rendering to RGBX formats.
  • i965: Implement threaded GL support.
  • tgsi_to_nir: use sampler variables and derefs
  • iris: Fix MOCS for blits and clears
  • isl: Add a swizzle parameter to isl_buffer_fill_state()
  • iris: Plumb through ISL_SWIZZLE_IDENTITY in buffer surface emitters
  • iris: Defer uploading sampler state tables until draw time
  • iris: Properly support alpha and luminance-alpha formats
  • iris: Drop PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY
  • iris: Spruce up "are we using this engine?" checks for flushing
  • iris: Export a copy_region helper that doesn't flush
  • iris: Use copy_region and staging resources to avoid transfer stalls
  • Revert MR 369 (Fix extract_i8 and extract_u8 for 64-bit integers)
  • iris: Fix backface stencil write condition
  • iris: Rework default tessellation level uploads
  • iris: Fix TES gl_PatchVerticesIn handling.
  • iris: Move depth/stencil flushes so they actually do something
  • iris: Refactor depth/stencil buffer pinning into a helper.
  • iris: Fix write enable in pinning of depth/stencil resources
  • i965: Move some genX infrastructure to genX_boilerplate.h.
  • i965: Rename ISP_DIS to INDIRECT_STATE_POINTERS_DISABLE.
  • i965: Use genxml for emitting PIPE_CONTROL.
  • i965: Reimplement all the PIPE_CONTROL rules.
  • intel/fs: Fix opt_peephole_csel to not throw away saturates.
  • iris: Don't mutate box in transfer map code
  • iris: Don't flush the batch for unsynchronized mappings
  • iris: Slightly better bounds on buffer sizes
  • gallium: Add PIPE_BARRIER_UPDATE_BUFFER and UPDATE_TEXTURE bits.
  • nvc0: Skip new update barrier bits
  • nir: Record non-vector/scalar varyings as unmovable when compacting
  • iris: Fix util_vma_heap_init size for IRIS_MEMZONE_SHADER
  • iris: Skip input resolve handling if bindings haven't changed
  • iris: Skip framebuffer resolve tracking if framebuffer isn't dirty
  • iris: Skip resolves and flushes altogether if unnecessary
  • iris: Fix batch chaining map_next increment.
  • iris: Actually advertise some modifiers
  • st/nir: Free the GLSL IR after linking.
  • st/mesa: Fix blitting from GL_DEPTH_STENCIL to GL_STENCIL_INDEX
  • iris: Fix blits with S8_UINT destination
  • iris: Print the memzone name when allocating BOs with INTEL_DEBUG=buf
  • iris: Save/restore MI_PREDICATE_RESULT, not MI_PREDICATE_DATA.
  • iris: Silence unused variable warnings in release mode
  • gallium/util: Add const to u_range_intersect
  • iris: Actually pin the scratch BO.
  • glsl: Set location on structure-split sampler uniform variables
  • intel: Emit 3DSTATE_VF_STATISTICS dynamically
  • iris: Actually mark blorp_copy_buffer destinations as written.
  • iris: Preserve all PIPE_TRANSFER flags in xfer->usage
  • iris: Fix FLUSH_EXPLICIT handling with staging buffers.
  • iris: Make shader_perf_log print to stderr if INTEL_DEBUG=perf is set
  • i965: Move program key debugging to the compiler.
  • iris: Print the reason for shader recompiles.
  • iris: Move iris_debug_recompile calls before uploading.
  • iris: Change vendor and renderer strings
  • iris: Add texture cache flushing hacks for blit and resource_copy_region
  • iris: Be less aggressive at postdraw work skipping
  • iris: Add mechanism for iris-specific driconf options
  • iris: Enable the dual_color_blend_by_location driconf option.
  • iris: Track bound and writable SSBOs
  • Revert "glsl: Set location on structure-split sampler uniform variables"
  • i965: Ignore uniform storage for samplers or images, use binding info
  • i965: Tidy bogus indentation left by previous commit
  • iris: Mark constants dirty on transfer unmap even if no flushes occur
  • iris: Track bound constant buffers
  • iris: Rework UBOs and SSBOs to use pipe_shader_buffer
  • iris: Rework image views to store pipe_image_view.
  • iris: Make a gl_shader_stage -> pipe_shader_stage helper function
  • iris: Make memzone_for_address non-static
  • iris: Replace buffer backing storage and rebind to update addresses.
  • iris: Make a resource_is_busy() helper
  • iris: Track valid data range and infer unsynchronized mappings.
  • iris: Make some offset math helpers take a const isl_surf pointer
  • iris: Fix DrawTransformFeedback math when there's a buffer offset
  • iris: Prefer staging blits when destination supports CCS_E.
  • iris: Actually put Mesa in GL_RENDERER string
  • iris: Split iris_flush_and_dirty_for_history into two helpers.
  • iris: Enable GL_AMD_depth_clamp_separate
  • iris: Advertise EXT_texture_sRGB_R8 support
  • iris: Some tidying for preemption support
  • iris: Silence unused function warning
  • iris: Fix zeroing of transform feedback offsets in strange cases.
  • glsl/list: Add an exec_list_is_singular() helper.
  • nir: Add a new nir_cf_list_is_empty_block() helper.
  • intel/fs: Don't emit empty ELSE blocks.
  • iris: Set XY Clipping correctly.
  • iris: Only enable GL_AMD_depth_clamp_separate on Gen9+
  • iris: Fix imageBuffer and PBO download.
  • iris: Disable dual source blending when shader doesn't handle it
  • iris: Resolve textures used by the program, not merely bound textures
  • iris: Fix 4GB memory zone heap sizes.
  • iris: leave the top 4Gb of the high heap VMA unused
  • iris: Force VMA alignment to be a multiple of the page size.
  • iris: Delete bucketing allocators
  • i965: Fix BRW_MEMZONE_LOW_4G heap size.
  • i965: Force VMA alignment to be a multiple of the page size.
  • i965: leave the top 4Gb of the high heap VMA unused
  • i965: Fix memory leaks in brw_upload_cs_work_groups_surface().
  • iris: Use full ways for L3 cache setup on Icelake.
  • egl/x11: calloc dri2_surf so it's properly zeroed

Kevin Strasser (1):

  • egl/dri: Avoid out of bounds array access

Khaled Emara (1):

  • freedreno: PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT unreachable statement

Khem Raj (1):

  • winsys/svga/drm: Include sys/types.h

Kishore Kadiyala (1):

  • android: static link with libexpat with Android O+

Konstantin Kharlamov (1):

  • mapi: work around GCC LTO dropping assembly-defined functions

Kristian Høgsberg (49):

  • st/nir: Use src/ relative include path for autotools
  • freedreno/a6xx: Emit blitter dst with OUT_RELOCW
  • freedreno/a6xx: Use tiling for all resources
  • freedreno/a6xx: regen headers
  • freedreno/a6xx: Drop render condition check in blitter
  • freedreno: Log number of draw for sysmem passes
  • freedreno/a6xx: Use the right resource for separate stencil stride
  • freedreno/a6xx: Combine emit_blit and fd6_blit
  • freedreno: Consolidate u_blitter functions in freedreno_blitter.c
  • freedreno: Don't tell the blitter what it can't do
  • freedreno/a6xx: Move blit check so as to restore comment
  • freedreno/a6xx: Support some depth/stencil blits on blitter
  • freedreno/a6xx: Support y-inverted blits
  • freedreno/a6xx: Add format argument to fd6_tex_swiz()
  • freedreno/a6xx: Fall back to masked RGBA blits for depth/stencil
  • freedreno/a6xx: Clean up mixed use of swap and swizzle for texture state
  • freedreno/a6xx: Update headers
  • freedreno/a6xx: Front facing needs UNK3 bit
  • freedreno/a6xx: Fix point coord
  • .mailmap: Add a few more alises for myself
  • freedreno: Update headers
  • freedreno/a6xx: Copy stencil as R8_UINT
  • freedreno/a6xx: Support MSAA resolve blits on blitter
  • freedreno/a6xx: Only output MRT control for used framebuffers
  • freedreno/a6xx: Don't zero SO buffer addresses
  • freedreno: Fix a couple of warnings
  • turnip: Only get bo offset when we need to mmap
  • freedreno: Use c_vis_args and no_override_init_args
  • freedreno/a6xx: Remove extra parens
  • freedreno/ir3: Track whether shader needs derivatives
  • freedreno/ir3: Fix operand order for DSX/DSY
  • st/glsl_to_nir: Calculate num_uniforms from NumParameterValues
  • freedreno/ir3: Enable PIPE_CAP_PACKED_UNIFORMS
  • freedreno/ir3: Push UBOs to constant file
  • freedreno/ir3: Don't access beyond available regs
  • freedreno/ir3: Add workaround for VS samgq
  • freedreno/ir3: Mark ir3_context_error() as NORETURN
  • freedreno/a2xx: Fix redundant if statement
  • freedreno: Use enum values from matching enum
  • freedreno/a6xx: Add helper for incrementing regid
  • freedreno: Fix format string warning
  • .gitignore: Remove autotool artifacts
  • tgsi: Mark tgsi_strings_check() unused
  • glsl_to_nir: Initialize debug variable
  • nir_opcodes.py: Saturate to expression that doesn't overflow
  • ralloc: Fully qualify non-virtual destructor call
  • egl/dri2: Mark potentially unused 'display' variable with MAYBE_UNUSED
  • gallium/auxiliary/vl: Fix a couple of warnings
  • freedreno/drm: Quiet pointer to u64 conversion warning

Leo Liu (6):

  • st/va: fix the incorrect max profiles report
  • st/va/vp9: set max reference as default of VP9 reference number
  • vl/dri3: remove the wait before getting back buffer
  • radeon/vcn: add H.264 constrained baseline support
  • radeon/vcn/vp9: search the render target from the whole list
  • winsys/amdgpu: add VCN JPEG to no user fence group

Lepton Wu (2):

  • virgl: close drm fd when destroying virgl screen.
  • virgl: Set bind when creating temp resource.

Lionel Landwerlin (127):

  • anv: assert that color attachment are valid
  • radv: assert that colorAttachment is valid for CmdClearAttachment
  • i965: scale factor changes should trigger recompile
  • vulkan: Update the XML and headers to 1.1.101
  • anv: implement VK_EXT_depth_clip_enable
  • build: move imgui out of src/intel/tools to be reused
  • imgui: bump copy
  • imgui: make sure our copy of imgui doesn't clash with others in the same process
  • vulkan: add an overlay layer
  • intel: fix urb size for CFL GT1
  • anv: add support for INTEL_DEBUG=bat
  • Revert "anv: add support for INTEL_DEBUG=bat"
  • intel/aub_viewer: printout 48bits addresses
  • intel/aub_viewer: silence compiler warning
  • intel/aub_viewer: silence more compiler warnings
  • vulkan/overlay: fix missing installation of layer
  • vulkan/overlay: fix includes
  • imgui: update commit
  • imgui: update memory editor
  • vulkan/overlay: install layer binary in libdir
  • intel/compiler: use correct swizzle for replacement
  • vulkan/overlay: fix min/max computations
  • vulkan/overlay: rework option parsing
  • vulkan/overlay: add support for fps output in file
  • anv: add support for INTEL_DEBUG=bat
  • vulkan: update headers/registry to 1.1.102
  • anv: update supported patch version
  • radv: set num_components on vulkan_resource_index intrinsic
  • vulkan/util: make header available from c++
  • vulkan/util: generate instance/device dispatch tables
  • vulkan/overlay: drop dependency on validation layer headers
  • intel/decoders: add address space indicator to get BOs
  • intel/decoders: handle decoding MI_BBS from ring
  • intel/decoders: limit number of decoded batchbuffers
  • intel/aub_read: reuse defines from gen_context
  • intel/aub_write: split comment section from HW setup
  • intel/aub_write: write header in init
  • intel/aub_write: break execlist write in 2
  • intel/aub_write: switch to use i915_drm engine classes
  • intel/aub_write: log mmio writes
  • intel/aub_write: store the physical page allocator in struct
  • intel/aub_write: turn context images arrays into functions
  • intel/aub_write: factorize context image/pphwsp/ring creation
  • iris: fix decoder call
  • iris: fix decode_get_bo callback
  • intel/error2aub: build a list of BOs before writing them
  • intel/error2aub: identify buffers by engine
  • intel/error2aub: strenghten batchbuffer identifier marker
  • intel/error2aub: parse other buffer types
  • intel/error2aub: annotate buffer with their address space
  • intel/error2aub: store engine last ring buffer head/tail pointers
  • intel/error2aub: write GGTT buffers into the aub file
  • intel/error2aub: add a verbose option
  • intel/error2aub: deal with GuC log buffer
  • intel/error2aub: support older style engine names
  • vulkan: factor out wsi dependencies
  • anv: implement VK_EXT_pipeline_creation_feedback
  • vulkan/overlay: properly register layer object with loader
  • vulkan/overlay: silence validation layer warnings
  • vulkan/overlay: check return value of swapchain get images
  • vulkan/overlay: improve error reporting
  • i965: perf: sklgt2: update a priority for register programming
  • i965: perf: sklgt2: update compute metrics config
  • i965: perf: sklgt2: update memory write config
  • i965: perf: add PMA stall metrics
  • i965: perf: chv: fixup counters names
  • i965: perf: hsw: drop register programming not needed on HSW
  • i965: perf: sklgt2: drop programming of an unused NOA register
  • i965: perf: add Icelake metrics
  • i965: perf: enable Icelake metrics
  • i965: perf: add ring busyness metric for cfl gt2
  • i965: perf: update render basic configs for big core gen9/gen10
  • anv: implement VK_KHR_swapchain revision 70
  • intel: add dependency on genxml generated files
  • genxml: add a sorting script
  • genxml: sort xml files using new script
  • anv: don't use default pipeline cache for hits for VK_EXT_pipeline_creation_feedback
  • anv: store heap address bounds when initializing physical device
  • anv: leave the top 4Gb of the high heap VMA unused
  • i965: store device revision in gen_device_info
  • i965: extract performance query metrics
  • i965: move mdapi data structure to intel/perf
  • i965: move OA accumulation code to intel/perf
  • i965: move brw_timebase_scale to device info
  • i965: move mdapi result data format to intel/perf
  • i965: move mdapi guid into intel/perf
  • intel/perf: stub gen10/11 missing definitions
  • i965: perf: add mdapi pipeline statistics queries on gen10/11
  • intel/perf: drop counter size field
  • intel/perf: constify accumlator parameter
  • iris: implement WaEnableStateCacheRedirectToCS
  • i965: implement WaEnableStateCacheRedirectToCS
  • anv: implement WaEnableStateCacheRedirectToCS
  • anv: fix uninitialized pthread cond clock domain
  • intel/devinfo: fix missing num_thread_per_eu on ICL
  • intel/devinfo: add basic sanity tests on device database
  • anv: limit URB reconfigurations when using blorp
  • intel: workaround VS fixed function issue on Gen9 GT1 parts
  • anv: fix argument name for vkCmdEndQuery
  • i965: fix icelake performance query enabling
  • Revert "anv: limit URB reconfigurations when using blorp"
  • vulkan/util: generate a helper function to return pNext struct sizes
  • vulkan/overlay: update help printout
  • vulkan/overlay: record stats in command buffers and accumulate on exec/submit
  • vulkan/overlay: add pipeline statistic & timestamps support
  • vulkan/overlay: add no display option
  • vulkan/overlay: add a margin to the size of the window
  • vulkan/overlay: record all select metrics into output file
  • vulkan/overlay: add a frame counter option
  • vulkan/overlay: make overlay size configurable
  • vulkan/overlay: make overriden functions static
  • vulkan/overlay: add TODO list
  • anv: fix crash when application does not provide push constants
  • anv: rework queries writes to ensure ordering memory writes
  • anv: fix use after free
  • anv: Use corresponding type from the vector allocation
  • vulkan/overlay: keep allocating draw data until it can be reused
  • nir: fix lower_non_uniform_access pass
  • vulkan/overlay-layer: fix cast errors
  • vulkan/overlay: fix truncating error on 32bit platforms
  • nir: lower_non_uniform_access: iterate over instructions safely
  • vulkan/overlay: fix timestamp query emission with no pipeline stats
  • vulkan: fix build dependency issue with generated files
  • anv: fix apply_pipeline_layout pass for arrays of YCbCr descriptors
  • nir/lower_non_uniform: safely iterate over blocks
  • intel/perf: fix EuThreadsCount value in performance equations
  • intel/perf: improve dynamic loading config detection

Lubomir Rintel (3):

  • kmsro: Extend to include armada-drm
  • gallivm: guess CPU features also on ARM
  • gallivm: disable NEON instructions if they are not supported

Lucas Stach (3):

  • etnaviv: don't flush own context when updating resource use
  • etnaviv: flush all pending contexts when accessing a resource with the CPU
  • etnaviv: only try to construct scanout resource when on KMS winsys

Marek Olšák (121):

  • radeonsi: enable dithered alpha-to-coverage for better quality
  • radeonsi: merge & rename texture BO metadata functions
  • radeonsi: unify error paths in si_texture_create_object
  • winsys/amdgpu: remove amdgpu_drm.h definitions
  • r600: add -Wstrict-overflow=0 to meson to silence the warning
  • radeonsi: fix a comment typo in si_fine_fence_set
  • gallium: allow more PIPE_RESOURCE_ driver flags
  • meson: drop the xcb-xrandr version requirement
  • radeonsi: handle render_condition_enable in si_compute_clear_render_target
  • radeonsi: fix crashing performance counters (division by zero)
  • radeonsi: initialize textures using DCC to black when possible
  • radeonsi: clear allocator_zeroed_memory with SDMA
  • radeonsi: make allocator_zeroed_memory unmappable and use bigger buffers
  • radeonsi: don't leak an index buffer if draw_vbo fails
  • radeonsi: use local ws variable in si_need_dma_space
  • gallium/u_threaded: fix EXPLICIT_FLUSH for flush offsets > 0
  • radeonsi: fix EXPLICIT_FLUSH for flush offsets > 0
  • winsys/amdgpu: don't drop manually added fence dependencies
  • winsys/amdgpu: unify fence list code
  • winsys/amdgpu: use a separate fence list for syncobjs
  • winsys/amdgpu: remove occurence of INDIRECT_BUFFER_CONST
  • winsys/amdgpu: clean up IB buffer size computation
  • winsys/amdgpu: cs_check_space sets the minimum IB size for future IBs
  • radeonsi: add AMD_DEBUG env var as an alternative to R600_DEBUG
  • radeonsi: use MEM instead of MEM_GRBM in COPY_DATA.DST_SEL
  • radeonsi: add driconf option radeonsi_enable_nir
  • radeonsi: always enable NIR for Civilization 6 to fix corruption
  • driconf: add Civ6Sub executable for Civilization 6
  • st/mesa: always unmap the uploader in st_atom_array.c
  • gallium/u_threaded: always unmap const_uploader
  • gallium/u_upload_mgr: allow use of FLUSH_EXPLICIT with persistent mappings
  • radeonsi: use SDMA for uploading data through const_uploader
  • tgsi: don't set tgsi_info::uses_bindless_images for constbufs and hw atomics
  • radeonsi: always use compute rings for clover on CI and newer (v2)
  • gallium/u_tests: use a compute-only context to test GCN compute ring
  • gallium: add pipe_grid_info::last_block
  • omx: clean up enc_LoadImage_common
  • omx: add a compute path in enc_LoadImage_common
  • radeonsi: fix assertion failure by using the correct type
  • mesa: implement ARB/KHR_parallel_shader_compile
  • gallium: implement ARB/KHR_parallel_shader_compile
  • util/queue: move thread creation into a separate function
  • util/queue: add ability to kill a subset of threads
  • util/queue: hold a lock when reading num_threads in util_queue_finish
  • util/queue: add util_queue_adjust_num_threads
  • radeonsi: implement ARB/KHR_parallel_shader_compile callbacks
  • radeonsi: don't use PFP_SYNC_ME with compute-only contexts
  • docs/relnotes: document parallel_shader_compile changes in 19.1.0, not 19.0.0
  • amd/addrlib: fix uninitialized values for Addr2ComputeDccAddrFromCoord
  • radeonsi/gfx9: add support for PIPE_ALIGNED=0
  • radeonsi: add ability to bind images as image buffers
  • radeonsi: add support for displayable DCC for 1 RB chips
  • radeonsi: add support for displayable DCC for multi-RB chips
  • radeonsi: enable displayable DCC on Ravens
  • gallium: add writable_bitmask parameter into set_shader_buffers
  • glsl: remember which SSBOs are not read-only and pass it to gallium
  • radeonsi: set exact shader buffer read/write usage in CS
  • tegra: fix the build after the set_shader_buffers change
  • radeonsi: fix a crash when unbinding sampler states
  • glsl: fix shader_storage_blocks_write_access for SSBO block arrays
  • Revert "glsl: fix shader_storage_blocks_write_access for SSBO block arrays"
  • glsl: allow the #extension directive within code blocks for the dri option
  • mesa: don't overwrite existing shader files with MESA_SHADER_CAPTURE_PATH
  • radeonsi: set AC_FUNC_ATTR_READNONE for image opcodes where it was missing
  • ac: use the common helper ac_apply_fmask_to_sample
  • ac: fix incorrect bindless atomic code in visit_image_atomic
  • radeonsi: enable GL_EXT_shader_image_load_formatted
  • nir: optimize gl_SampleMaskIn to gl_HelperInvocation for radeonsi when possible
  • winsys/amdgpu: don't set GTT with GDS & OA placements on APUs
  • radeonsi/gfx9: use the correct condition for the DPBB + QUANT_MODE workaround
  • radeonsi: use CP DMA for the null const buffer clear on CIK
  • tgsi/scan: add uses_drawid
  • ac: add radeon_info::marketing_name, replacing the winsys callback
  • ac: add radeon_info::is_pro_graphics
  • ac: add ac_get_i1_sgpr_mask
  • ac: add REWIND and GDS registers to register headers
  • winsys/amdgpu: make IBs writable and expose their address
  • winsys/amdgpu: reorder chunks, make BO_HANDLES first, IB and FENCE last
  • winsys/amdgpu: enable chaining for compute IBs
  • winsys/amdgpu: clean up and remove nonsensical assertion
  • radeonsi: add si_cp_copy_data
  • radeonsi: add helper si_get_minimum_num_gfx_cs_dwords
  • radeonsi: delay adding BOs at the beginning of IBs until the first draw
  • gallium: document conservative rasterization flags
  • st/dri: simplify throttling code
  • gallium: replace DRM_CONF_THROTTLE with PIPE_CAP_MAX_FRAMES_IN_FLIGHT
  • gallium: replace DRM_CONF_SHARE_FD with PIPE_CAP_DMABUF
  • gallium: replace drm_driver_descriptor::configuration with driconf_xml
  • gallium: set PIPE_CAP_MAX_FRAMES_IN_FLIGHT to 2 for all drivers
  • gallium: add PIPE_CAP_PREFER_COMPUTE_BLIT_FOR_MULTIMEDIA
  • util: fix a compile failure in u_compute.c on windows
  • mesa: enable glGet for EXT_gpu_shader4
  • glsl: add `unsigned int` type for EXT_GPU_shader4
  • glsl: apply some 1.30 and other rules to EXT_gpu_shader4 as well
  • glsl: add builtin variables for EXT_gpu_shader4
  • glsl: add arithmetic builtin functions for EXT_gpu_shader4
  • glsl: add texture builtin functions for EXT_gpu_shader4
  • glsl: allow "varying out" for fragment shader outputs with EXT_gpu_shader4
  • mesa: expose EXT_texture_buffer_object
  • mesa: only allow EXT_gpu_shader4 in the compatibility profile
  • st/mesa: expose EXT_gpu_shader4 if GLSL 1.40 is supported
  • glsl: handle interactions between EXT_gpu_shader4 and texture extensions
  • radeonsi: add BOs after need_cs_space
  • radeonsi/gfx9: set that window_rectangles always roll the context
  • radeonsi/gfx9: rework the gfx9 scissor bug workaround (v2)
  • radeonsi: remove dirty slot masks from scissor and viewport states
  • glsl: fix shader_storage_blocks_write_access for SSBO block arrays (v2)
  • radeonsi: don't ignore PIPE_FLUSH_ASYNC
  • mesa: rework error handling in glDrawBuffers
  • mesa: fix pbuffers because internally they are front buffers
  • st/mesa: don't flush the front buffer if it's a pbuffer
  • radeonsi: use new atomic LLVM helpers
  • radeonsi: set sampler state and view functions for compute-only contexts
  • st/dri: decrease input lag by syncing sooner in SwapBuffers
  • glsl: fix and clean up NV_compute_shader_derivatives support
  • st/mesa: fix 2 crashes in st_tgsi_lower_yuv
  • radeonsi: remove old_va parameter from si_rebind_buffer by remembering offsets
  • radeonsi: update buffer descriptors in all contexts after buffer invalidation
  • radeonsi: fix a regression in si_rebind_buffer
  • u_blitter: don't fail mipmap generation for depth formats containing stencil
  • ac: fix a typo in ac_build_wg_scan_bottom

Mario Kleiner (1):

  • drirc: Add sddm-greeter to adaptive_sync blacklist.

Mark Janes (5):

  • mesa: properly report the length of truncated log messages
  • mesa: rename logging functions to reflect that they format strings
  • mesa: add logging function for formatted string
  • intel/common: move gen_debug to intel/dev
  • intel/tools: Remove redundant definitions of INTEL_DEBUG

Mateusz Krzak (2):

  • panfrost: cast bo_handles pointer to uintptr_t first
  • panfrost: use os_mmap and os_munmap

Mathias Fröhlich (22):

  • st/mesa: Reduce array updates due to current changes.
  • mesa: Track buffer object use also for VAO usage.
  • st/mesa: Invalidate the gallium array atom only if needed.
  • mesa: Implement helper functions to map and unmap a VAO.
  • mesa: Factor out _mesa_array_element.
  • mesa: Use _mesa_array_element in dlist save.
  • mesa: Replace _ae_{,un}map_vbos with _mesa_vao_{,un}map_arrays
  • mesa: Remove _ae_{,un}map_vbos and dependencies.
  • mesa: Use mapping tools in debug prints.
  • vbo: Fix basevertex handling in display list compiles.
  • vbo: Fix GL_PRIMITIVE_RESTART_FIXED_INDEX in display list compiles.
  • mesa: Add assert to _mesa_primitive_restart_index.
  • mesa: Factor out index function that will have multiple use.
  • mesa: Use glVertexAttrib*NV functions for fixed function attribs.
  • mesa: Implement _mesa_array_element by walking enabled arrays.
  • mesa: Rip out now unused gl_context::aelt_context.
  • mesa: Remove the now unused _NEW_ARRAY state change flag.
  • mesa: Constify static const array in api_arrayelt.c
  • mesa: Remove the _glapi_table argument from _mesa_array_element.
  • mesa: Set CurrentSavePrimitive in vbo_save_NotifyBegin.
  • mesa: Correct the is_vertex_position decision for dlists.
  • mesa: Leave aliasing of vertex and generic0 attribute to the dlist code.

Matt Turner (7):

  • intel/compiler/test: Set devinfo->gen = 7
  • intel/compiler: Avoid propagating inequality cmods if types are different
  • intel/compiler/test: Add unit test for mismatched signedness comparison
  • intel/compiler: Add commas on final values of compaction table arrays
  • intel/compiler: Use SIMD16 instructions in fs saturate prop unit test
  • intel/compiler: Add unit tests for sat prop for different exec sizes
  • intel/compiler: Improve fix_3src_operand()

Matthias Lorenz (1):

  • vulkan/overlay: Add fps counter

Mauro Rossi (6):

  • android: intel/isl: remove redundant building rules
  • android: anv: fix generated files depedencies (v2)
  • android: anv: fix libexpat shared dependency
  • android: nouveau: add support for nir
  • android: fix LLVM version string related building errors
  • draw: fix building error in draw_gs_init()

Maya Rashish (1):

  • configure: fix test portability

Michel Dänzer (19):

  • loader/dri3: Use strlen instead of sizeof for creating VRR property atom
  • gitlab-ci: Re-use docker image from the main repo in forked repos
  • gitlab-ci: List some longer-running jobs before others of the same stage
  • gitlab-ci: Use 8 CPU cores in autotools job
  • gitlab-ci: Make sure clang job actually uses ccache
  • gitlab-ci: Only pull/push cache contents in build+test stage jobs
  • gitlab-ci: Automatically retry jobs after runner system failure
  • gitlab-ci: Run CI pipeline for all branches in the main repository
  • gitlab-ci: Use Debian stretch instead of Ubuntu bionic
  • gitlab-ci: Use HTTPS for APT repositories
  • gitlab-ci: Use Debian packages instead of pip ones for meson and scons
  • gitlab-ci: Install most packages from Debian buster
  • gitlab-ci: Remove unneded (stuff from) APT command lines
  • gitlab-ci: Remove unused Debian packages from Docker image
  • gitlab-ci: Use clang 8 instead of 7
  • gitlab-ci: Drop unused clang 5/6 packages
  • gitlab-ci: Do not use subshells for compiling dependencies
  • gitlab-ci: Use LLVM 3.4 from Debian jessie for scons-llvm job
  • gitlab-ci: Use meson buildtype debug instead of default debugoptimized

Mike Blumenkrantz (6):

  • iris: support INTEL_NO_HW environment variable
  • gallium: add pipe cap for inner_coverage conservative raster mode
  • st/mesa: indicate intel extension support for inner_coverage based on cap
  • iris: add support for INTEL_conservative_rasterization
  • iris: add preemption support on gen9
  • iris: enable preemption support for gen10

Nanley Chery (3):

  • i965: Rename intel_mipmap_tree::r8stencil_* -> ::shadow_*
  • anv: Fix some depth buffer sampling cases on ICL+
  • anv/cmd_buffer: Initalize the clear color struct for CNL+

Nataraj Deshpande (1):

  • anv: Fix check for isl_fmt in assert

Neha Bhende (2):

  • st/mesa: Fix topogun-1.06-orc-84k-resize.trace crash
  • draw: fix memory leak introduced 7720ce32a

Nicolai Hähnle (9):

  • amd/surface: provide firstMipIdInTail for metadata surface calculations
  • radeonsi: add si_debug_options for convenient adding/removing of options
  • util/u_log: flush auto loggers before starting a new page
  • ddebug: set thread name
  • ddebug: log calls to pipe->flush
  • ddebug: dump driver state into a separate file
  • ddebug: expose some helper functions as non-inline
  • radeonsi: add radeonsi_aux_debug option for aux context debug dumps
  • radeonsi: add radeonsi_sync_compile option

Oscar Blumberg (3):

  • intel/fs: Fix memory corruption when compiling a CS
  • radeonsi: Fix guardband computation for large render targets
  • glsl: Fix function return typechecking

Patrick Lerda (1):

  • lima/ppir: fix pointer referenced after a free

Patrick Rudolph (1):

  • d3dadapter9: Support software renderer on any DRI device

Philipp Zabel (1):

  • etnaviv: fill missing offset in etna_resource_get_handle

Pierre Moreau (12):

  • include/CL: Update to the latest OpenCL 2.2 headers
  • clover: Avoid warnings from new OpenCL headers
  • clover: Remove the TGSI backend as unused
  • clover: Add an helper for checking if an IR is supported
  • clover/api: Rework the validation of devices for building
  • clover/api: Fail if trying to build a non-executable binary
  • clover: Disallow creating libraries from other libraries
  • clover: Validate program and library linking options
  • clover: Move device extensions definitions to core/device.cpp
  • clover: Move platform extensions definitions to clover/platform.cpp
  • clover: Only use devices supporting IR_NATIVE
  • clover: Fix indentation issues

Pierre-Eric Pelloux-Prayer (1):

  • radeonsi: init sctx->dma_copy before using it

Plamena Manolova (3):

  • i965: Disable ARB_fragment_shader_interlock for platforms prior to GEN9
  • isl: Set ClearColorConversionEnable.
  • i965: Re-enable fast color clears for GEN11.

Qiang Yu (9):

  • u_math: add ushort_to_float/float_to_ushort
  • u_dynarray: add util_dynarray_grow_cap
  • gallium/u_vbuf: export u_vbuf_get_minmax_index
  • drm-uapi: add lima_drm.h
  • gallium: add lima driver
  • lima/gpir: fix compile fail when two slot node
  • lima/gpir: fix alu check miss last store slot
  • lima: fix lima_blit with non-zero level source resource
  • lima: fix render to non-zero level texture

Rafael Antognolli (45):

  • iris: Store internal_format when getting resource from handle.
  • iris: Skip msaa16 on gen < 9.
  • iris: Flush before hiz_exec.
  • iris: Pin HiZ buffers when rendering.
  • iris: Avoid leaking if we fail to allocate the aux buffer.
  • iris/clear: Pass on render_condition_enabled.
  • iris: Skip resolve if there's no context.
  • iris: Flag ALL_DIRTY_BINDINGS on aux state change.
  • iris: Add resolve on iris_flush_resource.
  • iris: Convert RGBX to RGBA always.
  • iris: Enable auxiliary buffer support again
  • iris: Enable HiZ for multisampled depth surfaces.
  • iris: Make intel_hiz_exec public.
  • iris: Allocate buffer space for the fast clear color.
  • iris: Use the clear depth when emitting 3DSTATE_CLEAR_PARAMS.
  • iris: Fast clear depth buffers.
  • iris: Add helper to convert fast clear color.
  • iris: Add function to update clear color in surface state.
  • iris: Bring back check for srgb and fast clear color.
  • intel/isl: Add isl_format_has_color_component() function.
  • intel/blorp: Make swizzle_color_value public.
  • iris: Implement fast clear color.
  • iris: Add iris_resolve_conditional_render().
  • iris: Stall on the CPU and resolve predication during fast clears.
  • iris: Track fast clear color.
  • iris: Let blorp update the clear color for us.
  • i965/blorp: Remove unused parameter from blorp_surf_for_miptree.
  • iris: Only update clear color for gens 8 and 9.
  • iris/gen8: Re-emit the SURFACE_STATE if the clear color changed.
  • iris: Manually apply fast clear color channel overrides.
  • iris: Do not allocate clear_color_bo for gen8.
  • iris: Add aux.sampler_usages.
  • iris: Enable fast clears on gen8.
  • intel/fs: Only propagate saturation if exec_size is the same.
  • intel/fs: Move the scalar-region conversion to the generator.
  • intel/fs: Add a lowering pass for linear interpolation.
  • intel/fs: Remove fs_generator::generate_linterp from gen11+.
  • intel/isl: Resize clear color buffer to full cacheline
  • intel/genxml: Update MI_ATOMIC genxml definition.
  • intel/blorp: Make blorp update the clear color in gen11.
  • iris: Do not advertise multisampled image load/store.
  • iris: Support sRGB fast clears even if the colorspaces differ.
  • iris: Use the linear version of the surface format during fast clears.
  • iris: Update the surface state clear color address when available.
  • iris: Enable fast clear colors on gen11.

Ray Zhang (1):

  • glx: fix shared memory leak in X11

Rhys Kidd (1):

  • iris: Fix assertion in iris_resource_from_handle() tiling usage

Rhys Perry (28):

  • nvc0: add compute invocation counter
  • radv: bitcast 16-bit outputs to integers
  • radv: ensure export arguments are always float
  • ac/nir: implement 8-bit nir_load_const_instr
  • ac/nir: fix 64-bit nir_op_f2f16_rtz
  • ac/nir: make ac_build_clamp work on all bit sizes
  • ac/nir: make ac_build_isign work on all bit sizes
  • ac/nir: make ac_build_fdiv support 16-bit floats
  • ac/nir: implement half-float nir_op_frcp
  • ac/nir: implement half-float nir_op_frsq
  • ac/nir: implement half-float nir_op_ldexp
  • ac/nir: fix 16-bit ssbo stores
  • ac/nir: implement 8-bit push constant, ssbo and ubo loads
  • ac/nir: implement 8-bit ssbo stores
  • ac/nir: add 8-bit types to glsl_base_to_llvm_type
  • ac/nir: implement 8-bit conversions
  • radv: enable VK_KHR_8bit_storage
  • ac/nir: implement 16-bit pack/unpack opcodes
  • radv: lower 16-bit flrp
  • ac: add 16-bit support to ac_build_ddxy()
  • nir,ac/nir: fix cube_face_coord
  • gallium: add support for formatted image loads
  • mesa, glsl: add support for EXT_shader_image_load_formatted
  • st/mesa: add support for EXT_shader_image_load_formatted
  • vc4: fix build
  • ac,ac/nir: use a better sync scope for shared atomics
  • radv: fix set_output_usage_mask() with composite and 64-bit types
  • ac/nir: mark some texture intrinsics as convergent

Rob Clark (135):

  • freedreno: fix release tarball
  • freedreno: more fixing release tarball
  • freedreno/a6xx: small compiler warning fix
  • freedreno/ir3: fix varying packing vs. tex sharp edge
  • freedreno/a6xx: move stream-out emit to helper
  • freedreno/a6xx: clean up some open-coded bits
  • freedreno/ir3: split out image helpers
  • freedreno/ir3: split out a4xx+ instructions
  • freedreno/ir3: fix ncomp for _store_image() src
  • freedreno/ir3: add image/ssbo <-> ibo/tex mapping
  • freedreno/ir3: add a6xx instruction encoding
  • freedreno/ir3: add a6xx+ SSBO/image support
  • freedreno/ir3: HIGH reg w/a for a6xx
  • freedreno/a6xx: border-color offset helper
  • freedreno/a6xx: image/ssbo state emit
  • freedreno/a6xx: compute support
  • freedreno/a6xx: cache flush harder
  • freedreno/a6xx: fix helper_invocation (sampler mask/id)
  • freedreno/ir3: handle quirky atomic dst for a6xx
  • freedreno/ir3: fix legalize for vecN inputs
  • freedreno/ir3: fix crash in compile fail case
  • freedreno/a6xx: 3d and cube image fixes
  • freedreno: fix crash w/ masked non-SSA dst
  • freedreno/ir3: rename put_dst()
  • freedreno/ir3/a6xx: fix load_ssbo barrier type.
  • freedreno/ir3: sync instr/disasm and add ldib encoding
  • freedreno/ir3/a6xx: use ldib for ssbo reads
  • freedreno/a6xx: samplerBuffer fixes
  • freedreno/a6xx: enable tiled images
  • freedreno: fix race condition
  • freedreno/ir3: don't hardcode wrmask
  • freedreno/a6xx: fix border-color offset
  • freedreno/a6xx: cube image fix
  • freedreno/a6xx: fix hangs with large shaders
  • freedreno/ir3: use nopN encoding when possible
  • freedreno/a6xx: fix ssbo alignment
  • freedreno/ir3/a6xx: fix non-ssa atomic dst
  • freedreno/a6xx: fix DRAW_IDX_INDIRECT max_indicies
  • freedreno/a6xx: vertex_id is not _zero_based
  • freedreno/ir3/a6xx: fix atomic shader outputs
  • freedreno/ir3: gsampler2DMSArray fixes
  • freedreno/ir3: include nopN in expanded instruction count
  • freedreno/ir3: add Sethi–Ullman numbering pass
  • freedreno/ir3: track register pressure in sched
  • freedreno: fix ir3_cmdline build
  • freedreno/a6xx: remove astc_srgb workaround
  • freedreno/a6xx: refactor fd6_tex_swiz()
  • freedreno/a6xx: fix border-color swizzles
  • freedreno/a6xx: perfcntrs
  • freedreno/ir3: fix ir3_cmdline harder
  • freedreno/ir3: turn on [iu]mul_high
  • freedreno/a6xx: more bcolor fixes
  • freedreno/ir3/cp: fix ldib bug
  • freedreno/ir3/a6xx: fix ssbo comp_swap
  • freedreno/ir3 better cat6 encoding detection
  • freedreno/ir3/ra: fix half-class conflicts
  • freedreno/ir3: fix sam.s2en decoding
  • freedreno/ir3: fix sam.s2en encoding
  • freedreno/ir3: fix regmask for merged regs
  • nir: move gls_type_get_{sampler,image}_count()
  • freedreno/ir3: find # of samplers from uniform vars
  • freedreno/ir3: enable indirect tex/samp (sam.s2en)
  • freedreno/ir3: optimize sam.s2en to sam
  • freedreno/ir3: additional lowering
  • freedreno/ir3: fix bit_count
  • freedreno/ir3: dynamic UBO indexing vs 64b pointers
  • freedreno/ir3: rename has_kill to no_earlyz
  • freedreno/ir3: disable early-z for SSBO/image writes
  • gallium: add PIPE_CAP_ESSL_FEATURE_LEVEL
  • mesa/st: use ESSL cap top enable gpu_shader5
  • freedreno: add ESSL cap
  • docs: update freedreno status
  • freedreno/a6xx: small cleanup
  • freedreno/ir3: sched fix
  • freedreno/ir3: reads/writes to unrelated arrays are not dependent
  • freedreno/ir3: align const size to vec4
  • nir: print var name for load_interpolated_input too
  • nir: add lower_all_io_to_elements
  • freedreno/ir3: re-indent comment
  • freedreno/ir3: rework varying packing
  • freedreno/ir3: add pass to move varying loads
  • freedreno/ir3: convert to "new style" frag inputs
  • gallium/docs: clarify set_sampler_views (v2)
  • iris: fix set_sampler_view
  • freedreno/ir3: fix const assert
  • freedreno/drm: update for robustness
  • freedreno: add robustness support
  • compiler: rename SYSTEM_VALUE_VARYING_COORD
  • freedreno/ir3: fix rgetpos decoding
  • freedreno/ir3: more emit-cat5 fixes
  • freedreno/ir3: cleanup instruction builder macros
  • freedreno: update generated headers
  • freedreno/ir3: lower load_barycentric_at_sample
  • freedreno/ir3: lower load_barycentric_at_offset
  • freedreno/ir3: remove bogus assert
  • freedreno/ir3: rename frag_vcoord -> ij_pixel
  • freedreno/a6xx: add VALIDREG/CONDREG helper macros
  • freedreno/ir3: fix load_interpolated_input slot
  • freedreno: wire up core sample-shading support
  • freedreno/ir3: sample-shading support
  • freedreno/a6xx: sample-shading support
  • docs/features: update GL too
  • freedreno/ir3: switch fragcoord to sysval
  • freedreno/a6xx: small texture emit cleanup
  • freedreno/a6xx: pre-bake UBWC flags in texture-view
  • freedreno/ir3: fixes for half reg in/out
  • freedreno/ir3: fix shader variants vs UBO analysis
  • freedreno/ir3: fix lowered ubo region alignment
  • freedreno/ir3: add IR3_SHADER_DEBUG flag to disable ubo lowering
  • freedreno/ir3: add some ubo range related asserts
  • nir: rework tex instruction printing
  • nir: fix lower_wpos_ytransform in load_frag_coord case
  • nir: add pass to lower fb reads
  • freedreno/drm: expose GMEM_BASE address
  • freedreno/ir3: fb read support
  • freedreno/a6xx: KHR_blend_equation_advanced support
  • freedreno/a6xx: smaller hammer for fb barrier
  • docs: mark KHR_blend_equation_advanced done on a6xx
  • nir: fix nir tex print harder
  • freedreno/ir3: remove assert
  • freedreno/a6xx: OUT_RELOC vs OUT_RELOCW fixes
  • freedreno: update generated headers
  • freedreno/a6xx: UBWC fixes
  • freedreno/a6xx: UBWC support for images
  • freedreno: mark imported resources as valid
  • freedreno/a6xx: buffer resources cannot be compressed
  • freedreno: move UBWC color offset to fd_resource_offset()
  • freedreno: add ubwc_enabled helper
  • freedreno/a6xx: deduplicate a few lines
  • freedreno: remove unused forward struct declaration
  • freedreno/ir3: fix rasterflat/glxgears
  • freedreno/ir3: set more barrier bits
  • freedreno/a6xx: fix GPU crash on small render targets
  • freedreno/a6xx: fix issues with gallium HUD
  • freedreno/a6xx: fix hangs with newer sqe fw

Rob Herring (2):

  • kmsro: Add lima renderonly support
  • kmsro: Add platform support for exynos and sun4i

Rodrigo Vivi (1):

  • intel: Add more PCI Device IDs for Coffee Lake and Ice Lake.

Roland Scheidegger (2):

  • gallivm: fix bogus assert in get_indirect_index
  • gallivm: fix saturated signed add / sub with llvm 9

Romain Failliot (1):

  • docs: changed "Done" to "DONE" in features.txt

Ross Burton (1):

  • Revert "meson: drop GLESv1 .so version back to 1.0.0"

Ryan Houdek (1):

  • panfrost: Adds Bifrost shader disassembler utility

Sagar Ghuge (10):

  • iris: Don't allocate a BO per query object
  • nir/glsl: Add another way of doing lower_imul64 for gen8+
  • glsl: [u/i]mulExtended optimization for GLSL
  • nir/algebraic: Optimize low 32 bit extraction
  • spirv: Allow [i/u]mulExtended to use new nir opcode
  • iris: Refactor code to share 3DSTATE_URB_* packet
  • iris: Track last VS URB entry size
  • iris: Flag fewer dirty bits in BLORP
  • intel/fs: Remove unused condition from opt_algebraic case
  • intel/compiler: Fix assertions in brw_alu3

Samuel Iglesias Gonsálvez (4):

  • isl: remove the cache line size alignment requirement
  • isl: the display engine requires 64B alignment for linear surfaces
  • radv: don't overwrite results in VkGetQueryPoolResults() when queries are not available
  • radv: write availability status vkGetQueryPoolResults() when the data is not available

Samuel Pitoiset (147):

  • radv/winsys: fix hash when adding internal buffers
  • radv: fix build
  • radv: bail out when no image transitions will be performed
  • radv: remove unused radv_render_pass_attachment::view_mask
  • radv: remove useless MAYBE_UNUSED in CmdBeginRenderPass()
  • radv: add radv_cmd_buffer_begin_subpass() helper
  • radv: move subpass image transitions to radv_cmd_buffer_begin_subpass()
  • radv: store the list of attachments for every subpass
  • radv: use the new attachments array when starting subpasses
  • radv: determine the last subpass id for every attachments
  • radv: handle final layouts at end of every subpass and render pass
  • radv: move some render pass things to radv_render_pass_compile()
  • radv: add radv_render_pass_add_subpass_dep() helper
  • radv: track if subpasses have color attachments
  • radv: handle subpass dependencies correctly
  • radv: accumulate all ingoing external dependencies to the first subpass
  • radv: execute external subpass barriers after ending subpasses
  • radv: drop useless checks when resolving subpass color attachments
  • radv: do not set preserveAttachments for internal render passes
  • radv: don't flush src stages when dstStageMask == BOTTOM_OF_PIPE
  • radv: fix compiler issues with GCC 9
  • radv: gather more info about push constants
  • radv: gather if shaders load dynamic offsets separately
  • radv: keep track of the number of remaining user SGPRs
  • radv: add support for push constants inlining when possible
  • radv: fix using LOAD_CONTEXT_REG with old GFX ME firmwares on GFX8
  • radv/winsys: fix BO list creation when RADV_DEBUG=allbos is set
  • radv: always export gl_SampleMask when the fragment shader uses it
  • ac: make use of ac_build_expand_to_vec4() in visit_image_store()
  • radv: use MAX_{VBS,VERTEX_ATTRIBS} when defining max vertex input limits
  • radv: store vertex attribute formats as pipeline keys
  • radv: reduce the number of loaded channels for vertex input fetches
  • radv: fix radv_fixup_vertex_input_fetches()
  • radv: fix invalid element type when filling vertex input default values
  • ac: add ac_build_llvm8_tbuffer_load() helper
  • ac: use new LLVM 8 intrinsic when loading 16-bit values
  • radv: write the alpha channel of MRT0 when alpha coverage is enabled
  • radv: remove unused variable in gather_push_constant_info()
  • radv: fix writing the alpha channel of MRT0 when alpha coverage is enabled
  • radv: fix clearing attachments in secondary command buffers
  • radv: fix out-of-bounds access when copying descriptors BO list
  • radv: don't copy buffer descriptors list for samplers
  • rav: use 32_AR instead of 32_ABGR when alpha coverage is required
  • radv: allocate enough space in cmdbuf when starting a subpass
  • radv: properly align the fence and EOP bug VA on GFX9
  • radv: enable lower_mul_2x32_64
  • Revert "radv: execute external subpass barriers after ending subpasses"
  • radv: fix pointSizeRange limits
  • radv: set the maximum number of IBs per submit to 192
  • ac: rework typed buffers loads for LLVM 7
  • radv: store more vertex attribute infos as pipeline keys
  • radv: use typed buffer loads for vertex input fetches
  • ac: add ac_build_{struct,raw}_tbuffer_load() helpers
  • ac: use the raw tbuffer version for 16-bit SSBO loads
  • radv: always initialize HTILE when the src layout is UNDEFINED
  • radv: always load 3 channels for formats that need to be shuffled
  • ac: use llvm.amdgcn.fract intrinsic for nir_op_ffract
  • radv: fix binding transform feedback buffers
  • ac: make use of ac_get_store_intr_attribs() where possible
  • ac/nir: set attrib flags for SSBO and image store operations
  • ac: add ac_build_buffer_store_format() helper
  • ac/nir: remove one useless check in visit_store_ssbo()
  • ac/nir: use new LLVM 8 intrinsics for SSBO atomic operations
  • ac/nir: use ac_build_buffer_load() for SSBO load operations
  • ac/nir: use ac_build_buffer_store_dword() for SSBO store operations
  • ac: use new LLVM 8 intrinsics in ac_build_buffer_load()
  • ac: add ac_build_{struct,raw}_tbuffer_store() helpers
  • ac: use new LLVM 8 intrinsic when storing 16-bit values
  • ac: use new LLVM 8 intrinsics in ac_build_buffer_store_dword()
  • ac: add various int8 definitions
  • ac: add ac_build_tbuffer_load_byte() helper
  • ac: add ac_build_tbuffer_store_byte() helper
  • radv: add missing initializations since VK_EXT_pipeline_creation_feedback
  • ac: add f16_0 and f16_1 constants
  • ac: add 16-bit support fo fsign
  • ac: add 16-bit support to fract
  • ac: fix 16-bit shifts
  • ac: fix incorrect argument type for tbuffer.{load,store} with LLVM 7
  • nir: use generic float types for frexp_exp and frexp_sig
  • spirv,nir: lower frexp_exp/frexp_sig inside a new NIR pass
  • nir: add nir_{load,store}_deref_with_access() helpers
  • spirv: propagate the access flag for store and load derefs
  • ac: use llvm.amdgcn.fmed3 intrinsic for nir_op_fmed3
  • ac: add ac_build_frexp_mant() helper and 16-bit/32-bit support
  • ac: add ac_build_frex_exp() helper ans 16-bit/32-bit support
  • radv: do not lower frexp_exp and frexp_sig
  • radv: enable VK_AMD_gpu_shader_int16
  • radv: skip updating depth/color metadata for conditional rendering
  • radv: do not always initialize HTILE in compressed state
  • ac: fix return type for llvm.amdgcn.frexp.exp.i32.64
  • ac/nir: fix nir_op_b2i16
  • ac: fix ac_build_bit_count() for 16-bit integer type
  • ac: fix ac_build_bitfield_reverse() for 16-bit integer type
  • ac: fix ac_find_lsb() for 16-bit integer type
  • ac: fix ac_build_umsb() for 16-bit integer type
  • ac/nir: add support for nir_op_b2i8
  • ac: add 8-bit support to ac_build_bit_count()
  • ac: add 8-bit support to ac_find_lsb()
  • ac: add 8-bit support to ac_build_umsb()
  • ac: add 8-bit and 64-bit support to ac_build_bitfield_reverse()
  • radv: partially enable VK_KHR_shader_float16_int8
  • nir: do not pack varying with different types
  • ac/nir: fix intrinsic names for atomic operations with LLVM 9+
  • radv: fix getting the vertex strides if the bindings aren't contiguous
  • ac/nir: fix nir_op_b2f16
  • radv: enable VK_AMD_gpu_shader_half_float
  • wsi: allow to override the present mode with MESA_VK_WSI_PRESENT_MODE
  • ac/nir: make use of ac_build_imax() where possible
  • ac/nir: make use of ac_build_imin() where possible
  • ac/nir: make use of ac_build_umin() where possible
  • ac: add ac_build_umax() and use it where possible
  • ac: add ac_build_ddxy_interp() helper
  • ac: add ac_build_load_helper_invocation() helper
  • ac/nir: remove useles LLVMGetUndef for nir_op_pack_64_2x32_split
  • ac/nir: remove useless integer cast in adjust_sample_index_using_fmask()
  • ac/nir: remove useless integer cast in visit_image_load()
  • ac/nir: remove some useless integer casts for ALU operations
  • spirv: add SpvCapabilityFloat16 support
  • radv: enable VK_KHR_shader_float16_int8
  • radv: set ACCESS_NON_READABLE on stores for copy/fill/clear meta shaders
  • radv: enable shaderInt8 on SI and CIK
  • radv: sort the shader capabilities alphabetically
  • ac/nir: use new LLVM 8 intrinsics for SSBO atomics except cmpswap
  • ac/nir: add 64-bit SSBO atomic operations support
  • radv: add VK_KHR_shader_atomic_int64 but disable it for now
  • ac: add support for more types with struct/raw LLVM intrinsics
  • ac: use struct/raw load intrinsics for 8-bit/16-bit int with LLVM 9+
  • ac: use struct/raw store intrinsics for 8-bit/16-bit int with LLVM 9+
  • ac/nir: only use the new raw/struct image atomic intrinsics with LLVM 9+
  • ac/nir: only use the new raw/struct SSBO atomic intrinsics with LLVM 9+
  • ac/nir: use the new raw/struct SSBO atomic intrisics for comp_swap
  • radv: add VK_NV_compute_shader_derivates support
  • radv: add missing VEGA20 chip in radv_get_device_name()
  • radv: do not need to force emit the TCS regs on Vega20
  • radv: fix color conversions for normalized uint/sint formats
  • radv: implement a workaround for VK_EXT_conditional_rendering
  • ac: tidy up ac_build_llvm8_tbuffer_{load,store}
  • radv: set WD_SWITCH_ON_EOP=1 when drawing primitives from a stream output buffer
  • radv: only need to force emit the TCS regs on Vega10 and Raven1
  • radv: fix radv_get_aspect_format() for D+S formats
  • radv: apply the indexing workaround for atomic buffer operations on GFX9
  • radv: fix setting the number of rectangles when it's dyanmic
  • radv: add a workaround for Monster Hunter World and LLVM 7&8
  • radv: allocate more space in the CS when emitting events
  • radv: do not use gfx fast depth clears for layered depth/stencil images
  • radv: fix alpha-to-coverage when there is unused color attachments
  • radv: fix setting CB_SHADER_MASK for dual source blending

Sergii Romantsov (4):

  • dri: meson: do not prefix user provided dri-drivers-path
  • d3d: meson: do not prefix user provided d3d-drivers-path
  • i965,iris/blorp: do not blit 0-sizes
  • glsl: Fix input/output structure matching across shader stages

Sonny Jiang (1):

  • radeonsi: use compute for clear_render_target when possible

Tapani Pälli (42):

  • nir: add option to use scaling factor when sampling planes YUV lowering
  • dri: add P010, P012, P016 for 10bit/12bit/16bit YUV420 formats
  • intel/compiler: add scale_factors to sampler_prog_key_data
  • i965: add P0x formats and propagate required scaling factors
  • drirc/i965: add option to disable 565 configs and visuals
  • mesa: return NULL if we exceed MaxColorAttachments in get_fb_attachment
  • anv: anv: refactor error handling in anv_shader_bin_write_to_blob()
  • iris: add Android build
  • nir: initialize value in copy_prop_vars_block
  • nir: use nir_variable_create instead of open-coding the logic
  • android: add liblog to libmesa_intel_common build
  • android: make libbacktrace optional on USE_LIBBACKTRACE
  • iris: add libmesa_iris_gen8 library to the build
  • util: fix a warning when building against clang7 headers
  • anv: retain the is_array state in create_plane_tex_instr_implicit
  • anv: toggle on support for VK_EXT_ycbcr_image_arrays
  • anv: use anv_gem_munmap in block pool cleanup
  • anv: call blob_finish when done with it
  • nir: free dead_ctx in case of no progress
  • anv: destroy descriptor sets when pool gets destroyed
  • anv: release memory allocated by bo_heap when descriptor pool is destroyed
  • anv: release memory allocated by glsl types during spirv_to_nir
  • anv: revert "anv: release memory allocated by glsl types during spirv_to_nir"
  • i965: remove scaling factors from P010, P012
  • isl: fix automake build when sse41 is not supported
  • android: Build fixes for OMR1
  • iris: initialize num_cbufs
  • iris: mark switch case fallthrough
  • anv/radv: release memory allocated by glsl types during spirv_to_nir
  • st/mesa: fix compilation warning on storage_flags_to_buffer_flags
  • st/mesa: fix warnings about implicit conversion on enumeration type
  • spirv: fix a compiler warning
  • st/nir: run st_nir_opts after 64bit ops lowering
  • iris: move variable to the scope where it is being used
  • iris: move iris_flush_resource so we can call it from get_handle
  • iris: handle aux properly in iris_resource_get_handle
  • egl: setup fds array correctly when exporting dmabuf
  • compiler/glsl: handle case where we have multiple users for types
  • android/iris: fix driinfo header filename
  • nir: use braces around subobject in initializer
  • glsl: use empty brace initializer
  • anv: expose VK_EXT_queue_family_foreign on Android

Thomas Hellstrom (5):

  • winsys/svga: Add an environment variable to force host-backed operation
  • winsys/svga: Enable the transfer_from_buffer GPU command for vgpu10
  • svga: Avoid bouncing buffer data in malloced buffers
  • winsys/svga: Update the drm interface file
  • winsys/svga: Don't abort on EBUSY errors from execbuffer

Timo Aaltonen (1):

  • util/os_misc: Add check for PIPE_OS_HURD

Timothy Arceri (72):

  • st/glsl_to_nir: remove dead local variables
  • ac/radv/radeonsi: add ac_get_num_physical_sgprs() helper
  • radv: take LDS into account for compute shader occupancy stats
  • util: move BITFIELD macros to util/macros.h
  • st/glsl_to_nir: call nir_remove_dead_variables() after lowing local indirects
  • nir: add support for marking used patches when packing varyings
  • nir: add glsl_type_is_32bit() helper
  • nir: add is_packing_supported_for_type() helper
  • nir: rewrite varying component packing
  • nir: prehash instruction in nir_instr_set_add_or_rewrite()
  • nir: turn ssa check into an assert
  • nir: turn an ssa check in nir_search into an assert
  • nir: remove simple dead if detection from nir_opt_dead_cf()
  • radeonsi/nir: set input_usage_mask properly
  • radeonsi/nir: set colors_read properly
  • radeonsi/nir: set shader_buffers_declared properly
  • st/nir: use NIR for asm programs
  • nir: remove non-ssa support from nir_copy_prop()
  • nir: clone instruction set rather than removing individual entries
  • nir: allow nir_lower_phis_to_scalar() on more src types
  • radeonsi: fix query buffer allocation
  • glsl: fix shader cache for packed param list
  • radeonsi/nir: move si_lower_nir() call into compiler thread
  • glsl: rename is_record() -> is_struct()
  • glsl: rename get_record_instance() -> get_struct_instance()
  • glsl: rename record_location_offset() -> struct_location_offset()
  • glsl: rename record_types -> struct_types
  • nir: rename glsl_type_is_struct() -> glsl_type_is_struct_or_ifc()
  • glsl/freedreno/panfrost: pass gl_context to the standalone compiler
  • glsl: use NIR function inlining for drivers that use glsl_to_nir()
  • i965: stop calling nir_lower_returns()
  • radeonsi/nir: stop calling nir_lower_returns()
  • st/glsl: start spilling out common st glsl conversion code
  • anv: add support for dumping shader info via VK_EXT_debug_report
  • nir: add guess trip count support to loop analysis
  • nir: add new partially_unrolled bool to nir_loop
  • nir: add partial loop unrolling support
  • nir: calculate trip count for more loops
  • nir: unroll some loops with a variable limit
  • nir: simplify the loop analysis trip count code a little
  • nir: add helper to return inversion op of a comparison
  • nir: add get_induction_and_limit_vars() helper to loop analysis
  • nir: pass nir_op to calculate_iterations()
  • nir: find induction/limit vars in iand instructions
  • st/glsl_to_nir: fix incorrect arrary access
  • radeonsi/nir: call some more var optimisation passes
  • ac/nir_to_llvm: add assert to emit_bcsel()
  • nir: only override previous alu during loop analysis if supported
  • nir: fix opt_if_loop_last_continue()
  • nir: add support for user defined loop control
  • spirv: make use of the loop control support in nir
  • nir: add support for user defined select control
  • spirv: make use of the select control support in nir
  • Revert "ac/nir: use new LLVM 8 intrinsics for SSBO atomic operations"
  • nir: propagate known constant values into the if-then branch
  • Revert "nir: propagate known constant values into the if-then branch"
  • nir/radv: remove restrictions on opt_if_loop_last_continue()
  • nir: initialise some variables in opt_if_loop_last_continue()
  • nir/i965/freedreno/vc4: add a bindless bool to type size functions
  • ac/nir_to_llvm: make get_sampler_desc() more generic and pass it the image intrinsic
  • ac/nir_to_llvm: add image bindless support
  • nir: fix packing components with arrays
  • radeonsi/nir: fix scanning of bindless images
  • st/mesa/radeonsi: fix race between destruction of types and shader compilation
  • nir: fix nir_remove_unused_varyings()
  • radeonsi/nir: create si_nir_opts() helper
  • radeonsi/nir: call radeonsi nir opts before the scan pass
  • util/drirc: add workarounds for bugs in Doom 3: BFG
  • radeonsi: add config entry for Counter-Strike Global Offensive
  • Revert "glx: Fix synthetic error generation in __glXSendError"
  • Revert "st/mesa: expose 0 shader binary formats for compat profiles for Qt"
  • st/glsl: make sure to propagate initialisers to driver storage

Timur Kristóf (19):

  • radeonsi/nir: Use uniform location when calculating const_file_max.
  • iris: implement clearing render target and depth stencil
  • nir: Add ability for shaders to use window space coordinates.
  • tgsi_to_nir: Fix the TGSI ARR translation by converting the result to int.
  • tgsi_to_nir: Fix TGSI LIT translation by using flt.
  • tgsi_to_nir: Make the TGSI IF translation code more readable.
  • tgsi_to_nir: Split to smaller functions.
  • nir: Move nir_lower_uniforms_to_ubo to compiler/nir.
  • nir: Add multiplier argument to nir_lower_uniforms_to_ubo.
  • freedreno: Plumb pipe_screen through to irX_tgsi_to_nir.
  • tgsi_to_nir: Produce optimized NIR for a given pipe_screen.
  • tgsi_to_nir: Restructure system value loads.
  • tgsi_to_nir: Extract ttn_emulate_tgsi_front_face into its own function.
  • tgsi_to_nir: Support FACE and POSITION properly.
  • tgsi_to_nir: Improve interpolation modes.
  • tgsi_to_nir: Set correct location for uniforms.
  • radeonsi/nir: Only set window_space_position for vertex shaders.
  • iris: Face should be a system value.
  • gallium: fix autotools build of pipe_msm.la

Tobias Klausmann (1):

  • vulkan/util: meson build - add wayland client include

Tomasz Figa (1):

  • llvmpipe: Always return some fence in flush (v2)

Tomeu Vizoso (19):

  • panfrost: Add gem_handle to panfrost_memory and panfrost_bo
  • panfrost: Add backend targeting the DRM driver
  • panfrost/midgard: Add support for MIDGARD_MESA_DEBUG
  • panfrost: Add support for PAN_MESA_DEBUG
  • panfrost: Set bo->size[0] in the DRM backend
  • panfrost: Set bo->gem_handle when creating a linear BO
  • panfrost: Adapt to uapi changes
  • panfrost: Fix sscanf format options
  • panfrost: Set the GEM handle for AFBC buffers
  • panfrost: Also tell the kernel about the checksum_slab
  • panfrost: Pass the context BOs to the kernel so they aren't unmapped while in use
  • panfrost: Wait for last job to finish in force_flush_fragment
  • panfrost: split asserts in pandecode
  • panfrost: Guard against reading past end of buffer
  • panfrost/ci: Initial commit
  • panfrost/midgard: Skip register allocation if there's no work to do
  • panfrost/midgard: Skip liveness analysis for instructions without dest
  • panfrost: Fix two uninitialized accesses in compiler
  • panfrost: Only take the fast paths on buffers aligned to block size

Toni Lönnberg (8):

  • intel/genxml: Only handle instructions meant for render engine when generating headers
  • intel/genxml: Media instructions and structures for gen6
  • intel/genxml: Media instructions and structures for gen7
  • intel/genxml: Media instructions and structures for gen7.5
  • intel/genxml: Media instructions and structures for gen8
  • intel/genxml: Media instructions and structures for gen9
  • intel/genxml: Media instructions and structures for gen10
  • intel/genxml: Media instructions and structures for gen11

Topi Pohjolainen (2):

  • intel/compiler/icl: Use tcs barrier id bits 24:30 instead of 24:27
  • intel/compiler/fs/icl: Use dummy masked urb write for tess eval

Vasily Khoruzhick (2):

  • lima: use individual tile heap for each GP job.
  • lima: add support for depth/stencil fbo attachments and textures

Vinson Lee (5):

  • gallium/auxiliary/vl: Fix duplicate symbol build errors.
  • nir: Fix anonymous union initialization with older GCC.
  • swr: Fix build with llvm-9.0.
  • gallium: Fix autotools build with libxatracker.la.
  • freedreno: Fix GCC build error.

Vivek Kasireddy (1):

  • drm-uapi: Update headers from drm-next

Xavier Bouchoux (1):

  • nir/spirv: Fix assert when unsampled OpTypeImage has unknown 'Depth'

Yevhenii Kolesnikov (1):

  • i965: Fix allow_higher_compat_version workaround limited by OpenGL 3.0

coypu (1):

  • gbm: don't return void

davidbepo (1):

  • drirc: add Waterfox to adaptive-sync blacklist

grmat (1):

  • drirc: add Spectacle, Falkon to a-sync blacklist

pal1000 (1):

  • scons: Compatibility with Scons development version string

suresh guttula (3):

  • vl: Add cropping flags for H264
  • radeon/vce:Add support for frame_cropping_flag of VAEncSequenceParameterBufferH264
  • st/va/enc: Add support for frame_cropping_flag of VAEncSequenceParameterBufferH264

diff --git a/docs/relnotes/19.1.1.html b/docs/relnotes/19.1.1.html
new file mode 100644
index 00000000000..9d7426067ac
--- /dev/null
+++ b/docs/relnotes/19.1.1.html
@@ -0,0 +1,154 @@
The Mesa 3D Graphics Library

Mesa 19.1.1 Release Notes / June 25, 2019

Mesa 19.1.1 is a bug fix release which fixes bugs found since the 19.1.0 release.

Mesa 19.1.1 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is only available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.
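
For reference, the check described in the paragraph above can be performed at
runtime. The short C sketch below is not part of the original notes; it assumes
a current OpenGL context has already been created through GLX, EGL or a similar
API, and simply prints the version the driver actually reports:

  /* Minimal sketch: query the version reported by the driver at runtime.
   * Assumes a current OpenGL context created elsewhere (GLX, EGL, ...). */
  #include <stdio.h>
  #include <GL/gl.h>
  #include <GL/glext.h>

  static void print_reported_gl_version(void)
  {
     GLint major = 0, minor = 0;

     /* Full version string, e.g. "4.5 (Core Profile) Mesa 19.1.1" */
     printf("GL_VERSION: %s\n", (const char *)glGetString(GL_VERSION));

     /* Integer queries; GL_MAJOR_VERSION/GL_MINOR_VERSION exist since GL 3.0 */
     glGetIntegerv(GL_MAJOR_VERSION, &major);
     glGetIntegerv(GL_MINOR_VERSION, &minor);
     printf("Context version: %d.%d\n", major, minor);
  }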

SHA256 checksums

72114b16b4a84373b2acda060fe2bb1d45ea2598efab3ef2d44bdeda74f15581  mesa-19.1.1.tar.xz

New features

None

Bug fixes

  • Bug 110709 - g_glxglvnddispatchfuncs.c and glxglvnd.c fail to build with clang 8.0
  • Bug 110901 - mesa-19.1.0/src/util/futex.h:82: use of out of scope variable ?
  • Bug 110902 - mesa-19.1.0/src/broadcom/compiler/vir_opt_redundant_flags.c:104]: (style) Same expression
  • Bug 110921 - virgl on OpenGL 3.3 host regressed to OpenGL 2.1

Changes

Alejandro Piñeiro (1):

  • v3d: fix checking twice auf flag

Bas Nieuwenhuizen (5):

  • radv: Skip transitions coming from external queue.
  • radv: Decompress DCC when the image format is not allowed for buffers.
  • radv: Fix vulkan build in meson.
  • anv: Fix vulkan build in meson.
  • meson: Allow building radeonsi with just the android platform.

Dave Airlie (1):

  • nouveau: fix frees in unsupported IR error paths.

Eduardo Lima Mitev (1):

  • freedreno/a5xx: Fix indirect draw max_indices calculation

Eric Engestrom (3):

  • util/futex: fix dangling pointer use
  • glx: fix glvnd pointer types
  • util/os_file: resize buffer to what was actually needed

Gert Wollny (1):

  • virgl: Assume sRGB write control for older guest kernels or virglrenderer hosts

Haihao Xiang (1):

  • i965: support UYVY for external import only

Jason Ekstrand (1):

  • anv: Set STATE_BASE_ADDRESS upper bounds on gen7

Juan A. Suarez Romero (2):

  • docs: Add SHA256 sums for 19.1.0
  • Update version to 19.1.1

Kenneth Graunke (2):

  • glsl: Fix out of bounds read in shader_cache_read_program_metadata
  • iris: Fix iris_flush_and_dirty_history to actually dirty history.

Kevin Strasser (2):

  • gallium/winsys/kms: Fix dumb buffer bpp
  • st/mesa: Add rgbx handling for fp formats

Lionel Landwerlin (2):

  • anv: do not parse genxml data without INTEL_DEBUG=bat
  • intel/dump: fix segfault when the app hasn't accessed the device

Mathias Fröhlich (1):

  • egl: Don't add hardware device if there is no render node v2.

Richard Thier (1):

  • r300g: restore performance after RADEON_FLAG_NO_INTERPROCESS_SHARING was added

Rob Clark (1):

  • freedreno/a6xx: un-swap X24S8_UINT

Samuel Pitoiset (4):

  • radv: fix occlusion queries on VegaM
  • radv: fix VK_EXT_memory_budget if one heap isn't available
  • radv: fix FMASK expand with SRGB formats
  • radv: disable viewport clamping even if FS doesn't write Z

diff --git a/docs/relnotes/19.1.2.html b/docs/relnotes/19.1.2.html
new file mode 100644
index 00000000000..b4ecf1d0e06
--- /dev/null
+++ b/docs/relnotes/19.1.2.html
@@ -0,0 +1,194 @@
The Mesa 3D Graphics Library

Mesa 19.1.2 Release Notes / July 9, 2019

Mesa 19.1.2 is a bug fix release which fixes bugs found since the 19.1.1 release.

Mesa 19.1.2 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is only available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.

SHA256 checksums

813a144ea8ebefb7b48b6733f3f603855b0f61268d86cc1cc26a6b4be908fcfd  mesa-19.1.2.tar.xz

New features

None

Bug fixes

  • Bug 110702 - segfault in radeonsi HEVC hardware decoding with yuv420p10le
  • Bug 110783 - Mesa 19.1 rc crashing MPV with VAAPI
  • Bug 110944 - [Bisected] Blender 2.8 crashes when closing certain windows
  • Bug 110953 - Adding a redundant single-iteration do-while loop causes different image to be rendered
  • Bug 110999 - 19.1.0: assert in vkAllocateDescriptorSets using immutable samplers on Ivy Bridge
  • Bug 111019 - radv doesn't handle variable descriptor count properly

Changes

Anuj Phogat (3):

  • Revert "i965/icl: Add WA_2204188704 to disable pixel shader panic dispatch"
  • Revert "anv/icl: Add WA_2204188704 to disable pixel shader panic dispatch"
  • Revert "iris/icl: Add WA_2204188704 to disable pixel shader panic dispatch"

Arfrever Frehtes Taifersar Arahesis (1):

  • meson: Improve detection of Python when using Meson >=0.50.

Bas Nieuwenhuizen (2):

  • radv: Only allocate supplied number of descriptors when variable.
  • radv: Fix interactions between variable descriptor count and inline uniform blocks.

Caio Marcelo de Oliveira Filho (1):

  • spirv: Ignore ArrayStride in OpPtrAccessChain for Workgroup

Dylan Baker (2):

  • meson: Add support for using cmake for finding LLVM
  • Revert "meson: Add support for using cmake for finding LLVM"

Eric Anholt (2):

  • freedreno: Fix UBO load range detection on booleans.
  • freedreno: Fix up end range of unaligned UBO loads.

Eric Engestrom (1):

  • meson: bump required libdrm version to 2.4.81

Gert Wollny (2):

  • gallium: Add CAP for opcode DIV
  • vl: Use CS composite shader only if TEX_LZ and DIV are supported

Ian Romanick (1):

  • glsl: Don't increase the iteration count when there are no terminators

James Clarke (1):

  • meson: GNU/kFreeBSD has DRM/KMS and requires -D_GNU_SOURCE

Jason Ekstrand (2):

  • anv/descriptor_set: Only write texture swizzles if we have an image view
  • iris: Use a uint16_t for key sizes

Jory Pratt (2):

  • util: Heap-allocate 256K zlib buffer
  • meson: Search for execinfo.h

Juan A. Suarez Romero (3):

  • docs: add sha256 checksums for 19.1.1
  • intel: fix wrong format usage
  • Update version to 19.1.2

Kenneth Graunke (2):

  • iris: Enable PIPE_CAP_SURFACE_REINTERPRET_BLOCKS
  • gallium: Make util_copy_image_view handle shader_access

Lionel Landwerlin (2):

  • intel/compiler: fix derivative on y axis implementation
  • intel/compiler: don't use byte operands for src1 on ICL

Nanley Chery (2):

  • intel: Add and use helpers for level0 extent
  • isl: Don't align phys_level0_sa by block dimension

Nataraj Deshpande (1):

  • anv: Add HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED in vk_format

Pierre-Eric Pelloux-Prayer (2):

  • mesa: delete framebuffer texture attachment sampler views
  • radeon/uvd: fix calc_ctx_size_h265_main10

Rob Clark (1):

  • freedreno/a5xx: fix batch leak in fd5 blitter path

Sagar Ghuge (1):

  • glsl: Fix round64 conversion function

Samuel Pitoiset (1):

  • radv: only enable VK_AMD_gpu_shader_{half_float,int16} on GFX9+

Sergii Romantsov (1):

  • i965: leaking of upload-BO with push constants

Ville Syrjälä (1):

  • anv/cmd_buffer: Reuse gen8 Cmd{Set, Reset}Event on gen7

diff --git a/docs/relnotes/19.1.3.html b/docs/relnotes/19.1.3.html
new file mode 100644
index 00000000000..abf0a8949c6
--- /dev/null
+++ b/docs/relnotes/19.1.3.html
@@ -0,0 +1,191 @@
The Mesa 3D Graphics Library

Mesa 19.1.3 Release Notes / July 23, 2019

Mesa 19.1.3 is a bug fix release which fixes bugs found since the 19.1.2 release.

Mesa 19.1.3 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is only available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.

SHA256 checksums

845460b2225d15c15d4a9743dec798ff0b7396b533011d43e774e67f7825b7e0  mesa-19.1.3.tar.xz

New features

None

Bug fixes

  • Bug 109203 - [cfl dxvk] GPU Crash Launching Monopoly Plus (Iris Plus 655 / Wine + DXVK)
  • Bug 109524 - "Invalid glsl version in shading_language_version()" when trying to run directX games using wine
  • Bug 110309 - [icl][bisected] regression on piglit arb_gpu_shader_int 64.execution.fs-ishl-then-* tests
  • Bug 110663 - threads_posix.h:96: undefined reference to `pthread_once'
  • Bug 110955 - Mesa 18.2.8 implementation error: Invalid GLSL version in shading_language_version()
  • Bug 111010 - Cemu Shader Cache Corruption Displaying Solid Color After commit 11e16ca7ce0
  • Bug 111071 - SPIR-V shader processing fails with message about "extra dangling SSA sources"
  • Bug 111075 - Processing of SPIR-V shader causes device hang, sometimes leading to system reboot
  • Bug 111097 - Can not detect VK_ERROR_OUT_OF_DATE_KHR or VK_SUBOPTIMAL_KHR when window resizing

Changes

Bas Nieuwenhuizen (3):

  • radv: Handle cmask being disallowed by addrlib.
  • anv: Add android dependencies on android.
  • radv: Only save the descriptor set if we have one.

Caio Marcelo de Oliveira Filho (2):

  • anv: Fix pool allocator when first alloc needs to grow
  • spirv: Fix stride calculation when lowering Workgroup to offsets

Chia-I Wu (2):

  • anv: fix VkExternalBufferProperties for unsupported handles
  • anv: fix VkExternalBufferProperties for host allocation

Connor Abbott (1):

  • nir: Add a helper to determine if an intrinsic can be reordered

Dave Airlie (1):

  • radv: fix crash in shader tracing.

Eric Anholt (1):

  • freedreno: Fix assertion failures in context setup in shader-db mode.

Gert Wollny (1):

  • softpipe: Remove unused static function

Ian Romanick (4):

  • intel/vec4: Reswizzle VF immediates too
  • nir: Add unit tests for nir_opt_comparison_pre
  • nir: Use nir_src_bit_size instead of alu1->dest.dest.ssa.bit_size
  • mesa: Set minimum possible GLSL version

Jason Ekstrand (13):

  • nir/instr_set: Expose nir_instrs_equal()
  • nir/loop_analyze: Fix phi-of-identical-alu detection
  • nir: Add more helpers for working with const values
  • nir/loop_analyze: Handle bit sizes correctly in calculate_iterations
  • nir/loop_analyze: Bail if we encounter swizzles
  • anv: Set Stateless Data Port Access MOCS
  • nir/opt_if: Clean up single-src phis in opt_if_loop_terminator
  • nir,intel: Add support for lowering 64-bit nir_opt_extract_*
  • anv: Account for dynamic stencil write disables in the PMA fix
  • nir/regs_to_ssa: Handle regs in phi sources properly
  • nir/loop_analyze: Refactor detection of limit vars
  • nir: Add some helpers for chasing SSA values properly
  • nir/loop_analyze: Properly handle swizzles in loop conditions

Juan A. Suarez Romero (2):

  • docs: add sha256 checksums for 19.1.2
  • Update version to 19.1.3

Lepton Wu (1):

  • virgl: Set meta data for textures from handle.

Lionel Landwerlin (6):

  • vulkan/overlay: fix command buffer stats
  • vulkan/overlay: fix crash on freeing NULL command buffer
  • anv: fix crash in vkCmdClearAttachments with unused attachment
  • vulkan/wsi: update swapchain status on vkQueuePresent
  • anv: report timestampComputeAndGraphics true
  • anv: fix format mapping for depth/stencil formats

Marek Olšák (1):

  • radeonsi: don't set READ_ONLY for const_uploader to fix bindless texture hangs

Samuel Iglesias Gonsálvez (1):

  • anv: fix alphaToCoverage when there is no color attachment

Samuel Pitoiset (1):

  • radv: fix VGT_GS_MODE if VS uses the primitive ID

Sergii Romantsov (1):

  • meta: memory leak of CopyPixels usage

Timothy Arceri (1):

  • mesa: save/restore SSO flag when using ARB_get_program_binary

Vinson Lee (1):

  • meson: Add dep_thread dependency.

Yevhenii Kolesnikov (1):

  • meta: leaking of BO with DrawPixels

diff --git a/docs/relnotes/19.1.4.html b/docs/relnotes/19.1.4.html
new file mode 100644
index 00000000000..690b49e7ee0
--- /dev/null
+++ b/docs/relnotes/19.1.4.html
@@ -0,0 +1,227 @@
The Mesa 3D Graphics Library

Mesa 19.1.4 Release Notes / August 7, 2019

Mesa 19.1.4 is a bug fix release which fixes bugs found since the 19.1.3 release.

Mesa 19.1.4 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is only available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.

SHA256 checksums

a6d268a7d9edcfd92b6da80f2e34e6e0a7baaa442efbeba2fc66c404943c6bfb  mesa-19.1.4.tar.xz

New features

None

Bug fixes

  • Bug 109203 - [cfl dxvk] GPU Crash Launching Monopoly Plus (Iris Plus 655 / Wine + DXVK)
  • Bug 109524 - "Invalid glsl version in shading_language_version()" when trying to run directX games using wine
  • Bug 110309 - [icl][bisected] regression on piglit arb_gpu_shader_int 64.execution.fs-ishl-then-* tests
  • Bug 110663 - threads_posix.h:96: undefined reference to `pthread_once'
  • Bug 110955 - Mesa 18.2.8 implementation error: Invalid GLSL version in shading_language_version()
  • Bug 111010 - Cemu Shader Cache Corruption Displaying Solid Color After commit 11e16ca7ce0
  • Bug 111071 - SPIR-V shader processing fails with message about "extra dangling SSA sources"
  • Bug 111075 - Processing of SPIR-V shader causes device hang, sometimes leading to system reboot
  • Bug 111097 - Can not detect VK_ERROR_OUT_OF_DATE_KHR or VK_SUBOPTIMAL_KHR when window resizing

Changes

Andres Rodriguez (1):

  • radv: fix queries with WAIT_BIT returning VK_NOT_READY

Andrii Simiklit (2):

  • intel/compiler: don't use a keyword struct for a class fs_reg
  • meson: add a warning for meson < 0.46.0

Arcady Goldmints-Orlov (1):

  • anv: report HOST_ALLOCATION as supported for images

Bas Nieuwenhuizen (3):

  • radv: Set correct metadata size for GFX9+.
  • radv: Take variable descriptor counts into account for buffer entries.
  • radv: Fix descriptor set allocation failure.

Boyuan Zhang (4):

  • radeon/uvd: fix poc for hevc encode
  • radeon/vcn: fix poc for hevc encode
  • radeon/uvd: enable rate control for hevc encoding
  • radeon/vcn: enable rate control for hevc encoding

Caio Marcelo de Oliveira Filho (1):

  • anv: Remove special allocation for anv_push_constants

Connor Abbott (1):

  • nir: Allow qualifiers on copy_deref and image instructions

Daniel Schürmann (1):

  • spirv: Fix order of barriers in SpvOpControlBarrier

Dave Airlie (1):

  • st/nir: fix arb fragment stage conversion

Dylan Baker (1):

  • meson: allow building all glx without any drivers

Emil Velikov (1):

  • egl/drm: ensure the backing gbm is set before using it

Eric Anholt (1):

  • freedreno: Fix data races with allocating/freeing struct ir3.

Eric Engestrom (5):

  • nir: don't return void
  • util: fix no-op macro (bad number of arguments)
  • gallium+mesa: fix tgsi_semantic array type
  • scons+meson: suppress spammy build warning on MacOS
  • nir: remove explicit nir_intrinsic_index_flag values

Francisco Jerez (1):

  • intel/ir: Fix CFG corruption in opt_predicated_break().

Ilia Mirkin (4):

  • gallium/vl: fix compute tgsi shaders to not process undefined components
  • nv50,nvc0: update sampler/view bind functions to accept NULL array
  • nvc0: allow a non-user buffer to be bound at position 0
  • nv50/ir: handle insn not being there for definition of CVT arg

Jason Ekstrand (6):

  • intel/fs: Stop stack allocating large arrays
  • anv: Disable transform feedback on gen7
  • isl/formats: R8G8B8_UNORM_SRGB isn't supported on HSW
  • anv: Don't claim support for 24 and 48-bit formats on IVB
  • intel/fs: Use ALIGN16 instructions for all derivatives on gen <= 7
  • intel/fs: Implement quad_swap_horizontal with a swizzle on gen7

Juan A. Suarez Romero (2):

  • docs: add sha256 checksums for 19.1.3
  • Update version to 19.1.4

Kenneth Graunke (4):

  • mesa: Fix ReadBuffers with pbuffers
  • egl: Quiet warning about front buffer rendering for pixmaps/pbuffers
  • egl: Make the 565 pbuffer-only config single buffered.
  • egl: Only expose 565 pbuffer configs if X can export them as DRI3 images

Lionel Landwerlin (5):

  • anv: fix use of comma operator
  • nir: add access to image_deref intrinsics
  • spirv: wrap push ssa/pointer values
  • spirv: propagate access qualifiers through ssa & pointer
  • spirv: don't discard access set by vtn_pointer_dereference

Mark Menzynski (1):

  • nvc0/ir: Fix assert accessing null pointer

Nataraj Deshpande (1):

  • egl/android: Update color_buffers querying for buffer age

Nicolas Dufresne (1):

  • egl: Also query modifiers when exporting DMABuf

Rhys Perry (1):

  • ac/nir: fix txf_ms with an offset

Samuel Pitoiset (1):

  • radv: fix crash in vkCmdClearAttachments with unused attachment

Tapani Pälli (1):

  • mesa: add glsl_type ref to one_time_init and decref to atexit

Yevhenii Kolesnikov (1):

  • main: Fix memleaks in mesa_use_program

diff --git a/docs/relnotes/19.1.5.html b/docs/relnotes/19.1.5.html
new file mode 100644
index 00000000000..f83440e7df5
--- /dev/null
+++ b/docs/relnotes/19.1.5.html
@@ -0,0 +1,119 @@
The Mesa 3D Graphics Library

Mesa 19.1.5 Release Notes / August 23, 2019

Mesa 19.1.5 is a bug fix release which fixes bugs found since the 19.1.4 release.

Mesa 19.1.5 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is only available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.

SHA256 checksums

7b54e14e35c7251b171b4cf9d84cbc1d760eafe00132117db193454999cd6eb4  mesa-19.1.5.tar.xz

New features

None

Bug fixes

  • Bug 109630 - vkQuake flickering geometry under Intel
  • Bug 110395 - Shadows are flickering in SuperTuxKart
  • Bug 111113 - ANGLE BlitFramebufferTest.MultisampleDepthClear/ES3_OpenGL fails on Intel Ubuntu19.04
  • Bug 111267 - [CM246] Flickering with multiple draw calls within the same graphics pipeline if a compute pipeline is present

Changes

Bas Nieuwenhuizen (4):

  • radv: Do non-uniform lowering before bool lowering.
  • ac/nir: Use correct cast for readfirstlane and ptrs.
  • radv: Avoid binning RAVEN hangs.
  • radv: Avoid VEGA/RAVEN scissor bug in binning.

Danylo Piliaiev (1):

  • i965: Emit a dummy MEDIA_VFE_STATE before switching from GPGPU to 3D

Eric Engestrom (1):

  • util: fix mem leak of program path

Erik Faye-Lund (2):

  • gallium/dump: add missing query-type to short-list
  • gallium/dump: add missing query-type to short-list

Greg V (2):

  • anv: remove unused Linux-specific include
  • intel/perf: use MAJOR_IN_SYSMACROS/MAJOR_IN_MKDEV

Jason Ekstrand (1):

  • anv: Emit a dummy MEDIA_VFE_STATE before switching from GPGPU to 3D

Juan A. Suarez Romero (3):

  • docs: add sha256 checksums for 19.1.4
  • cherry-ignore: panfrost: Make ctx->job useful
  • Update version to 19.1.5

Marek Olšák (2):

  • radeonsi: disable SDMA image copies on dGPUs to fix corruption in games
  • radeonsi: fix an assertion failure: assert(!res->b.is_shared)

Matt Turner (1):

  • meson: Test for program_invocation_name

Sergii Romantsov (1):

  • i965/clear: clear_value better precision

diff --git a/docs/relnotes/19.1.6.html b/docs/relnotes/19.1.6.html
new file mode 100644
index 00000000000..bf86b5a56e1
--- /dev/null
+++ b/docs/relnotes/19.1.6.html
@@ -0,0 +1,132 @@
The Mesa 3D Graphics Library

Mesa 19.1.6 Release Notes / September 3, 2019

Mesa 19.1.6 is a bug fix release which fixes bugs found since the 19.1.5 release.

Mesa 19.1.6 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is only available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.

SHA256 checksums

2a369b7b48545c6486e7e44913ad022daca097c8bd937bf30dcf3f17a94d3496  mesa-19.1.6.tar.xz

New features

None

Bug fixes

  • Bug 104395 - [CTS] GTF-GL46.gtf32.GL3Tests.packed_pixels.packed_pixels tests fail on 32bit Mesa
  • Bug 111213 - VA-API nouveau SIGSEGV and asserts
  • Bug 111241 - Shadertoy shader causing hang
  • Bug 111411 - SPIR-V shader leads to GPU hang, sometimes making machine unstable

Changes

Andres Rodriguez (1):

  • radv: additional query fixes

Daniel Schürmann (1):

  • nir/lcssa: handle deref instructions properly

Danylo Piliaiev (1):

  • nir/loop_unroll: Prepare loop for unrolling in wrapper_unroll

Ian Romanick (2):

  • nir/algrbraic: Don't optimize open-coded bitfield reverse when lowering is enabled
  • intel/compiler: Request bitfield_reverse lowering on pre-Gen7 hardware

Ilia Mirkin (1):

  • gallium/vl: use compute preference for all multimedia, not just blit

Jonas Ådahl (1):

  • wayland/egl: Ensure correct buffer size when allocating

Juan A. Suarez Romero (6):

  • docs: add sha256 checksums for 19.1.5
  • cherry-ignore: add explicit 19.2 only nominations
  • cherry-ignore: iris: Replace devinfo->gen with GEN_GEN
  • cherry-ignore: iris: Update fast clear colors on Gen9 with direct immediate writes.
  • cherry-ignore: iris: Avoid unnecessary resolves on transfer maps
  • Update version to 19.1.6

Kenneth Graunke (6):

  • iris: Fix broken aux.possible/sampler_usages bitmask handling
  • iris: Drop copy format hacks from copy region based transfer path.
  • iris: Fix large timeout handling in rel2abs()
  • util: Add a _mesa_i64roundevenf() helper.
  • mesa: Fix _mesa_float_to_unorm() on 32-bit systems.
  • intel/compiler: Fix src0/desc setter ordering

Marek Olšák (1):

  • radeonsi: fix scratch buffer WAVESIZE setting leading to corruption

Paulo Zanoni (1):

  • intel/fs: grab fail_msg from v32 instead of v16 when v32->run_cs fails

Pierre-Eric Pelloux-Prayer (1):

  • glsl: replace 'x + (-x)' with constant 0

Tapani Pälli (1):

  • egl: reset blob cache set/get functions on terminate
+ + +
+ + diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h index af0ee9c5667..194b21eb2dd 100644 --- a/include/GL/internal/dri_interface.h +++ b/include/GL/internal/dri_interface.h @@ -766,7 +766,11 @@ struct __DRIuseInvalidateExtensionRec { #define __DRI_ATTRIB_YINVERTED 47 #define __DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE 48 #define __DRI_ATTRIB_MUTABLE_RENDER_BUFFER 49 /* EGL_MUTABLE_RENDER_BUFFER_BIT_KHR */ -#define __DRI_ATTRIB_MAX 50 +#define __DRI_ATTRIB_RED_SHIFT 50 +#define __DRI_ATTRIB_GREEN_SHIFT 51 +#define __DRI_ATTRIB_BLUE_SHIFT 52 +#define __DRI_ATTRIB_ALPHA_SHIFT 53 +#define __DRI_ATTRIB_MAX 54 /* __DRI_ATTRIB_RENDER_TYPE */ #define __DRI_ATTRIB_RGBA_BIT 0x01 @@ -1053,6 +1057,7 @@ enum dri_loader_cap { * only BGRA ordering can be exposed. */ DRI_LOADER_CAP_RGBA_ORDERING, + DRI_LOADER_CAP_FP16, }; struct __DRIdri2LoaderExtensionRec { @@ -1293,6 +1298,8 @@ struct __DRIdri2ExtensionRec { #define __DRI_IMAGE_FORMAT_ABGR2101010 0x1011 #define __DRI_IMAGE_FORMAT_SABGR8 0x1012 #define __DRI_IMAGE_FORMAT_UYVY 0x1013 +#define __DRI_IMAGE_FORMAT_XBGR16161616F 0x1014 +#define __DRI_IMAGE_FORMAT_ABGR16161616F 0x1015 #define __DRI_IMAGE_USE_SHARE 0x0001 #define __DRI_IMAGE_USE_SCANOUT 0x0002 @@ -1338,6 +1345,8 @@ struct __DRIdri2ExtensionRec { #define __DRI_IMAGE_FOURCC_RGBX1010102 0x30335852 #define __DRI_IMAGE_FOURCC_BGRA1010102 0x30334142 #define __DRI_IMAGE_FOURCC_BGRX1010102 0x30335842 +#define __DRI_IMAGE_FOURCC_ABGR16161616F 0x48344241 +#define __DRI_IMAGE_FOURCC_XBGR16161616F 0x48344258 #define __DRI_IMAGE_FOURCC_YUV410 0x39565559 #define __DRI_IMAGE_FOURCC_YUV411 0x31315559 #define __DRI_IMAGE_FOURCC_YUV420 0x32315559 diff --git a/include/drm-uapi/drm_fourcc.h b/include/drm-uapi/drm_fourcc.h index bab20298f42..08b81ac6ebd 100644 --- a/include/drm-uapi/drm_fourcc.h +++ b/include/drm-uapi/drm_fourcc.h @@ -144,6 +144,17 @@ extern "C" { #define DRM_FORMAT_RGBA1010102 fourcc_code('R', 'A', '3', '0') /* [31:0] R:G:B:A 10:10:10:2 little endian */ #define DRM_FORMAT_BGRA1010102 fourcc_code('B', 'A', '3', '0') /* [31:0] B:G:R:A 10:10:10:2 little endian */ +/* + * Floating point 64bpp RGB + * IEEE 754-2008 binary16 half-precision float + * [15:0] sign:exponent:mantissa 1:5:10 + */ +#define DRM_FORMAT_XRGB16161616F fourcc_code('X', 'R', '4', 'H') /* [63:0] x:R:G:B 16:16:16:16 little endian */ +#define DRM_FORMAT_XBGR16161616F fourcc_code('X', 'B', '4', 'H') /* [63:0] x:B:G:R 16:16:16:16 little endian */ + +#define DRM_FORMAT_ARGB16161616F fourcc_code('A', 'R', '4', 'H') /* [63:0] A:R:G:B 16:16:16:16 little endian */ +#define DRM_FORMAT_ABGR16161616F fourcc_code('A', 'B', '4', 'H') /* [63:0] A:B:G:R 16:16:16:16 little endian */ + /* packed YCbCr */ #define DRM_FORMAT_YUYV fourcc_code('Y', 'U', 'Y', 'V') /* [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */ #define DRM_FORMAT_YVYU fourcc_code('Y', 'V', 'Y', 'U') /* [31:0] Cb0:Y1:Cr0:Y0 8:8:8:8 little endian */ diff --git a/meson.build b/meson.build index f72bdc946bf..09a1d386b59 100644 --- a/meson.build +++ b/meson.build @@ -107,7 +107,7 @@ with_any_opengl = with_opengl or with_gles1 or with_gles2 # Only build shared_glapi if at least one OpenGL API is enabled with_shared_glapi = get_option('shared-glapi') and with_any_opengl -system_has_kms_drm = ['openbsd', 'netbsd', 'freebsd', 'dragonfly', 'linux'].contains(host_machine.system()) +system_has_kms_drm = ['openbsd', 'netbsd', 'freebsd', 'gnu/kfreebsd', 'dragonfly', 'linux'].contains(host_machine.system()) dri_drivers = get_option('dri-drivers') if 
dri_drivers.contains('auto') @@ -190,6 +190,12 @@ if cc.get_id() == 'intel' endif endif +#This message is needed until we bump meson version to 0.46 because of known 0.45.0 and 0.45.1 issue +#https://bugs.freedesktop.org/show_bug.cgi?id=109791 +if meson.version().version_compare('< 0.46.0') + warning('''Meson < 0.46 doesn't automatically define `NDEBUG`; please update meson to at least 0.46.''') +endif + with_gallium = gallium_drivers.length() != 0 and gallium_drivers != [''] if with_gallium and system_has_kms_drm @@ -244,6 +250,7 @@ endif if host_machine.system() == 'darwin' with_dri_platform = 'apple' + pre_args += '-DBUILDING_MESA' elif ['windows', 'cygwin'].contains(host_machine.system()) with_dri_platform = 'windows' elif system_has_kms_drm @@ -312,7 +319,7 @@ if with_glx == 'dri' endif endif -if not (with_dri or with_gallium or with_glx == 'xlib' or with_glx == 'gallium-xlib') +if not (with_dri or with_gallium or with_glx != 'disabled') with_gles1 = false with_gles2 = false with_opengl = false @@ -353,12 +360,12 @@ else with_egl = false endif -if with_egl and not (with_platform_drm or with_platform_surfaceless) +if with_egl and not (with_platform_drm or with_platform_surfaceless or with_platform_android) if with_gallium_radeonsi - error('RadeonSI requires drm or surfaceless platform when using EGL') + error('RadeonSI requires the drm, surfaceless or android platform when using EGL') endif if with_gallium_virgl - error('Virgl requires drm or surfaceless platform when using EGL') + error('Virgl requires the drm, surfaceless or android platform when using EGL') endif endif @@ -379,9 +386,7 @@ if with_glx != 'disabled' error('xlib conflicts with any dri driver') endif elif with_glx == 'dri' - if not with_dri - error('dri based GLX requires at least one DRI driver') - elif not with_shared_glapi + if not with_shared_glapi error('dri based GLX requires shared-glapi') endif endif @@ -754,7 +759,11 @@ if with_platform_haiku pre_args += '-DHAVE_HAIKU_PLATFORM' endif -prog_python = import('python3').find_python() +if meson.version().version_compare('>=0.50') + prog_python = import('python').find_installation('python3') +else + prog_python = import('python3').find_python() +endif has_mako = run_command( prog_python, '-c', ''' @@ -836,7 +845,7 @@ if cc.compiles('int foo(void) __attribute__((__noreturn__));', endif # TODO: this is very incomplete -if ['linux', 'cygwin', 'gnu'].contains(host_machine.system()) +if ['linux', 'cygwin', 'gnu', 'gnu/kfreebsd'].contains(host_machine.system()) pre_args += '-D_GNU_SOURCE' endif @@ -1040,7 +1049,7 @@ elif cc.has_header_symbol('sys/mkdev.h', 'major') pre_args += '-DMAJOR_IN_MKDEV' endif -foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h', 'endian.h', 'dlfcn.h'] +foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h', 'endian.h', 'dlfcn.h', 'execinfo.h'] if cc.compiles('#include <@0@>'.format(h), name : '@0@'.format(h)) pre_args += '-DHAVE_@0@'.format(h.to_upper().underscorify()) endif @@ -1052,6 +1061,13 @@ foreach f : ['strtof', 'mkostemp', 'posix_memalign', 'timespec_get', 'memfd_crea endif endforeach +if cc.has_header_symbol('errno.h', 'program_invocation_name', + args : '-D_GNU_SOURCE') + pre_args += '-DHAVE_PROGRAM_INVOCATION_NAME' +elif with_tools.contains('intel') + error('Intel tools require the program_invocation_name variable') +endif + # strtod locale support if cc.links(''' #define _GNU_SOURCE @@ -1163,7 +1179,7 @@ _drm_radeon_ver = '2.4.71' _drm_nouveau_ver = '2.4.66' _drm_etnaviv_ver = '2.4.89' _drm_intel_ver = '2.4.75' -_drm_ver = 
'2.4.75' +_drm_ver = '2.4.81' _libdrm_checks = [ ['intel', with_dri_i915 or with_gallium_i915], @@ -1258,6 +1274,7 @@ if _llvm != 'false' with_gallium_opencl or _llvm == 'true' ), static : not _shared_llvm, + method : 'config-tool', ) with_llvm = dep_llvm.found() endif diff --git a/scons/gallium.py b/scons/gallium.py index 61bbeb2399f..7dae036e5ad 100755 --- a/scons/gallium.py +++ b/scons/gallium.py @@ -352,6 +352,7 @@ def generate(env): '_DARWIN_C_SOURCE', 'GLX_USE_APPLEGL', 'GLX_DIRECT_RENDERING', + 'BUILDING_MESA', ] else: cppdefines += [ diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 22b771db774..d0f69f5176d 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -3438,6 +3438,8 @@ ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef la LLVMConstInt(ctx->i32, i, 0), ""); } } + if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind) + return LLVMBuildIntToPtr(ctx->builder, ret, src_type, ""); return LLVMBuildBitCast(ctx->builder, ret, src_type, ""); } @@ -4016,7 +4018,7 @@ ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws) /* ws->result_reduce is already the correct value */ if (ws->enable_inclusive) - ws->result_inclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->src, ws->op); + ws->result_inclusive = ac_build_alu_op(ctx, ws->result_inclusive, ws->src, ws->op); if (ws->enable_exclusive) ws->result_exclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->extra, ws->op); } diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index 69446863b95..6063411310b 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -151,13 +151,14 @@ static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, LLVMTargetRef target = ac_get_llvm_target(triple); snprintf(features, sizeof(features), - "+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s", + "+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s%s", HAVE_LLVM >= 0x0800 ? "" : ",+vgpr-spilling", tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "", tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "", tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "", - tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : ""); - + tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "", + tm_options & AC_TM_NO_LOAD_STORE_OPT ? 
",-load-store-opt" : ""); + LLVMTargetMachineRef tm = LLVMCreateTargetMachine( target, triple, diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h index 6d961c06f8a..ca00540da80 100644 --- a/src/amd/common/ac_llvm_util.h +++ b/src/amd/common/ac_llvm_util.h @@ -65,6 +65,7 @@ enum ac_target_machine_options { AC_TM_CHECK_IR = (1 << 5), AC_TM_ENABLE_GLOBAL_ISEL = (1 << 6), AC_TM_CREATE_LOW_OPT = (1 << 7), + AC_TM_NO_LOAD_STORE_OPT = (1 << 8), }; enum ac_float_mode { diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 151e0d0f961..bbc2a522cef 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -38,6 +38,7 @@ struct ac_nir_context { struct ac_shader_abi *abi; gl_shader_stage stage; + shader_info *info; LLVMValueRef *ssa_defs; @@ -1395,6 +1396,22 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, } args->attributes = AC_FUNC_ATTR_READNONE; + bool cs_derivs = ctx->stage == MESA_SHADER_COMPUTE && + ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE; + if (ctx->stage == MESA_SHADER_FRAGMENT || cs_derivs) { + /* Prevent texture instructions with implicit derivatives from being + * sinked into branches. */ + switch (instr->op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_lod: + args->attributes |= AC_FUNC_ATTR_CONVERGENT; + break; + default: + break; + } + } + return ac_build_image_opcode(&ctx->ac, args); } @@ -3730,7 +3747,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) goto write_result; } - if (args.offset && instr->op != nir_texop_txf) { + if (args.offset && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) { LLVMValueRef offset[3], pack; for (unsigned chan = 0; chan < 3; ++chan) offset[chan] = ctx->ac.i32_0; @@ -3864,7 +3881,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) args.coords[sample_chan], fmask_ptr); } - if (args.offset && instr->op == nir_texop_txf) { + if (args.offset && (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)) { int num_offsets = instr->src[offset_src].src.ssa->num_components; num_offsets = MIN2(num_offsets, instr->coord_components); for (unsigned i = 0; i < num_offsets; ++i) { @@ -4351,6 +4368,7 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, ctx.abi = abi; ctx.stage = nir->info.stage; + ctx.info = &nir->info; ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder)); diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build index 532267343d1..2dd2a7f246e 100644 --- a/src/amd/vulkan/meson.build +++ b/src/amd/vulkan/meson.build @@ -129,21 +129,27 @@ if with_xlib_lease radv_flags += '-DVK_USE_PLATFORM_XLIB_XRANDR_EXT' endif +if with_platform_android + radv_flags += [ + '-DVK_USE_PLATFORM_ANDROID_KHR' + ] + libradv_files += files('radv_android.c') +endif + libvulkan_radeon = shared_library( 'vulkan_radeon', [libradv_files, radv_entrypoints, radv_extensions_c, amd_vk_format_table_c, sha1_h, xmlpool_options_h], include_directories : [ - inc_common, inc_amd, inc_amd_common, inc_compiler, inc_util, inc_vulkan_util, - inc_vulkan_wsi, + inc_common, inc_amd, inc_amd_common, inc_compiler, inc_util, inc_vulkan_wsi, ], link_with : [ - libamd_common, libamdgpu_addrlib, libvulkan_util, libvulkan_wsi, + libamd_common, libamdgpu_addrlib, libvulkan_wsi, libmesa_util, libxmlconfig ], dependencies : [ dep_llvm, dep_libdrm_amdgpu, dep_thread, dep_elf, dep_dl, dep_m, dep_valgrind, radv_deps, - idep_nir, + idep_nir, 
idep_vulkan_util, ], c_args : [c_vis_args, no_override_init_args, radv_flags], cpp_args : [cpp_vis_args, radv_flags], diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index b04c998fac2..90c6153e875 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -301,7 +301,6 @@ radv_cmd_buffer_destroy(struct radv_cmd_buffer *cmd_buffer) static VkResult radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) { - cmd_buffer->device->ws->cs_reset(cmd_buffer->cs); list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, @@ -326,6 +325,8 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) cmd_buffer->record_result = VK_SUCCESS; + memset(cmd_buffer->vertex_bindings, 0, sizeof(cmd_buffer->vertex_bindings)); + for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) { cmd_buffer->descriptors[i].dirty = 0; cmd_buffer->descriptors[i].valid = 0; @@ -565,8 +566,8 @@ radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, for_each_bit(i, descriptors_state->valid) { struct radv_descriptor_set *set = descriptors_state->sets[i]; - data[i * 2] = (uintptr_t)set; - data[i * 2 + 1] = (uintptr_t)set >> 32; + data[i * 2] = (uint64_t)(uintptr_t)set; + data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32; } radv_emit_write_data_packet(cmd_buffer, va, MAX_SETS * 2, data); @@ -4663,6 +4664,9 @@ static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, assert(src_family == cmd_buffer->queue_family_index || dst_family == cmd_buffer->queue_family_index); + if (src_family == VK_QUEUE_FAMILY_EXTERNAL) + return; + if (cmd_buffer->queue_family_index == RADV_QUEUE_TRANSFER) return; @@ -4824,7 +4828,7 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer, radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo); - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 18); + MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 21); /* Flags that only require a top-of-pipe event. */ VkPipelineStageFlags top_of_pipe_flags = diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h index 17a2f3370c0..652a3b677d2 100644 --- a/src/amd/vulkan/radv_debug.h +++ b/src/amd/vulkan/radv_debug.h @@ -51,6 +51,7 @@ enum { RADV_DEBUG_CHECKIR = 0x200000, RADV_DEBUG_NOTHREADLLVM = 0x400000, RADV_DEBUG_NOBINNING = 0x800000, + RADV_DEBUG_NO_LOAD_STORE_OPT = 0x1000000, }; enum { diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c index 4e9c73c94d0..33615af9784 100644 --- a/src/amd/vulkan/radv_descriptor_set.c +++ b/src/amd/vulkan/radv_descriptor_set.c @@ -200,7 +200,7 @@ VkResult radv_CreateDescriptorSetLayout( break; case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: /* main descriptor + fmask descriptor + sampler */ - set_layout->binding[b].size = 32 + 32 * max_sampled_image_descriptors; + set_layout->binding[b].size = 96; binding_buffer_count = 1; alignment = 32; break; @@ -247,7 +247,8 @@ VkResult radv_CreateDescriptorSetLayout( /* Don't reserve space for the samplers if they're not accessed. 
*/ if (set_layout->binding[b].immutable_samplers_equal) { - if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER && + max_sampled_image_descriptors <= 2) set_layout->binding[b].size -= 32; else if (binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) set_layout->binding[b].size -= 16; @@ -476,8 +477,17 @@ radv_descriptor_set_create(struct radv_device *device, struct radv_descriptor_set **out_set) { struct radv_descriptor_set *set; + uint32_t buffer_count = layout->buffer_count; + if (variable_count) { + unsigned stride = 1; + if (layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_SAMPLER || + layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) + stride = 0; + buffer_count = layout->binding[layout->binding_count - 1].buffer_offset + + *variable_count * stride; + } unsigned range_offset = sizeof(struct radv_descriptor_set) + - sizeof(struct radeon_winsys_bo *) * layout->buffer_count; + sizeof(struct radeon_winsys_bo *) * buffer_count; unsigned mem_size = range_offset + sizeof(struct radv_descriptor_range) * layout->dynamic_offset_count; @@ -502,7 +512,17 @@ radv_descriptor_set_create(struct radv_device *device, } set->layout = layout; - uint32_t layout_size = align_u32(layout->size, 32); + uint32_t layout_size = layout->size; + if (variable_count) { + assert(layout->has_variable_descriptors); + uint32_t stride = layout->binding[layout->binding_count - 1].size; + if (layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) + stride = 1; + + layout_size = layout->binding[layout->binding_count - 1].offset + + *variable_count * stride; + } + layout_size = align_u32(layout_size, 32); if (layout_size) { set->size = layout_size; @@ -776,9 +796,13 @@ VkResult radv_AllocateDescriptorSets( pDescriptorSets[i] = radv_descriptor_set_to_handle(set); } - if (result != VK_SUCCESS) + if (result != VK_SUCCESS) { radv_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool, i, pDescriptorSets); + for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { + pDescriptorSets[i] = VK_NULL_HANDLE; + } + } return result; } diff --git a/src/amd/vulkan/radv_descriptor_set.h b/src/amd/vulkan/radv_descriptor_set.h index 5fd19d94482..89be6e69068 100644 --- a/src/amd/vulkan/radv_descriptor_set.h +++ b/src/amd/vulkan/radv_descriptor_set.h @@ -104,7 +104,7 @@ radv_immutable_samplers(const struct radv_descriptor_set_layout *set, static inline unsigned radv_combined_image_descriptor_sampler_offset(const struct radv_descriptor_set_binding_layout *binding) { - return binding->size - ((!binding->immutable_samplers_equal) ? 32 : 0); + return binding->size - ((!binding->immutable_samplers_equal) ? 
16 : 0); } static inline const struct radv_sampler_ycbcr_conversion * diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 10956ded66f..809675c44a0 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -464,6 +464,7 @@ static const struct debug_control radv_debug_options[] = { {"checkir", RADV_DEBUG_CHECKIR}, {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM}, {"nobinning", RADV_DEBUG_NOBINNING}, + {"noloadstoreopt", RADV_DEBUG_NO_LOAD_STORE_OPT}, {NULL, 0} }; @@ -510,6 +511,13 @@ radv_handle_per_app_options(struct radv_instance *instance, } else if (!strcmp(name, "DOOM_VFR")) { /* Work around a Doom VFR game bug */ instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS; + } else if (!strcmp(name, "MonsterHunterWorld.exe")) { + /* Workaround for a WaW hazard when LLVM moves/merges + * load/store memory operations. + * See https://reviews.llvm.org/D61313 + */ + if (HAVE_LLVM < 0x900) + instance->debug_flags |= RADV_DEBUG_NO_LOAD_STORE_OPT; } } @@ -1477,40 +1485,46 @@ radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice, * Note that the application heap usages are not really accurate (eg. * in presence of shared buffers). */ - if (vram_size) { - heap_usage = device->ws->query_value(device->ws, - RADEON_ALLOCATED_VRAM); - - heap_budget = vram_size - - device->ws->query_value(device->ws, RADEON_VRAM_USAGE) + - heap_usage; + for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) { + uint32_t heap_index = device->memory_properties.memoryTypes[i].heapIndex; - memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM] = heap_budget; - memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM] = heap_usage; - } + switch (device->mem_type_indices[i]) { + case RADV_MEM_TYPE_VRAM: + heap_usage = device->ws->query_value(device->ws, + RADEON_ALLOCATED_VRAM); - if (visible_vram_size) { - heap_usage = device->ws->query_value(device->ws, - RADEON_ALLOCATED_VRAM_VIS); + heap_budget = vram_size - + device->ws->query_value(device->ws, RADEON_VRAM_USAGE) + + heap_usage; - heap_budget = visible_vram_size - - device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) + - heap_usage; + memoryBudget->heapBudget[heap_index] = heap_budget; + memoryBudget->heapUsage[heap_index] = heap_usage; + break; + case RADV_MEM_TYPE_VRAM_CPU_ACCESS: + heap_usage = device->ws->query_value(device->ws, + RADEON_ALLOCATED_VRAM_VIS); - memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = heap_budget; - memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = heap_usage; - } + heap_budget = visible_vram_size - + device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) + + heap_usage; - if (gtt_size) { - heap_usage = device->ws->query_value(device->ws, - RADEON_ALLOCATED_GTT); + memoryBudget->heapBudget[heap_index] = heap_budget; + memoryBudget->heapUsage[heap_index] = heap_usage; + break; + case RADV_MEM_TYPE_GTT_WRITE_COMBINE: + heap_usage = device->ws->query_value(device->ws, + RADEON_ALLOCATED_GTT); - heap_budget = gtt_size - - device->ws->query_value(device->ws, RADEON_GTT_USAGE) + - heap_usage; + heap_budget = gtt_size - + device->ws->query_value(device->ws, RADEON_GTT_USAGE) + + heap_usage; - memoryBudget->heapBudget[RADV_MEM_HEAP_GTT] = heap_budget; - memoryBudget->heapUsage[RADV_MEM_HEAP_GTT] = heap_usage; + memoryBudget->heapBudget[heap_index] = heap_budget; + memoryBudget->heapUsage[heap_index] = heap_usage; + break; + default: + break; + } } /* The heapBudget and heapUsage values must be zero for array elements diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py index 23e76bfcc11..b1921f53ada 100644 --- a/src/amd/vulkan/radv_extensions.py +++ b/src/amd/vulkan/radv_extensions.py @@ -127,8 +127,8 @@ def __init__(self, name, ext_version, enable): Extension('VK_EXT_ycbcr_image_arrays', 1, True), Extension('VK_AMD_draw_indirect_count', 1, True), Extension('VK_AMD_gcn_shader', 1, True), - Extension('VK_AMD_gpu_shader_half_float', 1, 'device->rad_info.chip_class >= VI && HAVE_LLVM >= 0x0800'), - Extension('VK_AMD_gpu_shader_int16', 1, 'device->rad_info.chip_class >= VI'), + Extension('VK_AMD_gpu_shader_half_float', 1, 'device->rad_info.chip_class >= GFX9 && HAVE_LLVM >= 0x0800'), + Extension('VK_AMD_gpu_shader_int16', 1, 'device->rad_info.chip_class >= GFX9'), Extension('VK_AMD_rasterization_order', 1, 'device->has_out_of_order_rast'), Extension('VK_AMD_shader_core_properties', 1, True), Extension('VK_AMD_shader_info', 1, True), diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c index 5af172c8e7f..d6a5872d2cc 100644 --- a/src/amd/vulkan/radv_formats.c +++ b/src/amd/vulkan/radv_formats.c @@ -547,7 +547,7 @@ static bool radv_is_storage_image_format_supported(struct radv_physical_device * } } -static bool radv_is_buffer_format_supported(VkFormat format, bool *scaled) +bool radv_is_buffer_format_supported(VkFormat format, bool *scaled) { const struct vk_format_description *desc = vk_format_description(format); unsigned data_format, num_format; @@ -559,7 +559,8 @@ static bool radv_is_buffer_format_supported(VkFormat format, bool *scaled) num_format = radv_translate_buffer_numformat(desc, vk_format_get_first_non_void_channel(format)); - *scaled = (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) || (num_format == V_008F0C_BUF_NUM_FORMAT_USCALED); + if (scaled) + *scaled = (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) || (num_format == V_008F0C_BUF_NUM_FORMAT_USCALED); return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID && num_format != ~0; } @@ -635,7 +636,8 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical const struct vk_format_description *desc = vk_format_description(format); bool blendable; bool scaled = false; - if (!desc) { + /* TODO: implement some software emulation of SUBSAMPLED formats. */ + if (!desc || desc->layout == VK_FORMAT_LAYOUT_SUBSAMPLED) { out_properties->linearTilingFeatures = linear; out_properties->optimalTilingFeatures = tiled; out_properties->bufferFeatures = buffer; @@ -655,6 +657,7 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical uint32_t tiling = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT | VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT | VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT; /* The subsampled formats have no support for linear filters. 
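radv_is_buffer_format_supported() is exported in this hunk and its scaled result becomes optional, so callers that only need a yes/no answer can pass NULL, as the copy paths later in this patch do. A minimal sketch of the NULL-tolerant out-parameter pattern (hypothetical format_supported helper, not the real RADV format tables):

#include <stdbool.h>
#include <stddef.h>

/* Hypothetical query: report support and, optionally, extra detail.
 * Writing through the pointer only when it is non-NULL lets callers
 * pass NULL when they do not care about the detail. */
static bool format_supported(int format, bool *scaled)
{
   bool is_scaled = (format == 2);   /* stand-in classification */
   if (scaled)
      *scaled = is_scaled;
   return format != 0;
}

int main(void)
{
   bool scaled;
   bool a = format_supported(2, &scaled); /* caller wants the detail */
   bool b = format_supported(2, NULL);    /* caller only wants yes/no */
   return (a && b && scaled) ? 0 : 1;
}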
*/ diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 92409d147f1..a5a7aa3767f 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -729,7 +729,8 @@ radv_query_opaque_metadata(struct radv_device *device, for (i = 0; i <= image->info.levels - 1; i++) md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8; md->size_metadata = (11 + image->info.levels - 1) * 4; - } + } else + md->size_metadata = 10 * 4; } void @@ -860,6 +861,11 @@ radv_image_alloc_cmask(struct radv_device *device, uint32_t clear_value_size = 0; radv_image_get_cmask_info(device, image, &image->cmask); + if (!image->cmask.size) + return; + + assert(image->cmask.alignment); + image->cmask.offset = align64(image->size, image->cmask.alignment); /* + 8 for storing the clear values */ if (!image->clear_value_offset) { diff --git a/src/amd/vulkan/radv_meta.c b/src/amd/vulkan/radv_meta.c index ec4fc4a6d4b..0606d49392f 100644 --- a/src/amd/vulkan/radv_meta.c +++ b/src/amd/vulkan/radv_meta.c @@ -81,7 +81,7 @@ radv_meta_save(struct radv_meta_saved_state *state, if (state->flags & RADV_META_SAVE_DESCRIPTORS) { state->old_descriptor_set0 = descriptors_state->sets[0]; - if (!state->old_descriptor_set0) + if (!(descriptors_state->valid & 1) || !state->old_descriptor_set0) state->flags &= ~RADV_META_SAVE_DESCRIPTORS; } diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c index 40ecfe001d1..15eac4793ba 100644 --- a/src/amd/vulkan/radv_meta_clear.c +++ b/src/amd/vulkan/radv_meta_clear.c @@ -650,6 +650,7 @@ static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer, if (radv_image_has_htile(iview->image) && iview->base_mip == 0 && iview->base_layer == 0 && + iview->layer_count == iview->image->info.array_size && radv_layout_is_htile_compressed(iview->image, layout, queue_mask) && radv_image_extent_compare(iview->image, &iview->extent)) return true; @@ -1575,6 +1576,9 @@ emit_clear(struct radv_cmd_buffer *cmd_buffer, emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask); } } else { + if (!subpass->depth_stencil_attachment) + return; + const uint32_t pass_att = subpass->depth_stencil_attachment->attachment; if (pass_att == VK_ATTACHMENT_UNUSED) return; diff --git a/src/amd/vulkan/radv_meta_copy.c b/src/amd/vulkan/radv_meta_copy.c index 8081057d9df..9b92f64dc89 100644 --- a/src/amd/vulkan/radv_meta_copy.c +++ b/src/amd/vulkan/radv_meta_copy.c @@ -187,6 +187,24 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, &pRegions[r].imageSubresource, pRegions[r].imageSubresource.aspectMask); + if (!radv_is_buffer_format_supported(img_bsurf.format, NULL)) { + uint32_t queue_mask = radv_image_queue_family_mask(image, + cmd_buffer->queue_family_index, + cmd_buffer->queue_family_index); + MAYBE_UNUSED bool compressed = radv_layout_dcc_compressed(image, layout, queue_mask); + if (compressed) { + radv_decompress_dcc(cmd_buffer, image, &(VkImageSubresourceRange) { + .aspectMask = pRegions[r].imageSubresource.aspectMask, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer, + .layerCount = pRegions[r].imageSubresource.layerCount, + }); + } + img_bsurf.format = vk_format_for_size(vk_format_get_blocksize(img_bsurf.format)); + img_bsurf.current_layout = VK_IMAGE_LAYOUT_GENERAL; + } + struct radv_meta_blit2d_buffer buf_bsurf = { .bs = img_bsurf.bs, .format = img_bsurf.format, @@ -313,6 +331,24 @@ meta_copy_image_to_buffer(struct radv_cmd_buffer 
*cmd_buffer, &pRegions[r].imageSubresource, pRegions[r].imageSubresource.aspectMask); + if (!radv_is_buffer_format_supported(img_info.format, NULL)) { + uint32_t queue_mask = radv_image_queue_family_mask(image, + cmd_buffer->queue_family_index, + cmd_buffer->queue_family_index); + MAYBE_UNUSED bool compressed = radv_layout_dcc_compressed(image, layout, queue_mask); + if (compressed) { + radv_decompress_dcc(cmd_buffer, image, &(VkImageSubresourceRange) { + .aspectMask = pRegions[r].imageSubresource.aspectMask, + .baseMipLevel = pRegions[r].imageSubresource.mipLevel, + .levelCount = 1, + .baseArrayLayer = pRegions[r].imageSubresource.baseArrayLayer, + .layerCount = pRegions[r].imageSubresource.layerCount, + }); + } + img_info.format = vk_format_for_size(vk_format_get_blocksize(img_info.format)); + img_info.current_layout = VK_IMAGE_LAYOUT_GENERAL; + } + struct radv_meta_blit2d_buffer buf_info = { .bs = img_info.bs, .format = img_info.format, diff --git a/src/amd/vulkan/radv_meta_fmask_expand.c b/src/amd/vulkan/radv_meta_fmask_expand.c index f4b55328929..425f473fe7f 100644 --- a/src/amd/vulkan/radv_meta_fmask_expand.c +++ b/src/amd/vulkan/radv_meta_fmask_expand.c @@ -24,6 +24,7 @@ #include "radv_meta.h" #include "radv_private.h" +#include "vk_format.h" static nir_shader * build_fmask_expand_compute_shader(struct radv_device *device, int samples) @@ -132,7 +133,7 @@ radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = radv_image_to_handle(image), .viewType = radv_meta_get_view_type(image), - .format = image->vk_format, + .format = vk_format_no_srgb(image->vk_format), .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, diff --git a/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c b/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c index 77f2e6ac66e..5d771c2fc2e 100644 --- a/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c +++ b/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c @@ -156,6 +156,73 @@ convert_ycbcr(struct ycbcr_state *state, converted_channels[2], nir_imm_float(b, 1.0f)); } +static nir_ssa_def * +get_texture_size(struct ycbcr_state *state, nir_deref_instr *texture) +{ + nir_builder *b = state->builder; + const struct glsl_type *type = texture->type; + nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1); + + tex->op = nir_texop_txs; + tex->sampler_dim = glsl_get_sampler_dim(type); + tex->is_array = glsl_sampler_type_is_array(type); + tex->is_shadow = glsl_sampler_type_is_shadow(type); + tex->dest_type = nir_type_int; + + tex->src[0].src_type = nir_tex_src_texture_deref; + tex->src[0].src = nir_src_for_ssa(&texture->dest.ssa); + + nir_ssa_dest_init(&tex->instr, &tex->dest, + nir_tex_instr_dest_size(tex), 32, NULL); + nir_builder_instr_insert(b, &tex->instr); + + return nir_i2f32(b, &tex->dest.ssa); +} + +static nir_ssa_def * +implicit_downsampled_coord(nir_builder *b, + nir_ssa_def *value, + nir_ssa_def *max_value, + int div_scale) +{ + return nir_fadd(b, + value, + nir_fdiv(b, + nir_imm_float(b, 1.0f), + nir_fmul(b, + nir_imm_float(b, div_scale), + max_value))); +} + +static nir_ssa_def * +implicit_downsampled_coords(struct ycbcr_state *state, + nir_ssa_def *old_coords) +{ + nir_builder *b = state->builder; + const struct radv_sampler_ycbcr_conversion *conversion = state->conversion; + nir_ssa_def *image_size = NULL; + nir_ssa_def *comp[4] = { NULL, }; + const struct vk_format_description *fmt_desc = vk_format_description(state->conversion->format); + const unsigned divisors[2] = 
{fmt_desc->width_divisor, fmt_desc->height_divisor}; + + for (int c = 0; c < old_coords->num_components; c++) { + if (c < ARRAY_SIZE(divisors) && divisors[c] > 1 && + conversion->chroma_offsets[c] == VK_CHROMA_LOCATION_COSITED_EVEN) { + if (!image_size) + image_size = get_texture_size(state, state->tex_deref); + + comp[c] = implicit_downsampled_coord(b, + nir_channel(b, old_coords, c), + nir_channel(b, image_size, c), + divisors[c]); + } else { + comp[c] = nir_channel(b, old_coords, c); + } + } + + return nir_vec(b, comp, old_coords->num_components); +} + static nir_ssa_def * create_plane_tex_instr_implicit(struct ycbcr_state *state, uint32_t plane) @@ -163,10 +230,23 @@ create_plane_tex_instr_implicit(struct ycbcr_state *state, nir_builder *b = state->builder; nir_tex_instr *old_tex = state->origin_tex; nir_tex_instr *tex = nir_tex_instr_create(b->shader, old_tex->num_srcs+ 1); - for (uint32_t i = 0; i < old_tex->num_srcs; i++) { tex->src[i].src_type = old_tex->src[i].src_type; - nir_src_copy(&tex->src[i].src, &old_tex->src[i].src, tex); + + switch (old_tex->src[i].src_type) { + case nir_tex_src_coord: + if (plane && true/*state->conversion->chroma_reconstruction*/) { + assert(old_tex->src[i].src.is_ssa); + tex->src[i].src = + nir_src_for_ssa(implicit_downsampled_coords(state, + old_tex->src[i].src.ssa)); + break; + } + /* fall through */ + default: + nir_src_copy(&tex->src[i].src, &old_tex->src[i].src, tex); + break; + } } tex->src[tex->num_srcs - 1].src = nir_src_for_ssa(nir_imm_int(b, plane)); diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index d83f0bd547f..5201f46b3a8 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -2019,16 +2019,34 @@ static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi, assert(stride % type_size == 0); - if (!index) - index = ctx->ac.i32_0; + LLVMValueRef adjusted_index = index; + if (!adjusted_index) + adjusted_index = ctx->ac.i32_0; - index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), ""); + adjusted_index = LLVMBuildMul(builder, adjusted_index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), ""); list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0)); list = LLVMBuildPointerCast(builder, list, ac_array_in_const32_addr_space(type), ""); - return ac_build_load_to_sgpr(&ctx->ac, list, index); + LLVMValueRef descriptor = ac_build_load_to_sgpr(&ctx->ac, list, adjusted_index); + + /* 3 plane formats always have same size and format for plane 1 & 2, so + * use the tail from plane 1 so that we can store only the first 16 bytes + * of the last plane. */ + if (desc_type == AC_DESC_PLANE_2) { + LLVMValueRef descriptor2 = radv_get_sampler_desc(abi, descriptor_set, base_index, constant_index, index, AC_DESC_PLANE_1,image, write, bindless); + + LLVMValueRef components[8]; + for (unsigned i = 0; i < 4; ++i) + components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor, i); + + for (unsigned i = 4; i < 8; ++i) + components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i); + descriptor = ac_build_gather_values(&ctx->ac, components, 8); + } + + return descriptor; } /* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW. 
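The radv_get_sampler_desc change in this hunk builds the plane-2 descriptor of a 3-plane format from the first four dwords of plane 2 and the last four dwords of plane 1, since planes 1 and 2 share size and format and only the first 16 bytes of the last plane are stored. A plain-C sketch of that merge (the real code assembles LLVM values; this only shows the dword layout):

#include <stdint.h>
#include <string.h>

/* Combine descriptors: dwords 0-3 from plane 2, dwords 4-7 from plane 1. */
static void merge_plane2_descriptor(const uint32_t plane2[4],
                                    const uint32_t plane1[8],
                                    uint32_t out[8])
{
   memcpy(&out[0], plane2, 4 * sizeof(uint32_t));
   memcpy(&out[4], &plane1[4], 4 * sizeof(uint32_t));
}

int main(void)
{
   const uint32_t p2[4] = { 1, 2, 3, 4 };
   const uint32_t p1[8] = { 9, 9, 9, 9, 5, 6, 7, 8 };
   uint32_t desc[8];
   merge_plane2_descriptor(p2, p1, desc);
   return (desc[0] == 1 && desc[4] == 5) ? 0 : 1;
}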
@@ -3592,9 +3610,10 @@ ac_setup_rings(struct radv_shader_context *ctx) unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class, + gl_shader_stage stage, const struct nir_shader *nir) { - switch (nir->info.stage) { + switch (stage) { case MESA_SHADER_TESS_CTRL: return chip_class >= CIK ? 128 : 64; case MESA_SHADER_GEOMETRY: @@ -3605,6 +3624,8 @@ radv_nir_get_max_workgroup_size(enum chip_class chip_class, return 0; } + if (!nir) + return chip_class >= GFX9 ? 128 : 64; unsigned max_workgroup_size = nir->info.cs.local_size[0] * nir->info.cs.local_size[1] * nir->info.cs.local_size[2]; @@ -3671,7 +3692,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, for (int i = 0; i < shader_count; ++i) { ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size, radv_nir_get_max_workgroup_size(ctx.options->chip_class, - shaders[i])); + shaders[i]->info.stage, + shaders[i])); } create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2, diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 5383f00e754..cfa374cd437 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -524,7 +524,7 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline, col_format |= cf << (4 * i); } - if (!col_format && blend->need_src_alpha & (1 << 0)) { + if (!(col_format & 0xf) && blend->need_src_alpha & (1 << 0)) { /* When a subpass doesn't have any color attachments, write the * alpha channel of MRT0 when alpha coverage is enabled because * the depth attachment needs it. @@ -542,10 +542,13 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline, } } - blend->cb_shader_mask = ac_get_cb_shader_mask(col_format); - + /* The output for dual source blending should have the same format as + * the first output. 
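The SPI color-format change in this hunk makes the dual-source output reuse MRT0's format by copying the low 4-bit field into the next slot before cb_shader_mask is derived, so the mask sees the duplicated entry. A small sketch of that bit manipulation (the format value is made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint32_t col_format = 0x0000000a;  /* pretend MRT0 uses format 0xa */
   int mrt0_is_dual_src = 1;

   if (mrt0_is_dual_src)
      col_format |= (col_format & 0xf) << 4;  /* MRT1 slot mirrors MRT0 */

   printf("col_format = 0x%08x\n", (unsigned)col_format); /* 0x000000aa */
   return 0;
}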
+ */ if (blend->mrt0_is_dual_src) col_format |= (col_format & 0xf) << 4; + + blend->cb_shader_mask = ac_get_cb_shader_mask(col_format); blend->spi_shader_col_format = col_format; } @@ -1417,11 +1420,13 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline, const VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT); - if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) { + if (needed_states & RADV_DYNAMIC_DISCARD_RECTANGLE) { dynamic->discard_rectangle.count = discard_rectangle_info->discardRectangleCount; - typed_memcpy(dynamic->discard_rectangle.rectangles, - discard_rectangle_info->pDiscardRectangles, - discard_rectangle_info->discardRectangleCount); + if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) { + typed_memcpy(dynamic->discard_rectangle.rectangles, + discard_rectangle_info->pDiscardRectangles, + discard_rectangle_info->discardRectangleCount); + } } pipeline->dynamic_state.mask = states; @@ -2177,12 +2182,12 @@ void radv_create_shaders(struct radv_pipeline *pipeline, for (int i = 0; i < MESA_SHADER_STAGES; ++i) { if (nir[i]) { - NIR_PASS_V(nir[i], nir_lower_bool_to_int32); NIR_PASS_V(nir[i], nir_lower_non_uniform_access, nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_ssbo_access | nir_lower_non_uniform_texture_access | nir_lower_non_uniform_image_access); + NIR_PASS_V(nir[i], nir_lower_bool_to_int32); } if (radv_can_dump_shader(device, modules[i], false)) @@ -2668,8 +2673,10 @@ radv_pipeline_generate_binning_state(struct radeon_cmdbuf *ctx_cs, break; case CHIP_RAVEN: case CHIP_RAVEN2: - context_states_per_bin = 6; - persistent_states_per_bin = 32; + /* The context states are affected by the scissor bug. */ + context_states_per_bin = pipeline->device->physical_device->has_scissor_bug ? 1 : 6; + /* 32 causes hangs for RAVEN. */ + persistent_states_per_bin = 16; fpovs_per_batch = 63; break; default: @@ -2706,7 +2713,6 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs, const VkPipelineDepthStencilStateCreateInfo *vkds = pCreateInfo->pDepthStencilState; RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass; - struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; struct radv_render_pass_attachment *attachment = NULL; uint32_t db_depth_control = 0, db_stencil_control = 0; uint32_t db_render_control = 0, db_render_override2 = 0; @@ -2755,8 +2761,7 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs, db_render_override |= S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) | S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE); - if (!pCreateInfo->pRasterizationState->depthClampEnable && - ps->info.info.ps.writes_z) { + if (!pCreateInfo->pRasterizationState->depthClampEnable) { /* From VK_EXT_depth_range_unrestricted spec: * * "The behavior described in Primitive Clipping still applies. @@ -2927,8 +2932,11 @@ radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline) { const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline); - uint32_t vgt_primitiveid_en = false; + const struct radv_shader_variant *vs = + pipeline->shaders[MESA_SHADER_TESS_EVAL] ? 
+ pipeline->shaders[MESA_SHADER_TESS_EVAL] : + pipeline->shaders[MESA_SHADER_VERTEX]; uint32_t vgt_gs_mode = 0; if (radv_pipeline_has_gs(pipeline)) { @@ -2937,7 +2945,7 @@ radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *ctx_cs, vgt_gs_mode = ac_vgt_gs_mode(gs->info.gs.vertices_out, pipeline->device->physical_device->rad_info.chip_class); - } else if (outinfo->export_prim_id) { + } else if (outinfo->export_prim_id || vs->info.info.uses_prim_id) { vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A); vgt_primitiveid_en = true; } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index df85d0cf889..31c829d345b 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1456,6 +1456,7 @@ uint32_t radv_translate_buffer_dataformat(const struct vk_format_description *de int first_non_void); uint32_t radv_translate_buffer_numformat(const struct vk_format_description *desc, int first_non_void); +bool radv_is_buffer_format_supported(VkFormat format, bool *scaled); uint32_t radv_translate_colorformat(VkFormat format); uint32_t radv_translate_color_numformat(VkFormat format, const struct vk_format_description *desc, @@ -1993,6 +1994,7 @@ void radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, const struct radv_nir_compiler_options *options); unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class, + gl_shader_stage stage, const struct nir_shader *nir); /* radv_shader_info.h */ diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 63a2ab773a8..08314e09a9f 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -40,18 +40,6 @@ static const int pipelinestat_block_size = 11 * 8; static const unsigned pipeline_statistics_indices[] = {7, 6, 3, 4, 5, 2, 1, 0, 8, 9, 10}; -static unsigned get_max_db(struct radv_device *device) -{ - unsigned num_db = device->physical_device->rad_info.num_render_backends; - MAYBE_UNUSED unsigned rb_mask = device->physical_device->rad_info.enabled_rb_mask; - - /* Otherwise we need to change the query reset procedure */ - assert(rb_mask == ((1ull << num_db) - 1)); - - return num_db; -} - - static nir_ssa_def *nir_test_flag(nir_builder *b, nir_ssa_def *flags, uint32_t flag) { return nir_i2b(b, nir_iand(b, flags, nir_imm_int(b, flag))); @@ -108,12 +96,14 @@ build_occlusion_query_shader(struct radv_device *device) { * uint64_t dst_offset = dst_stride * global_id.x; * bool available = true; * for (int i = 0; i < db_count; ++i) { - * uint64_t start = src_buf[src_offset + 16 * i]; - * uint64_t end = src_buf[src_offset + 16 * i + 8]; - * if ((start & (1ull << 63)) && (end & (1ull << 63))) - * result += end - start; - * else - * available = false; + * if (enabled_rb_mask & (1 << i)) { + * uint64_t start = src_buf[src_offset + 16 * i]; + * uint64_t end = src_buf[src_offset + 16 * i + 8]; + * if ((start & (1ull << 63)) && (end & (1ull << 63))) + * result += end - start; + * else + * available = false; + * } * } * uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 
8 : 4; * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) { @@ -139,7 +129,8 @@ build_occlusion_query_shader(struct radv_device *device) { nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start"); nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end"); nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available"); - unsigned db_count = get_max_db(device); + unsigned enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask; + unsigned db_count = device->physical_device->rad_info.num_render_backends; nir_ssa_def *flags = radv_load_push_int(&b, 0, "flags"); @@ -187,6 +178,16 @@ build_occlusion_query_shader(struct radv_device *device) { nir_ssa_def *current_outer_count = nir_load_var(&b, outer_counter); radv_break_on_count(&b, outer_counter, nir_imm_int(&b, db_count)); + nir_ssa_def *enabled_cond = + nir_iand(&b, nir_imm_int(&b, enabled_rb_mask), + nir_ishl(&b, nir_imm_int(&b, 1), current_outer_count)); + + nir_if *enabled_if = nir_if_create(b.shader); + enabled_if->condition = nir_src_for_ssa(nir_i2b(&b, enabled_cond)); + nir_cf_node_insert(b.cursor, &enabled_if->cf_node); + + b.cursor = nir_after_cf_list(&enabled_if->then_list); + nir_ssa_def *load_offset = nir_imul(&b, current_outer_count, nir_imm_int(&b, 16)); load_offset = nir_iadd(&b, input_base, load_offset); @@ -1044,7 +1045,7 @@ VkResult radv_CreateQueryPool( switch(pCreateInfo->queryType) { case VK_QUERY_TYPE_OCCLUSION: - pool->stride = 16 * get_max_db(device); + pool->stride = 16 * device->physical_device->rad_info.num_render_backends; break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: pool->stride = pipelinestat_block_size * 2; @@ -1128,17 +1129,18 @@ VkResult radv_GetQueryPoolResults( if (flags & VK_QUERY_RESULT_WAIT_BIT) while(!*(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query)) ; - available = *(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query); + available = *(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query); } switch (pool->type) { case VK_QUERY_TYPE_TIMESTAMP: { - available = *(uint64_t *)src != TIMESTAMP_NOT_READY; + volatile uint64_t const *src64 = (volatile uint64_t const *)src; + available = *src64 != TIMESTAMP_NOT_READY; if (flags & VK_QUERY_RESULT_WAIT_BIT) { - while (*(volatile uint64_t *)src == TIMESTAMP_NOT_READY) + while (*src64 == TIMESTAMP_NOT_READY) ; - available = *(uint64_t *)src != TIMESTAMP_NOT_READY; + available = true; } if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) @@ -1146,23 +1148,28 @@ VkResult radv_GetQueryPoolResults( if (flags & VK_QUERY_RESULT_64_BIT) { if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT)) - *(uint64_t*)dest = *(uint64_t*)src; + *(uint64_t*)dest = *src64; dest += 8; } else { if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT)) - *(uint32_t*)dest = *(uint32_t*)src; + *(uint32_t*)dest = *(volatile uint32_t*)src; dest += 4; } break; } case VK_QUERY_TYPE_OCCLUSION: { volatile uint64_t const *src64 = (volatile uint64_t const *)src; + uint32_t db_count = device->physical_device->rad_info.num_render_backends; + uint32_t enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask; uint64_t sample_count = 0; - int db_count = get_max_db(device); available = 1; for (int i = 0; i < db_count; ++i) { uint64_t start, end; + + if (!(enabled_rb_mask & (1 << i))) + continue; + do { start = src64[2 * i]; end = src64[2 * i + 1]; @@ -1193,8 +1200,8 @@ VkResult radv_GetQueryPoolResults( if (!available && !(flags & 
VK_QUERY_RESULT_PARTIAL_BIT)) result = VK_NOT_READY; - const uint64_t *start = (uint64_t*)src; - const uint64_t *stop = (uint64_t*)(src + pipelinestat_block_size); + const volatile uint64_t *start = (uint64_t*)src; + const volatile uint64_t *stop = (uint64_t*)(src + pipelinestat_block_size); if (flags & VK_QUERY_RESULT_64_BIT) { uint64_t *dst = (uint64_t*)dest; dest += util_bitcount(pool->pipeline_stats_mask) * 8; diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 648fb6586f7..1f9fa487688 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -624,6 +624,8 @@ shader_variant_create(struct radv_device *device, tm_options |= AC_TM_SISCHED; if (options->check_ir) tm_options |= AC_TM_CHECK_IR; + if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT) + tm_options |= AC_TM_NO_LOAD_STORE_OPT; thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM); radv_init_llvm_once(); @@ -763,7 +765,7 @@ generate_shader_stats(struct radv_device *device, lds_increment); } else if (stage == MESA_SHADER_COMPUTE) { unsigned max_workgroup_size = - radv_nir_get_max_workgroup_size(chip_class, variant->nir); + radv_nir_get_max_workgroup_size(chip_class, stage, variant->nir); lds_per_wave = (conf->lds_size * lds_increment) / DIV_ROUND_UP(max_workgroup_size, 64); } diff --git a/src/broadcom/compiler/vir_opt_redundant_flags.c b/src/broadcom/compiler/vir_opt_redundant_flags.c index 61ebf5dfa24..8749f3cd647 100644 --- a/src/broadcom/compiler/vir_opt_redundant_flags.c +++ b/src/broadcom/compiler/vir_opt_redundant_flags.c @@ -102,7 +102,7 @@ vir_opt_redundant_flags_block(struct v3d_compile *c, struct qblock *block) vir_for_each_inst(inst, block) { if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU || inst->qpu.flags.auf != V3D_QPU_UF_NONE || - inst->qpu.flags.auf != V3D_QPU_UF_NONE) { + inst->qpu.flags.muf != V3D_QPU_UF_NONE) { last_flags = NULL; continue; } diff --git a/src/compiler/Android.glsl.gen.mk b/src/compiler/Android.glsl.gen.mk index 3b94ea7bd2f..1308de2db97 100644 --- a/src/compiler/Android.glsl.gen.mk +++ b/src/compiler/Android.glsl.gen.mk @@ -90,8 +90,6 @@ $(intermediates)/glsl/glcpp/glcpp-lex.c: $(LOCAL_PATH)/glsl/glcpp/glcpp-lex.l $(intermediates)/glsl/glcpp/glcpp-parse.c: $(LOCAL_PATH)/glsl/glcpp/glcpp-parse.y $(call glsl_local-y-to-c-and-h) -$(LOCAL_PATH)/glsl/ir.h: $(intermediates)/glsl/ir_expression_operation.h - $(intermediates)/glsl/ir_expression_operation.h: $(LOCAL_PATH)/glsl/ir_expression_operation.py @mkdir -p $(dir $@) $(hide) $(MESA_PYTHON2) $< enum > $@ diff --git a/src/compiler/Android.glsl.mk b/src/compiler/Android.glsl.mk index 0aabafa2673..37b3cb80251 100644 --- a/src/compiler/Android.glsl.mk +++ b/src/compiler/Android.glsl.mk @@ -48,7 +48,7 @@ LOCAL_STATIC_LIBRARIES := \ libmesa_nir LOCAL_MODULE := libmesa_glsl - +LOCAL_CFLAGS += -Wno-error include $(LOCAL_PATH)/Android.glsl.gen.mk include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) diff --git a/src/compiler/Android.nir.gen.mk b/src/compiler/Android.nir.gen.mk index 894fb12c4be..26115f446a3 100644 --- a/src/compiler/Android.nir.gen.mk +++ b/src/compiler/Android.nir.gen.mk @@ -76,8 +76,6 @@ $(intermediates)/nir/nir_opcodes.h: $(nir_opcodes_h_deps) @mkdir -p $(dir $@) $(hide) $(MESA_PYTHON2) $(nir_opcodes_h_gen) $< > $@ -$(LOCAL_PATH)/nir/nir.h: $(intermediates)/nir/nir_opcodes.h - nir_opcodes_c_gen := $(LOCAL_PATH)/nir/nir_opcodes_c.py nir_opcodes_c_deps := \ $(LOCAL_PATH)/nir/nir_opcodes.py \ diff --git a/src/compiler/Android.nir.mk 
b/src/compiler/Android.nir.mk index 75a247a245d..59da5dbdc1c 100644 --- a/src/compiler/Android.nir.mk +++ b/src/compiler/Android.nir.mk @@ -41,6 +41,9 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/gallium/include \ $(MESA_TOP)/src/gallium/auxiliary +LOCAL_CFLAGS := \ + -Wno-missing-braces + LOCAL_STATIC_LIBRARIES := libmesa_compiler LOCAL_MODULE := libmesa_nir diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index 9bebc3d8867..005256725ff 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -244,6 +244,7 @@ NIR_FILES = \ nir/nir_lower_constant_initializers.c \ nir/nir_lower_double_ops.c \ nir/nir_lower_drawpixels.c \ + nir/nir_lower_fb_read.c \ nir/nir_lower_fragcoord_wtrans.c \ nir/nir_lower_frexp.c \ nir/nir_lower_global_vars_to_local.c \ diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl index 415dde3907c..c92577c4e4c 100644 --- a/src/compiler/glsl/float64.glsl +++ b/src/compiler/glsl/float64.glsl @@ -1681,17 +1681,22 @@ __fround64(uint64_t __a) if (unbiasedExp < 20) { if (unbiasedExp < 0) { + if ((aHi & 0x80000000u) != 0u && aLo == 0u) { + return 0; + } aHi &= 0x80000000u; - if (unbiasedExp == -1 && aLo != 0u) - aHi |= (1023u << 20); + if ((a.y & 0x000FFFFFu) == 0u && a.x == 0u) { + aLo = 0u; + return packUint2x32(uvec2(aLo, aHi)); + } + aHi = mix(aHi, (aHi | 0x3FF00000u), unbiasedExp == -1); aLo = 0u; } else { uint maskExp = 0x000FFFFFu >> unbiasedExp; - /* a is an integral value */ - if (((aHi & maskExp) == 0u) && (aLo == 0u)) - return __a; - + uint lastBit = maskExp + 1; aHi += 0x00080000u >> unbiasedExp; + if ((aHi & maskExp) == 0u) + aHi &= ~lastBit; aHi &= ~maskExp; aLo = 0u; } @@ -1708,9 +1713,7 @@ __fround64(uint64_t __a) aLo &= ~maskExp; } - a.x = aLo; - a.y = aHi; - return packUint2x32(a); + return packUint2x32(uvec2(aLo, aHi)); } uint64_t diff --git a/src/compiler/glsl/gl_nir_lower_buffers.c b/src/compiler/glsl/gl_nir_lower_buffers.c index b9195329f4a..595eb6d9bdf 100644 --- a/src/compiler/glsl/gl_nir_lower_buffers.c +++ b/src/compiler/glsl/gl_nir_lower_buffers.c @@ -48,7 +48,6 @@ get_block_array_index(nir_builder *b, nir_deref_instr *deref, if (nir_src_is_const(deref->arr.index)) { unsigned arr_index = nir_src_as_uint(deref->arr.index); - arr_index = MIN2(arr_index, arr_size - 1); /* We're walking the deref from the tail so prepend the array index */ block_name = ralloc_asprintf(b->shader, "[%u]%s", arr_index, diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index 1c095cb66f9..c951d9526ac 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -224,10 +224,12 @@ expanded_line: glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro); _glcpp_parser_skip_stack_change_if (parser, & @1, "elif", $2.value); } -| LINE_EXPANDED integer_constant NEWLINE { +| LINE_EXPANDED expression NEWLINE { + if (parser->is_gles && $2.undefined_macro) + glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro); parser->has_new_line_number = 1; - parser->new_line_number = $2; - _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", $2); + parser->new_line_number = $2.value; + _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", $2.value); } | LINE_EXPANDED integer_constant integer_constant NEWLINE { parser->has_new_line_number = 1; @@ -238,6 +240,17 @@ expanded_line: "#line %" PRIiMAX " %" PRIiMAX "\n", $2, $3); } +| LINE_EXPANDED 
'(' expression ')' '(' expression ')' NEWLINE { + if (parser->is_gles && $3.undefined_macro) + glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $3.undefined_macro); + if (parser->is_gles && $6.undefined_macro) + glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $6.undefined_macro); + parser->has_new_line_number = 1; + parser->new_line_number = $3.value; + parser->has_new_source_number = 1; + parser->new_source_number = $6.value; + _mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX " %" PRIiMAX "\n", $3.value, $6.value); + } ; define: diff --git a/src/compiler/glsl/link_uniform_block_active_visitor.cpp b/src/compiler/glsl/link_uniform_block_active_visitor.cpp index 368981852c0..5bf0a8bc5a7 100644 --- a/src/compiler/glsl/link_uniform_block_active_visitor.cpp +++ b/src/compiler/glsl/link_uniform_block_active_visitor.cpp @@ -103,6 +103,8 @@ process_arrays(void *mem_ctx, ir_dereference_array *ir, if (*ub_array_ptr == NULL) { *ub_array_ptr = rzalloc(mem_ctx, struct uniform_block_array_elements); (*ub_array_ptr)->ir = ir; + (*ub_array_ptr)->total_num_array_elements = + ir->array->type->arrays_of_arrays_size(); } struct uniform_block_array_elements *ub_array = *ub_array_ptr; @@ -199,6 +201,7 @@ link_uniform_block_active_visitor::visit(ir_variable *var) (*ub_array)->array_elements, unsigned, (*ub_array)->num_array_elements); + (*ub_array)->total_num_array_elements = type->arrays_of_arrays_size(); for (unsigned i = 0; i < (*ub_array)->num_array_elements; i++) { (*ub_array)->array_elements[i] = i; diff --git a/src/compiler/glsl/link_uniform_block_active_visitor.h b/src/compiler/glsl/link_uniform_block_active_visitor.h index fbac65d5b67..462a2efdb9b 100644 --- a/src/compiler/glsl/link_uniform_block_active_visitor.h +++ b/src/compiler/glsl/link_uniform_block_active_visitor.h @@ -30,6 +30,15 @@ struct uniform_block_array_elements { unsigned *array_elements; unsigned num_array_elements; + /** + * Size of the array before array-trimming optimizations. + * + * Locations are only assigned to active array elements, but the location + * values are calculated as if all elements are active. The total number + * of elements in an array including the elements in arrays of arrays before + * inactive elements are removed is needed to be perform that calculation. 
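The comment above is the reason total_num_array_elements is recorded: block bindings are laid out as if every array element were active, so the offset of blk[i][j] is computed from the untrimmed inner array size even when some elements were optimized away. A tiny worked example of that linearization (hypothetical blk[4][3]):

#include <stdio.h>

/* blk[4][3]: binding offset computed from the untrimmed inner size. */
static unsigned binding_offset(unsigned i, unsigned j,
                               unsigned inner_total /* 3 */)
{
   return i * inner_total + j;
}

int main(void)
{
   /* Even if blk[1][*] is unused, blk[2][0] still lands at offset 6. */
   printf("blk[2][0] -> %u\n", binding_offset(2, 0, 3));
   return 0;
}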
+ */ + unsigned total_num_array_elements; ir_dereference_array *ir; diff --git a/src/compiler/glsl/link_uniform_blocks.cpp b/src/compiler/glsl/link_uniform_blocks.cpp index 45f1c0fe98d..85a246cb7c4 100644 --- a/src/compiler/glsl/link_uniform_blocks.cpp +++ b/src/compiler/glsl/link_uniform_blocks.cpp @@ -222,7 +222,7 @@ static void process_block_array_leaf(const char *name, gl_uniform_block *blocks, gl_uniform_buffer_variable *variables, const struct link_uniform_block_active *const b, unsigned *block_index, - unsigned *binding_offset, + unsigned binding_offset, unsigned linearized_index, struct gl_context *ctx, struct gl_shader_program *prog); @@ -237,25 +237,28 @@ process_block_array(struct uniform_block_array_elements *ub_array, char **name, size_t name_length, gl_uniform_block *blocks, ubo_visitor *parcel, gl_uniform_buffer_variable *variables, const struct link_uniform_block_active *const b, - unsigned *block_index, unsigned *binding_offset, + unsigned *block_index, unsigned binding_offset, struct gl_context *ctx, struct gl_shader_program *prog, unsigned first_index) { for (unsigned j = 0; j < ub_array->num_array_elements; j++) { size_t new_length = name_length; + unsigned int element_idx = ub_array->array_elements[j]; /* Append the subscript to the current variable name */ - ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", - ub_array->array_elements[j]); + ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", element_idx); if (ub_array->array) { + unsigned boffset = binding_offset + (element_idx * + ub_array->array->total_num_array_elements); process_block_array(ub_array->array, name, new_length, blocks, parcel, variables, b, block_index, - binding_offset, ctx, prog, first_index); + boffset, ctx, prog, first_index); } else { + unsigned boffset = binding_offset + element_idx; process_block_array_leaf(*name, blocks, parcel, variables, b, block_index, - binding_offset, *block_index - first_index, + boffset, *block_index - first_index, ctx, prog); } } @@ -266,7 +269,7 @@ process_block_array_leaf(const char *name, gl_uniform_block *blocks, ubo_visitor *parcel, gl_uniform_buffer_variable *variables, const struct link_uniform_block_active *const b, - unsigned *block_index, unsigned *binding_offset, + unsigned *block_index, unsigned binding_offset, unsigned linearized_index, struct gl_context *ctx, struct gl_shader_program *prog) { @@ -283,7 +286,7 @@ process_block_array_leaf(const char *name, * block binding and each subsequent element takes the next consecutive * uniform block binding point. */ - blocks[i].Binding = (b->has_binding) ? b->binding + *binding_offset : 0; + blocks[i].Binding = (b->has_binding) ? 
b->binding + binding_offset : 0; blocks[i].UniformBufferSize = 0; blocks[i]._Packing = glsl_interface_packing(type->interface_packing); @@ -307,7 +310,6 @@ process_block_array_leaf(const char *name, (unsigned)(ptrdiff_t)(&variables[parcel->index] - blocks[i].Uniforms); *block_index = *block_index + 1; - *binding_offset = *binding_offset + 1; } /* This function resizes the array types of the block so that later we can use @@ -370,7 +372,6 @@ create_buffer_blocks(void *mem_ctx, struct gl_context *ctx, if ((create_ubo_blocks && !b->is_shader_storage) || (!create_ubo_blocks && b->is_shader_storage)) { - unsigned binding_offset = 0; if (b->array != NULL) { char *name = ralloc_strdup(NULL, block_type->without_array()->name); @@ -378,12 +379,12 @@ create_buffer_blocks(void *mem_ctx, struct gl_context *ctx, assert(b->has_instance_name); process_block_array(b->array, &name, name_length, blocks, &parcel, - variables, b, &i, &binding_offset, ctx, prog, + variables, b, &i, 0, ctx, prog, i); ralloc_free(name); } else { process_block_array_leaf(block_type->name, blocks, &parcel, - variables, b, &i, &binding_offset, + variables, b, &i, 0, 0, ctx, prog); } } @@ -440,6 +441,7 @@ link_uniform_blocks(void *mem_ctx, GLSL_INTERFACE_PACKING_PACKED)) { b->type = resize_block_array(b->type, b->array); b->var->type = b->type; + b->var->data.max_array_access = b->type->length - 1; } block_size.num_active_uniforms = 0; diff --git a/src/compiler/glsl/loop_unroll.cpp b/src/compiler/glsl/loop_unroll.cpp index 874f4185681..7e97c3cddf1 100644 --- a/src/compiler/glsl/loop_unroll.cpp +++ b/src/compiler/glsl/loop_unroll.cpp @@ -180,6 +180,11 @@ loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations) void *const mem_ctx = ralloc_parent(ir); loop_variable_state *const ls = this->state->get(ir); + /* If there are no terminators, then the loop iteration count must be 1. + * This is the 'do { } while (false);' case. + */ + assert(!ls->terminators.is_empty() || iterations == 1); + ir_instruction *first_ir = (ir_instruction *) ir->body_instructions.get_head(); @@ -221,7 +226,8 @@ loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations) * the loop, or it the exit branch contains instructions. This ensures we * execute any instructions before the terminator or in its exit branch. */ - if (limit_if != first_ir->as_if() || exit_branch_has_instructions) + if (!ls->terminators.is_empty() && + (limit_if != first_ir->as_if() || exit_branch_has_instructions)) iterations++; for (int i = 0; i < iterations; i++) { diff --git a/src/compiler/glsl/opt_algebraic.cpp b/src/compiler/glsl/opt_algebraic.cpp index ff4be269578..3147d25aea8 100644 --- a/src/compiler/glsl/opt_algebraic.cpp +++ b/src/compiler/glsl/opt_algebraic.cpp @@ -507,6 +507,18 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) if (is_vec_zero(op_const[1])) return ir->operands[0]; + /* Replace (x + (-x)) with constant 0 */ + for (int i = 0; i < 2; i++) { + if (op_expr[i]) { + if (op_expr[i]->operation == ir_unop_neg) { + ir_rvalue *other = ir->operands[(i + 1) % 2]; + if (other && op_expr[i]->operands[0]->equals(other)) { + return ir_constant::zero(ir, ir->type); + } + } + } + } + /* Reassociate addition of constants so that we can do constant * folding. 
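The opt_algebraic change in this hunk folds (x + (-x)) down to a zero constant. A toy expression-tree sketch of the same pattern match (a simplified model in which integer ids stand in for the IR's equals() check):

#include <stdio.h>

typedef enum { OP_VALUE, OP_NEG, OP_ADD } op_t;

typedef struct expr {
   op_t op;
   int id;                 /* identifies a value leaf */
   struct expr *src[2];
} expr;

/* Returns 1 when the add is x + (-x), in either operand order. */
static int folds_to_zero(const expr *add)
{
   if (add->op != OP_ADD)
      return 0;
   for (int i = 0; i < 2; i++) {
      const expr *a = add->src[i], *b = add->src[1 - i];
      if (a->op == OP_NEG && a->src[0]->op == OP_VALUE &&
          b->op == OP_VALUE && a->src[0]->id == b->id)
         return 1;
   }
   return 0;
}

int main(void)
{
   expr x = { OP_VALUE, 7, { 0, 0 } };
   expr n = { OP_NEG, 0, { &x, 0 } };
   expr a = { OP_ADD, 0, { &x, &n } };
   printf("%d\n", folds_to_zero(&a)); /* prints 1 */
   return 0;
}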
*/ diff --git a/src/compiler/glsl/shader_cache.cpp b/src/compiler/glsl/shader_cache.cpp index 581098b88f0..97049043345 100644 --- a/src/compiler/glsl/shader_cache.cpp +++ b/src/compiler/glsl/shader_cache.cpp @@ -165,9 +165,8 @@ shader_cache_read_program_metadata(struct gl_context *ctx, prog->FragDataIndexBindings->iterate(create_binding_str, &buf); ralloc_asprintf_append(&buf, "tf: %d ", prog->TransformFeedback.BufferMode); for (unsigned int i = 0; i < prog->TransformFeedback.NumVarying; i++) { - ralloc_asprintf_append(&buf, "%s:%d ", - prog->TransformFeedback.VaryingNames[i], - prog->TransformFeedback.BufferStride[i]); + ralloc_asprintf_append(&buf, "%s ", + prog->TransformFeedback.VaryingNames[i]); } /* SSO has an effect on the linked program so include this when generating diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp index 9938b3df450..8e5087e2e1a 100644 --- a/src/compiler/glsl_types.cpp +++ b/src/compiler/glsl_types.cpp @@ -50,7 +50,7 @@ glsl_type::glsl_type(GLenum gl_type, gl_type(gl_type), base_type(base_type), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - interface_packing(0), interface_row_major(row_major), + interface_packing(0), interface_row_major(row_major), packed(0), vector_elements(vector_elements), matrix_columns(matrix_columns), length(0), explicit_stride(explicit_stride) { @@ -85,7 +85,7 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type, base_type(base_type), sampled_type(type), sampler_dimensionality(dim), sampler_shadow(shadow), sampler_array(array), interface_packing(0), - interface_row_major(0), + interface_row_major(0), packed(0), length(0), explicit_stride(0) { this->mem_ctx = ralloc_context(NULL); @@ -134,7 +134,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, base_type(GLSL_TYPE_INTERFACE), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), interface_packing((unsigned) packing), - interface_row_major((unsigned) row_major), + interface_row_major((unsigned) row_major), packed(0), vector_elements(0), matrix_columns(0), length(num_fields), explicit_stride(0) { @@ -159,7 +159,7 @@ glsl_type::glsl_type(const glsl_type *return_type, gl_type(0), base_type(GLSL_TYPE_FUNCTION), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - interface_packing(0), interface_row_major(0), + interface_packing(0), interface_row_major(0), packed(0), vector_elements(0), matrix_columns(0), length(num_params), explicit_stride(0) { @@ -188,7 +188,7 @@ glsl_type::glsl_type(const char *subroutine_name) : gl_type(0), base_type(GLSL_TYPE_SUBROUTINE), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - interface_packing(0), interface_row_major(0), + interface_packing(0), interface_row_major(0), packed(0), vector_elements(1), matrix_columns(1), length(0), explicit_stride(0) { @@ -534,7 +534,7 @@ glsl_type::glsl_type(const glsl_type *array, unsigned length, unsigned explicit_stride) : base_type(GLSL_TYPE_ARRAY), sampled_type(GLSL_TYPE_VOID), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - interface_packing(0), interface_row_major(0), + interface_packing(0), interface_row_major(0), packed(0), vector_elements(0), matrix_columns(0), length(length), name(NULL), explicit_stride(explicit_stride) { diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index a8faeb9c018..18aa44ab9c2 100644 --- a/src/compiler/nir/meson.build 
+++ b/src/compiler/nir/meson.build @@ -299,4 +299,16 @@ if with_tests link_with : libmesa_util, ) ) + + test( + 'comparison_pre', + executable( + 'comparison_pre', + files('tests/comparison_pre_tests.cpp'), + c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args], + include_directories : [inc_common], + dependencies : [dep_thread, idep_gtest, idep_nir], + link_with : libmesa_util, + ) + ) endif diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 5b75585498e..87a66d35b62 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -1204,6 +1204,41 @@ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state) return nir_foreach_dest(instr, visit_dest_indirect, &dest_state); } +nir_const_value +nir_const_value_for_float(double f, unsigned bit_size) +{ + nir_const_value v; + memset(&v, 0, sizeof(v)); + + switch (bit_size) { + case 16: + v.u16 = _mesa_float_to_half(f); + break; + case 32: + v.f32 = f; + break; + case 64: + v.f64 = f; + break; + default: + unreachable("Invalid bit size"); + } + + return v; +} + +double +nir_const_value_as_float(nir_const_value value, unsigned bit_size) +{ + switch (bit_size) { + case 16: return _mesa_half_to_float(value.u16); + case 32: return value.f32; + case 64: return value.f64; + default: + unreachable("Invalid bit size"); + } +} + int64_t nir_src_comp_as_int(nir_src src, unsigned comp) { @@ -1997,6 +2032,8 @@ void nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src, bool bindless) { + enum gl_access_qualifier access = nir_intrinsic_access(intrin); + switch (intrin->intrinsic) { #define CASE(op) \ case nir_intrinsic_image_deref_##op: \ @@ -2028,7 +2065,7 @@ nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src, nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type)); nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type)); - nir_intrinsic_set_access(intrin, var->data.image.access); + nir_intrinsic_set_access(intrin, access | var->data.image.access); nir_intrinsic_set_format(intrin, var->data.image.format); nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 37161e83e4d..2c5abe47220 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -140,6 +140,106 @@ typedef union { arr[i] = c[i].m; \ } while (false) +static inline nir_const_value +nir_const_value_for_raw_uint(uint64_t x, unsigned bit_size) +{ + nir_const_value v; + memset(&v, 0, sizeof(v)); + + switch (bit_size) { + case 1: v.b = x; break; + case 8: v.u8 = x; break; + case 16: v.u16 = x; break; + case 32: v.u32 = x; break; + case 64: v.u64 = x; break; + default: + unreachable("Invalid bit size"); + } + + return v; +} + +static inline nir_const_value +nir_const_value_for_int(int64_t i, unsigned bit_size) +{ + nir_const_value v; + memset(&v, 0, sizeof(v)); + + assert(bit_size <= 64); + if (bit_size < 64) { + assert(i >= (-(1ll << (bit_size - 1)))); + assert(i < (1ll << (bit_size - 1))); + } + + return nir_const_value_for_raw_uint(i, bit_size); +} + +static inline nir_const_value +nir_const_value_for_uint(uint64_t u, unsigned bit_size) +{ + nir_const_value v; + memset(&v, 0, sizeof(v)); + + assert(bit_size <= 64); + if (bit_size < 64) + assert(u < (1ull << bit_size)); + + return nir_const_value_for_raw_uint(u, bit_size); +} + +static inline nir_const_value +nir_const_value_for_bool(bool b, unsigned bit_size) +{ + /* Booleans use a 0/-1 convention */ + return nir_const_value_for_int(-(int)b, bit_size); 
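The nir_const_value helpers added in this hunk use the 0/-1 boolean convention, so true is stored as an all-ones pattern at the requested bit size. A standalone sketch of that encoding and the matching decode (plain C, outside of NIR):

#include <stdint.h>
#include <stdio.h>

/* true -> all-ones pattern for the given bit size, false -> 0. */
static uint64_t encode_bool(int b, unsigned bit_size)
{
   uint64_t ones = (bit_size == 64) ? ~0ull : ((1ull << bit_size) - 1);
   return b ? ones : 0;
}

static int decode_bool(uint64_t v)
{
   return v != 0;   /* only 0 and all-ones are legal encodings */
}

int main(void)
{
   printf("0x%llx\n", (unsigned long long)encode_bool(1, 8));  /* 0xff */
   printf("0x%llx\n", (unsigned long long)encode_bool(1, 32)); /* 0xffffffff */
   printf("%d\n", decode_bool(encode_bool(0, 16)));            /* 0 */
   return 0;
}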
+} + +/* This one isn't inline because it requires half-float conversion */ +nir_const_value nir_const_value_for_float(double b, unsigned bit_size); + +static inline int64_t +nir_const_value_as_int(nir_const_value value, unsigned bit_size) +{ + switch (bit_size) { + /* int1_t uses 0/-1 convention */ + case 1: return -(int)value.b; + case 8: return value.i8; + case 16: return value.i16; + case 32: return value.i32; + case 64: return value.i64; + default: + unreachable("Invalid bit size"); + } +} + +static inline int64_t +nir_const_value_as_uint(nir_const_value value, unsigned bit_size) +{ + switch (bit_size) { + case 1: return value.b; + case 8: return value.u8; + case 16: return value.u16; + case 32: return value.u32; + case 64: return value.u64; + default: + unreachable("Invalid bit size"); + } +} + +static inline bool +nir_const_value_as_bool(nir_const_value value, unsigned bit_size) +{ + int64_t i = nir_const_value_as_int(value, bit_size); + + /* Booleans of any size use 0/-1 convention */ + assert(i == 0 || i == -1); + + return i; +} + +/* This one isn't inline because it requires half-float conversion */ +double nir_const_value_as_float(nir_const_value value, unsigned bit_size); + typedef struct nir_constant { /** * Value of the constant. @@ -1281,6 +1381,10 @@ typedef enum { */ NIR_INTRINSIC_DESC_TYPE = 19, + /* Separate source/dest access flags for copies */ + NIR_INTRINSIC_SRC_ACCESS, + NIR_INTRINSIC_DST_ACCESS, + NIR_INTRINSIC_NUM_INDEX_FLAGS, } nir_intrinsic_index_flag; @@ -1381,6 +1485,8 @@ INTRINSIC_IDX_ACCESSORS(param_idx, PARAM_IDX, unsigned) INTRINSIC_IDX_ACCESSORS(image_dim, IMAGE_DIM, enum glsl_sampler_dim) INTRINSIC_IDX_ACCESSORS(image_array, IMAGE_ARRAY, bool) INTRINSIC_IDX_ACCESSORS(access, ACCESS, enum gl_access_qualifier) +INTRINSIC_IDX_ACCESSORS(src_access, SRC_ACCESS, enum gl_access_qualifier) +INTRINSIC_IDX_ACCESSORS(dst_access, DST_ACCESS, enum gl_access_qualifier) INTRINSIC_IDX_ACCESSORS(format, FORMAT, unsigned) INTRINSIC_IDX_ACCESSORS(align_mul, ALIGN_MUL, unsigned) INTRINSIC_IDX_ACCESSORS(align_offset, ALIGN_OFFSET, unsigned) @@ -1416,6 +1522,16 @@ nir_intrinsic_align(const nir_intrinsic_instr *intrin) void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr, nir_ssa_def *handle, bool bindless); +/* Determine if an intrinsic can be arbitrarily reordered and eliminated. 
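nir_intrinsic_can_reorder(), introduced in this nir.h hunk, admits an intrinsic only when its info flags carry both CAN_ELIMINATE and CAN_REORDER. A reduced model of that test (toy flag table, not the real nir_intrinsic_infos):

#include <stdio.h>

#define CAN_ELIMINATE (1u << 0)
#define CAN_REORDER   (1u << 1)

struct intrinsic_info { const char *name; unsigned flags; };

/* Stand-in table: a pure load can move freely, a store cannot. */
static const struct intrinsic_info infos[] = {
   { "load_uniform", CAN_ELIMINATE | CAN_REORDER },
   { "store_ssbo",   0 },
};

static int can_reorder(const struct intrinsic_info *info)
{
   return (info->flags & CAN_ELIMINATE) && (info->flags & CAN_REORDER);
}

int main(void)
{
   for (unsigned i = 0; i < 2; i++)
      printf("%s: %d\n", infos[i].name, can_reorder(&infos[i]));
   return 0;
}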
*/ +static inline bool +nir_intrinsic_can_reorder(nir_intrinsic_instr *instr) +{ + const nir_intrinsic_info *info = + &nir_intrinsic_infos[instr->intrinsic]; + return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && + (info->flags & NIR_INTRINSIC_CAN_REORDER); +} + /** * \group texture information * @@ -1815,6 +1931,85 @@ NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr, nir_parallel_copy_instr, instr, type, nir_instr_type_parallel_copy) +typedef struct { + nir_ssa_def *def; + unsigned comp; +} nir_ssa_scalar; + +static inline bool +nir_ssa_scalar_is_const(nir_ssa_scalar s) +{ + return s.def->parent_instr->type == nir_instr_type_load_const; +} + +static inline nir_const_value +nir_ssa_scalar_as_const_value(nir_ssa_scalar s) +{ + assert(s.comp < s.def->num_components); + nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr); + return load->value[s.comp]; +} + +#define NIR_DEFINE_SCALAR_AS_CONST(type, suffix) \ +static inline type \ +nir_ssa_scalar_as_##suffix(nir_ssa_scalar s) \ +{ \ + return nir_const_value_as_##suffix( \ + nir_ssa_scalar_as_const_value(s), s.def->bit_size); \ +} + +NIR_DEFINE_SCALAR_AS_CONST(int64_t, int) +NIR_DEFINE_SCALAR_AS_CONST(uint64_t, uint) +NIR_DEFINE_SCALAR_AS_CONST(bool, bool) +NIR_DEFINE_SCALAR_AS_CONST(double, float) + +#undef NIR_DEFINE_SCALAR_AS_CONST + +static inline bool +nir_ssa_scalar_is_alu(nir_ssa_scalar s) +{ + return s.def->parent_instr->type == nir_instr_type_alu; +} + +static inline nir_op +nir_ssa_scalar_alu_op(nir_ssa_scalar s) +{ + return nir_instr_as_alu(s.def->parent_instr)->op; +} + +static inline nir_ssa_scalar +nir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx) +{ + nir_ssa_scalar out = { NULL, 0 }; + + nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr); + assert(alu_src_idx < nir_op_infos[alu->op].num_inputs); + + /* Our component must be written */ + assert(s.comp < s.def->num_components); + assert(alu->dest.write_mask & (1u << s.comp)); + + assert(alu->src[alu_src_idx].src.is_ssa); + out.def = alu->src[alu_src_idx].src.ssa; + + if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) { + /* The ALU src is unsized so the source component follows the + * destination component. + */ + out.comp = alu->src[alu_src_idx].swizzle[s.comp]; + } else { + /* This is a sized source so all source components work together to + * produce all the destination components. Since we need to return a + * scalar, this only works if the source is a scalar. 
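nir_ssa_scalar_chase_alu_src() in this hunk maps a destination component back through the source swizzle: an unsized (per-component) source uses swizzle[comp], while a sized source must itself be scalar and so uses swizzle[0]. A self-contained sketch of that lookup (plain arrays stand in for the NIR structures):

#include <assert.h>
#include <stdio.h>

/* input_size == 0 means "per-component" (unsized) in this toy model. */
static unsigned chase_src_component(const unsigned swizzle[4],
                                    unsigned input_size,
                                    unsigned dest_comp)
{
   if (input_size == 0)
      return swizzle[dest_comp];
   assert(input_size == 1);   /* sized sources must be scalar here */
   return swizzle[0];
}

int main(void)
{
   const unsigned swz[4] = { 2, 0, 1, 3 };
   printf("%u\n", chase_src_component(swz, 0, 1)); /* unsized: swz[1] = 0 */
   printf("%u\n", chase_src_component(swz, 1, 1)); /* sized:   swz[0] = 2 */
   return 0;
}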
+ */ + assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1); + out.comp = alu->src[alu_src_idx].swizzle[0]; + } + assert(out.comp < out.def->num_components); + + return out; +} + /* * Control flow * @@ -2196,6 +2391,7 @@ typedef enum { nir_lower_minmax64 = (1 << 10), nir_lower_shift64 = (1 << 11), nir_lower_imul_2x32_64 = (1 << 12), + nir_lower_extract64 = (1 << 13), } nir_lower_int64_options; typedef enum { @@ -2785,6 +2981,7 @@ NIR_SRC_AS_(deref, nir_deref_instr, nir_instr_type_deref, nir_instr_as_deref) bool nir_src_is_dynamically_uniform(nir_src src); bool nir_srcs_equal(nir_src src1, nir_src src2); +bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2); void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src); void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src); void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src); @@ -3487,6 +3684,9 @@ bool nir_lower_phis_to_regs_block(nir_block *block); bool nir_lower_ssa_defs_to_regs_block(nir_block *block); bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl); +/* This is here for unit tests. */ +bool nir_opt_comparison_pre_impl(nir_function_impl *impl); + bool nir_opt_comparison_pre(nir_shader *shader); bool nir_opt_algebraic(nir_shader *shader); @@ -3535,6 +3735,7 @@ bool nir_opt_peephole_select(nir_shader *shader, unsigned limit, bool indirect_load_ok, bool expensive_alu_ok); bool nir_opt_remove_phis(nir_shader *shader); +bool nir_opt_remove_phis_block(nir_block *block); bool nir_opt_shrink_load(nir_shader *shader); diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index ced009a66c7..f56e8beff28 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -1124,15 +1124,28 @@ nir_store_deref(nir_builder *build, nir_deref_instr *deref, } static inline void -nir_copy_deref(nir_builder *build, nir_deref_instr *dest, nir_deref_instr *src) +nir_copy_deref_with_access(nir_builder *build, nir_deref_instr *dest, + nir_deref_instr *src, + enum gl_access_qualifier dest_access, + enum gl_access_qualifier src_access) { nir_intrinsic_instr *copy = nir_intrinsic_instr_create(build->shader, nir_intrinsic_copy_deref); copy->src[0] = nir_src_for_ssa(&dest->dest.ssa); copy->src[1] = nir_src_for_ssa(&src->dest.ssa); + nir_intrinsic_set_dst_access(copy, dest_access); + nir_intrinsic_set_src_access(copy, src_access); nir_builder_instr_insert(build, &copy->instr); } +static inline void +nir_copy_deref(nir_builder *build, nir_deref_instr *dest, nir_deref_instr *src) +{ + nir_copy_deref_with_access(build, dest, src, + (enum gl_access_qualifier) 0, + (enum gl_access_qualifier) 0); +} + static inline nir_ssa_def * nir_load_var(nir_builder *build, nir_variable *var) { diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c index eec10a1b847..f4000321575 100644 --- a/src/compiler/nir/nir_clone.c +++ b/src/compiler/nir/nir_clone.c @@ -151,9 +151,11 @@ nir_variable_clone(const nir_variable *var, nir_shader *shader) nvar->name = ralloc_strdup(nvar, var->name); nvar->data = var->data; nvar->num_state_slots = var->num_state_slots; - nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots); - memcpy(nvar->state_slots, var->state_slots, - var->num_state_slots * sizeof(nir_state_slot)); + if (var->num_state_slots) { + nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots); + memcpy(nvar->state_slots, var->state_slots, + var->num_state_slots * sizeof(nir_state_slot)); + } if
(var->constant_initializer) { nvar->constant_initializer = nir_constant_clone(var->constant_initializer, nvar); diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py index f26fd0a3ea2..ed420c300f2 100644 --- a/src/compiler/nir/nir_constant_expressions.py +++ b/src/compiler/nir/nir_constant_expressions.py @@ -414,7 +414,8 @@ def get_const_field(type_): switch (op) { % for name in sorted(opcodes.keys()): case nir_op_${name}: - return evaluate_${name}(dest, num_components, bit_width, src); + evaluate_${name}(dest, num_components, bit_width, src); + return; % endfor default: unreachable("shouldn't get here"); diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c index f1e6eee7745..835c39cff93 100644 --- a/src/compiler/nir/nir_deref.c +++ b/src/compiler/nir/nir_deref.c @@ -124,17 +124,15 @@ nir_deref_instr_has_indirect(nir_deref_instr *instr) unsigned nir_deref_instr_ptr_as_array_stride(nir_deref_instr *deref) { - assert(deref->deref_type == nir_deref_type_ptr_as_array); - nir_deref_instr *parent = nir_deref_instr_parent(deref); - switch (parent->deref_type) { + switch (deref->deref_type) { case nir_deref_type_array: - return glsl_get_explicit_stride(nir_deref_instr_parent(parent)->type); + return glsl_get_explicit_stride(nir_deref_instr_parent(deref)->type); case nir_deref_type_ptr_as_array: - return nir_deref_instr_ptr_as_array_stride(parent); + return nir_deref_instr_ptr_as_array_stride(nir_deref_instr_parent(deref)); case nir_deref_type_cast: - return parent->cast.ptr_stride; + return deref->cast.ptr_stride; default: - unreachable("Invalid parent for ptr_as_array deref"); + return 0; } } diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c index bd62bc974ed..e2a0b32cab0 100644 --- a/src/compiler/nir/nir_instr_set.c +++ b/src/compiler/nir/nir_instr_set.c @@ -25,6 +25,64 @@ #include "nir_vla.h" #include "util/half_float.h" +static bool +src_is_ssa(nir_src *src, void *data) +{ + (void) data; + return src->is_ssa; +} + +static bool +dest_is_ssa(nir_dest *dest, void *data) +{ + (void) data; + return dest->is_ssa; +} + +static inline bool +instr_each_src_and_dest_is_ssa(const nir_instr *instr) +{ + if (!nir_foreach_dest((nir_instr *)instr, dest_is_ssa, NULL) || + !nir_foreach_src((nir_instr *)instr, src_is_ssa, NULL)) + return false; + + return true; +} + +/* This function determines if uses of an instruction can safely be rewritten + * to use another identical instruction instead. Note that this function must + * be kept in sync with hash_instr() and nir_instrs_equal() -- only + * instructions that pass this test will be handed on to those functions, and + * conversely they must handle everything that this function returns true for. + */ +static bool +instr_can_rewrite(const nir_instr *instr) +{ + /* We only handle SSA. 
*/ + assert(instr_each_src_and_dest_is_ssa(instr)); + + switch (instr->type) { + case nir_instr_type_alu: + case nir_instr_type_deref: + case nir_instr_type_tex: + case nir_instr_type_load_const: + case nir_instr_type_phi: + return true; + case nir_instr_type_intrinsic: + return nir_intrinsic_can_reorder(nir_instr_as_intrinsic(instr)); + case nir_instr_type_call: + case nir_instr_type_jump: + case nir_instr_type_ssa_undef: + return false; + case nir_instr_type_parallel_copy: + default: + unreachable("Invalid instruction type"); + } + + return false; +} + + #define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data)) static uint32_t @@ -430,12 +488,16 @@ nir_alu_srcs_negative_equal(const nir_alu_instr *alu1, if (const2 == NULL) return false; + if (nir_src_bit_size(alu1->src[src1].src) != + nir_src_bit_size(alu2->src[src2].src)) + return false; + /* FINISHME: Apply the swizzle? */ return nir_const_value_negative_equal(const1, const2, nir_ssa_alu_instr_src_components(alu1, src1), nir_op_infos[alu1->op].input_types[src1], - alu1->dest.dest.ssa.bit_size); + nir_src_bit_size(alu1->src[src1].src)); } uint8_t alu1_swizzle[4] = {0}; @@ -503,9 +565,11 @@ nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2, * the same hash for (ignoring collisions, of course). */ -static bool +bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) { + assert(instr_can_rewrite(instr1) && instr_can_rewrite(instr2)); + if (instr1->type != instr2->type) return false; @@ -701,68 +765,6 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) unreachable("All cases in the above switch should return"); } -static bool -src_is_ssa(nir_src *src, void *data) -{ - (void) data; - return src->is_ssa; -} - -static bool -dest_is_ssa(nir_dest *dest, void *data) -{ - (void) data; - return dest->is_ssa; -} - -static inline bool -instr_each_src_and_dest_is_ssa(nir_instr *instr) -{ - if (!nir_foreach_dest(instr, dest_is_ssa, NULL) || - !nir_foreach_src(instr, src_is_ssa, NULL)) - return false; - - return true; -} - -/* This function determines if uses of an instruction can safely be rewritten - * to use another identical instruction instead. Note that this function must - * be kept in sync with hash_instr() and nir_instrs_equal() -- only - * instructions that pass this test will be handed on to those functions, and - * conversely they must handle everything that this function returns true for. - */ - -static bool -instr_can_rewrite(nir_instr *instr) -{ - /* We only handle SSA. 
*/ - assert(instr_each_src_and_dest_is_ssa(instr)); - - switch (instr->type) { - case nir_instr_type_alu: - case nir_instr_type_deref: - case nir_instr_type_tex: - case nir_instr_type_load_const: - case nir_instr_type_phi: - return true; - case nir_instr_type_intrinsic: { - const nir_intrinsic_info *info = - &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic]; - return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && - (info->flags & NIR_INTRINSIC_CAN_REORDER); - } - case nir_instr_type_call: - case nir_instr_type_jump: - case nir_instr_type_ssa_undef: - return false; - case nir_instr_type_parallel_copy: - default: - unreachable("Invalid instruction type"); - } - - return false; -} - static nir_ssa_def * nir_instr_get_dest_ssa_def(nir_instr *instr) { diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 3a0470c2ca1..a0c115ff84d 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -111,6 +111,8 @@ def __init__(self, name, src_components, dest_components, IMAGE_ARRAY = "NIR_INTRINSIC_IMAGE_ARRAY" # Access qualifiers for image and memory access intrinsics ACCESS = "NIR_INTRINSIC_ACCESS" +DST_ACCESS = "NIR_INTRINSIC_DST_ACCESS" +SRC_ACCESS = "NIR_INTRINSIC_SRC_ACCESS" # Image format for image intrinsics FORMAT = "NIR_INTRINSIC_FORMAT" # Offset or address alignment @@ -152,7 +154,7 @@ def intrinsic(name, src_comp=[], dest_comp=-1, indices=[], intrinsic("load_deref", dest_comp=0, src_comp=[-1], indices=[ACCESS], flags=[CAN_ELIMINATE]) intrinsic("store_deref", src_comp=[-1, 0], indices=[WRMASK, ACCESS]) -intrinsic("copy_deref", src_comp=[-1, -1]) +intrinsic("copy_deref", src_comp=[-1, -1], indices=[DST_ACCESS, SRC_ACCESS]) # Interpolation of input. The interp_deref_at* intrinsics are similar to the # load_var intrinsic acting on a shader input except that they interpolate the @@ -333,7 +335,8 @@ def atomic3(name): # either one or two additional scalar arguments with the same meaning as in # the ARB_shader_image_load_store specification. 
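The copy_deref change above gives that intrinsic separate DST_ACCESS and SRC_ACCESS indices, and the image() helper defined next adds an ACCESS index to the image_deref_* variants. A minimal sketch of how a pass might use the new nir_copy_deref_with_access() builder wrapper; the helper name is invented for illustration, and the qualifier value is assumed to be the ACCESS_NON_WRITEABLE bit from gl_access_qualifier in shader_enums.h.

#include "nir_builder.h"

/* Hypothetical helper: emit a deref copy whose source is known to be
 * read-only, so that nir_lower_var_copies can tag the loads it generates
 * with the same qualifier. */
static void
emit_readonly_copy(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
{
   nir_copy_deref_with_access(b, dst, src,
                              (enum gl_access_qualifier) 0, /* dst: no qualifiers */
                              ACCESS_NON_WRITEABLE);        /* src: read-only */
}

The plain nir_copy_deref() wrapper keeps its old signature by passing zero for both qualifiers, so existing callers are unaffected.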
def image(name, src_comp=[], **kwargs): - intrinsic("image_deref_" + name, src_comp=[1] + src_comp, **kwargs) + intrinsic("image_deref_" + name, src_comp=[1] + src_comp, + indices=[ACCESS], **kwargs) intrinsic("image_" + name, src_comp=[1] + src_comp, indices=[IMAGE_DIM, IMAGE_ARRAY, FORMAT, ACCESS], **kwargs) intrinsic("bindless_image_" + name, src_comp=[1] + src_comp, diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c index 0ae9533e007..d484c1439e3 100644 --- a/src/compiler/nir/nir_loop_analyze.c +++ b/src/compiler/nir/nir_loop_analyze.c @@ -32,7 +32,10 @@ typedef enum { basic_induction } nir_loop_variable_type; -struct nir_basic_induction_var; +typedef struct nir_basic_induction_var { + nir_alu_instr *alu; /* The def of the alu-operation */ + nir_ssa_def *def_outside_loop; /* The phi-src outside the loop */ +} nir_basic_induction_var; typedef struct { /* A link for the work list */ @@ -57,13 +60,6 @@ typedef struct { } nir_loop_variable; -typedef struct nir_basic_induction_var { - nir_op alu_op; /* The type of alu-operation */ - nir_loop_variable *alu_def; /* The def of the alu-operation */ - nir_loop_variable *invariant; /* The invariant alu-operand */ - nir_loop_variable *def_outside_loop; /* The phi-src outside the loop */ -} nir_basic_induction_var; - typedef struct { /* The loop we store information for */ nir_loop *loop; @@ -274,6 +270,44 @@ compute_invariance_information(loop_info_state *state) } } +/* If all of the instruction sources point to identical ALU instructions (as + * per nir_instrs_equal), return one of the ALU instructions. Otherwise, + * return NULL. + */ +static nir_alu_instr * +phi_instr_as_alu(nir_phi_instr *phi) +{ + nir_alu_instr *first = NULL; + nir_foreach_phi_src(src, phi) { + assert(src->src.is_ssa); + if (src->src.ssa->parent_instr->type != nir_instr_type_alu) + return NULL; + + nir_alu_instr *alu = nir_instr_as_alu(src->src.ssa->parent_instr); + if (first == NULL) { + first = alu; + } else { + if (!nir_instrs_equal(&first->instr, &alu->instr)) + return NULL; + } + } + + return first; +} + +static bool +alu_src_has_identity_swizzle(nir_alu_instr *alu, unsigned src_idx) +{ + assert(nir_op_infos[alu->op].input_sizes[src_idx] == 0); + assert(alu->dest.dest.is_ssa); + for (unsigned i = 0; i < alu->dest.dest.ssa.num_components; i++) { + if (alu->src[src_idx].swizzle[i] != i) + return false; + } + + return true; +} + static bool compute_induction_information(loop_info_state *state) { @@ -298,6 +332,7 @@ compute_induction_information(loop_info_state *state) nir_phi_instr *phi = nir_instr_as_phi(var->def->parent_instr); nir_basic_induction_var *biv = rzalloc(state, nir_basic_induction_var); + nir_loop_variable *alu_src_var = NULL; nir_foreach_phi_src(src, phi) { nir_loop_variable *src_var = get_loop_var(src->src.ssa, state); @@ -313,60 +348,44 @@ compute_induction_information(loop_info_state *state) if (is_var_phi(src_var)) { nir_phi_instr *src_phi = nir_instr_as_phi(src_var->def->parent_instr); - - nir_op alu_op = nir_num_opcodes; /* avoid uninitialized warning */ - nir_ssa_def *alu_srcs[2] = {0}; - nir_foreach_phi_src(src2, src_phi) { - nir_loop_variable *src_var2 = - get_loop_var(src2->src.ssa, state); - - if (!src_var2->in_if_branch || !is_var_alu(src_var2)) + nir_alu_instr *src_phi_alu = phi_instr_as_alu(src_phi); + if (src_phi_alu) { + src_var = get_loop_var(&src_phi_alu->dest.dest.ssa, state); + if (!src_var->in_if_branch) break; - - nir_alu_instr *alu = - nir_instr_as_alu(src_var2->def->parent_instr); - if 
(nir_op_infos[alu->op].num_inputs != 2) - break; - - if (alu->src[0].src.ssa == alu_srcs[0] && - alu->src[1].src.ssa == alu_srcs[1] && - alu->op == alu_op) { - /* Both branches perform the same calculation so we can use - * one of them to find the induction variable. - */ - src_var = src_var2; - } else { - alu_srcs[0] = alu->src[0].src.ssa; - alu_srcs[1] = alu->src[1].src.ssa; - alu_op = alu->op; - } } } - if (!src_var->in_loop) { - biv->def_outside_loop = src_var; - } else if (is_var_alu(src_var)) { + if (!src_var->in_loop && !biv->def_outside_loop) { + biv->def_outside_loop = src_var->def; + } else if (is_var_alu(src_var) && !biv->alu) { + alu_src_var = src_var; nir_alu_instr *alu = nir_instr_as_alu(src_var->def->parent_instr); if (nir_op_infos[alu->op].num_inputs == 2) { - biv->alu_def = src_var; - biv->alu_op = alu->op; - for (unsigned i = 0; i < 2; i++) { - /* Is one of the operands const, and the other the phi */ - if (alu->src[i].src.ssa->parent_instr->type == nir_instr_type_load_const && - alu->src[1-i].src.ssa == &phi->dest.ssa) - biv->invariant = get_loop_var(alu->src[i].src.ssa, state); + /* Is one of the operands const, and the other the phi. The + * phi source can't be swizzled in any way. + */ + if (nir_src_is_const(alu->src[i].src) && + alu->src[1-i].src.ssa == &phi->dest.ssa && + alu_src_has_identity_swizzle(alu, 1 - i)) + biv->alu = alu; } } + + if (!biv->alu) + break; + } else { + biv->alu = NULL; + break; } } - if (biv->alu_def && biv->def_outside_loop && biv->invariant && - is_var_constant(biv->def_outside_loop)) { - assert(is_var_constant(biv->invariant)); - biv->alu_def->type = basic_induction; - biv->alu_def->ind = biv; + if (biv->alu && biv->def_outside_loop && + biv->def_outside_loop->parent_instr->type == nir_instr_type_load_const) { + alu_src_var->type = basic_induction; + alu_src_var->ind = biv; var->type = basic_induction; var->ind = biv; @@ -493,7 +512,7 @@ find_array_access_via_induction(loop_info_state *state, static bool guess_loop_limit(loop_info_state *state, nir_const_value *limit_val, - nir_loop_variable *basic_ind) + nir_ssa_scalar basic_ind) { unsigned min_array_size = 0; @@ -514,8 +533,10 @@ guess_loop_limit(loop_info_state *state, nir_const_value *limit_val, find_array_access_via_induction(state, nir_src_as_deref(intrin->src[0]), &array_idx); - if (basic_ind == array_idx && + if (array_idx && basic_ind.def == array_idx->def && (min_array_size == 0 || min_array_size > array_size)) { + /* Array indices are scalars */ + assert(basic_ind.def->num_components == 1); min_array_size = array_size; } @@ -526,8 +547,10 @@ guess_loop_limit(loop_info_state *state, nir_const_value *limit_val, find_array_access_via_induction(state, nir_src_as_deref(intrin->src[1]), &array_idx); - if (basic_ind == array_idx && + if (array_idx && basic_ind.def == array_idx->def && (min_array_size == 0 || min_array_size > array_size)) { + /* Array indices are scalars */ + assert(basic_ind.def->num_components == 1); min_array_size = array_size; } } @@ -535,7 +558,8 @@ guess_loop_limit(loop_info_state *state, nir_const_value *limit_val, } if (min_array_size) { - limit_val->i32 = min_array_size; + *limit_val = nir_const_value_for_uint(min_array_size, + basic_ind.def->bit_size); return true; } @@ -543,71 +567,84 @@ guess_loop_limit(loop_info_state *state, nir_const_value *limit_val, } static bool -try_find_limit_of_alu(nir_loop_variable *limit, nir_const_value *limit_val, +try_find_limit_of_alu(nir_ssa_scalar limit, nir_const_value *limit_val, nir_loop_terminator *terminator, 
loop_info_state *state) { - if(!is_var_alu(limit)) + if (!nir_ssa_scalar_is_alu(limit)) return false; - nir_alu_instr *limit_alu = nir_instr_as_alu(limit->def->parent_instr); - - if (limit_alu->op == nir_op_imin || - limit_alu->op == nir_op_fmin) { - limit = get_loop_var(limit_alu->src[0].src.ssa, state); - - if (!is_var_constant(limit)) - limit = get_loop_var(limit_alu->src[1].src.ssa, state); - - if (!is_var_constant(limit)) - return false; - - *limit_val = nir_instr_as_load_const(limit->def->parent_instr)->value[0]; - - terminator->exact_trip_count_unknown = true; - - return true; + nir_op limit_op = nir_ssa_scalar_alu_op(limit); + if (limit_op == nir_op_imin || limit_op == nir_op_fmin) { + for (unsigned i = 0; i < 2; i++) { + nir_ssa_scalar src = nir_ssa_scalar_chase_alu_src(limit, i); + if (nir_ssa_scalar_is_const(src)) { + *limit_val = nir_ssa_scalar_as_const_value(src); + terminator->exact_trip_count_unknown = true; + return true; + } + } } return false; } +static nir_const_value +eval_const_unop(nir_op op, unsigned bit_size, nir_const_value src0) +{ + assert(nir_op_infos[op].num_inputs == 1); + nir_const_value dest; + nir_const_value *src[1] = { &src0 }; + nir_eval_const_opcode(op, &dest, 1, bit_size, src); + return dest; +} + +static nir_const_value +eval_const_binop(nir_op op, unsigned bit_size, + nir_const_value src0, nir_const_value src1) +{ + assert(nir_op_infos[op].num_inputs == 2); + nir_const_value dest; + nir_const_value *src[2] = { &src0, &src1 }; + nir_eval_const_opcode(op, &dest, 1, bit_size, src); + return dest; +} + static int32_t -get_iteration(nir_op cond_op, nir_const_value *initial, nir_const_value *step, - nir_const_value *limit) +get_iteration(nir_op cond_op, nir_const_value initial, nir_const_value step, + nir_const_value limit, unsigned bit_size) { - int32_t iter; + nir_const_value span, iter; switch (cond_op) { case nir_op_ige: case nir_op_ilt: case nir_op_ieq: - case nir_op_ine: { - int32_t initial_val = initial->i32; - int32_t span = limit->i32 - initial_val; - iter = span / step->i32; + case nir_op_ine: + span = eval_const_binop(nir_op_isub, bit_size, limit, initial); + iter = eval_const_binop(nir_op_idiv, bit_size, span, step); break; - } + case nir_op_uge: - case nir_op_ult: { - uint32_t initial_val = initial->u32; - uint32_t span = limit->u32 - initial_val; - iter = span / step->u32; + case nir_op_ult: + span = eval_const_binop(nir_op_isub, bit_size, limit, initial); + iter = eval_const_binop(nir_op_udiv, bit_size, span, step); break; - } + case nir_op_fge: case nir_op_flt: case nir_op_feq: - case nir_op_fne: { - float initial_val = initial->f32; - float span = limit->f32 - initial_val; - iter = span / step->f32; + case nir_op_fne: + span = eval_const_binop(nir_op_fsub, bit_size, limit, initial); + iter = eval_const_binop(nir_op_fdiv, bit_size, span, step); + iter = eval_const_unop(nir_op_f2i64, bit_size, iter); break; - } + default: return -1; } - return iter; + uint64_t iter_u64 = nir_const_value_as_uint(iter, bit_size); + return iter_u64 > INT_MAX ? 
-1 : (int)iter_u64; } static bool @@ -618,18 +655,18 @@ test_iterations(int32_t iter_int, nir_const_value *step, { assert(nir_op_infos[cond_op].num_inputs == 2); - nir_const_value iter_src = {0, }; + nir_const_value iter_src; nir_op mul_op; nir_op add_op; switch (induction_base_type) { case nir_type_float: - iter_src.f32 = (float) iter_int; + iter_src = nir_const_value_for_float(iter_int, bit_size); mul_op = nir_op_fmul; add_op = nir_op_fadd; break; case nir_type_int: case nir_type_uint: - iter_src.i32 = iter_int; + iter_src = nir_const_value_for_int(iter_int, bit_size); mul_op = nir_op_imul; add_op = nir_op_iadd; break; @@ -662,14 +699,12 @@ test_iterations(int32_t iter_int, nir_const_value *step, static int calculate_iterations(nir_const_value *initial, nir_const_value *step, - nir_const_value *limit, nir_loop_variable *alu_def, - nir_alu_instr *cond_alu, nir_op alu_op, bool limit_rhs, + nir_const_value *limit, nir_alu_instr *alu, + nir_ssa_scalar cond, nir_op alu_op, bool limit_rhs, bool invert_cond) { assert(initial != NULL && step != NULL && limit != NULL); - nir_alu_instr *alu = nir_instr_as_alu(alu_def->def->parent_instr); - /* nir_op_isub should have been lowered away by this point */ assert(alu->op != nir_op_isub); @@ -701,12 +736,16 @@ calculate_iterations(nir_const_value *initial, nir_const_value *step, * condition and if so we assume we need to step the initial value. */ unsigned trip_offset = 0; - if (cond_alu->src[0].src.ssa == alu_def->def || - cond_alu->src[1].src.ssa == alu_def->def) { + nir_alu_instr *cond_alu = nir_instr_as_alu(cond.def->parent_instr); + if (cond_alu->src[0].src.ssa == &alu->dest.dest.ssa || + cond_alu->src[1].src.ssa == &alu->dest.dest.ssa) { trip_offset = 1; } - int iter_int = get_iteration(alu_op, initial, step, limit); + assert(nir_src_bit_size(alu->src[0].src) == + nir_src_bit_size(alu->src[1].src)); + unsigned bit_size = nir_src_bit_size(alu->src[0].src); + int iter_int = get_iteration(alu_op, *initial, *step, *limit, bit_size); /* If iter_int is negative the loop is ill-formed or is the conditional is * unsigned with a huge iteration count so don't bother going any further. 
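The reworked get_iteration() above folds the (limit - initial) / step arithmetic through nir_eval_const_opcode() at the induction variable's actual bit size instead of reading hard-coded 32-bit union fields. As a rough standalone model of the integer path, with made-up values and the trip_offset handling ignored:

#include <assert.h>
#include <stdint.h>

/* Simplified model of the signed-integer case in get_iteration(): the
 * candidate iteration count is (limit - initial) / step, using truncating
 * division just like nir_op_idiv. */
static int64_t
model_iteration_count(int64_t initial, int64_t step, int64_t limit)
{
   int64_t span = limit - initial;
   return span / step;
}

int
main(void)
{
   /* for (i = 0; i < 12; i += 3)  ->  4 iterations */
   assert(model_iteration_count(0, 3, 12) == 4);

   /* for (i = 1; i < 12; i += 3)  ->  (12 - 1) / 3 = 3; the -1/0/+1 bias loop
    * in calculate_iterations() then checks candidates against the exit
    * condition and settles on the true count of 4. */
   assert(model_iteration_count(1, 3, 12) == 3);
   return 0;
}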
@@ -723,9 +762,6 @@ calculate_iterations(nir_const_value *initial, nir_const_value *step, * * for (float x = 0.0; x != 0.9; x += 0.2); */ - assert(nir_src_bit_size(alu->src[0].src) == - nir_src_bit_size(alu->src[1].src)); - unsigned bit_size = nir_src_bit_size(alu->src[0].src); for (int bias = -1; bias <= 1; bias++) { const int iter_bias = iter_int + bias; @@ -740,9 +776,9 @@ calculate_iterations(nir_const_value *initial, nir_const_value *step, } static nir_op -inverse_comparison(nir_alu_instr *alu) +inverse_comparison(nir_op alu_op) { - switch (alu->op) { + switch (alu_op) { case nir_op_fge: return nir_op_flt; case nir_op_ige: @@ -769,95 +805,97 @@ inverse_comparison(nir_alu_instr *alu) } static bool -is_supported_terminator_condition(nir_alu_instr *alu) +is_supported_terminator_condition(nir_ssa_scalar cond) { + if (!nir_ssa_scalar_is_alu(cond)) + return false; + + nir_alu_instr *alu = nir_instr_as_alu(cond.def->parent_instr); return nir_alu_instr_is_comparison(alu) && nir_op_infos[alu->op].num_inputs == 2; } static bool -get_induction_and_limit_vars(nir_alu_instr *alu, nir_loop_variable **ind, - nir_loop_variable **limit, +get_induction_and_limit_vars(nir_ssa_scalar cond, + nir_ssa_scalar *ind, + nir_ssa_scalar *limit, + bool *limit_rhs, loop_info_state *state) { - bool limit_rhs = true; - - /* We assume that the limit is the "right" operand */ - *ind = get_loop_var(alu->src[0].src.ssa, state); - *limit = get_loop_var(alu->src[1].src.ssa, state); - - if ((*ind)->type != basic_induction) { - /* We had it the wrong way, flip things around */ - *ind = get_loop_var(alu->src[1].src.ssa, state); - *limit = get_loop_var(alu->src[0].src.ssa, state); - limit_rhs = false; + nir_ssa_scalar rhs, lhs; + lhs = nir_ssa_scalar_chase_alu_src(cond, 0); + rhs = nir_ssa_scalar_chase_alu_src(cond, 1); + + if (get_loop_var(lhs.def, state)->type == basic_induction) { + *ind = lhs; + *limit = rhs; + *limit_rhs = true; + return true; + } else if (get_loop_var(rhs.def, state)->type == basic_induction) { + *ind = rhs; + *limit = lhs; + *limit_rhs = false; + return true; + } else { + return false; } - - return limit_rhs; } -static void -try_find_trip_count_vars_in_iand(nir_alu_instr **alu, - nir_loop_variable **ind, - nir_loop_variable **limit, +static bool +try_find_trip_count_vars_in_iand(nir_ssa_scalar *cond, + nir_ssa_scalar *ind, + nir_ssa_scalar *limit, bool *limit_rhs, loop_info_state *state) { - assert((*alu)->op == nir_op_ieq || (*alu)->op == nir_op_inot); - - nir_ssa_def *iand_def = (*alu)->src[0].src.ssa; + const nir_op alu_op = nir_ssa_scalar_alu_op(*cond); + assert(alu_op == nir_op_ieq || alu_op == nir_op_inot); - if ((*alu)->op == nir_op_ieq) { - nir_ssa_def *zero_def = (*alu)->src[1].src.ssa; + nir_ssa_scalar iand = nir_ssa_scalar_chase_alu_src(*cond, 0); - if (iand_def->parent_instr->type != nir_instr_type_alu || - zero_def->parent_instr->type != nir_instr_type_load_const) { + if (alu_op == nir_op_ieq) { + nir_ssa_scalar zero = nir_ssa_scalar_chase_alu_src(*cond, 1); + if (!nir_ssa_scalar_is_alu(iand) || !nir_ssa_scalar_is_const(zero)) { /* Maybe we had it the wrong way, flip things around */ - iand_def = (*alu)->src[1].src.ssa; - zero_def = (*alu)->src[0].src.ssa; + nir_ssa_scalar tmp = zero; + zero = iand; + iand = tmp; /* If we still didn't find what we need then return */ - if (zero_def->parent_instr->type != nir_instr_type_load_const) - return; + if (!nir_ssa_scalar_is_const(zero)) + return false; } /* If the loop is not breaking on (x && y) == 0 then return */ - nir_const_value *zero = - 
nir_instr_as_load_const(zero_def->parent_instr)->value; - if (zero[0].i32 != 0) - return; + if (nir_ssa_scalar_as_uint(zero) != 0) + return false; } - if (iand_def->parent_instr->type != nir_instr_type_alu) - return; + if (!nir_ssa_scalar_is_alu(iand)) + return false; - nir_alu_instr *iand = nir_instr_as_alu(iand_def->parent_instr); - if (iand->op != nir_op_iand) - return; + if (nir_ssa_scalar_alu_op(iand) != nir_op_iand) + return false; /* Check if iand src is a terminator condition and try get induction var * and trip limit var. */ - nir_ssa_def *src = iand->src[0].src.ssa; - if (src->parent_instr->type == nir_instr_type_alu) { - *alu = nir_instr_as_alu(src->parent_instr); - if (is_supported_terminator_condition(*alu)) - *limit_rhs = get_induction_and_limit_vars(*alu, ind, limit, state); - } + bool found_induction_var = false; + for (unsigned i = 0; i < 2; i++) { + nir_ssa_scalar src = nir_ssa_scalar_chase_alu_src(iand, i); + if (is_supported_terminator_condition(src) && + get_induction_and_limit_vars(src, ind, limit, limit_rhs, state)) { + *cond = src; + found_induction_var = true; - /* Try the other iand src if needed */ - if (*ind == NULL || (*ind && (*ind)->type != basic_induction) || - !is_var_constant(*limit)) { - src = iand->src[1].src.ssa; - if (src->parent_instr->type == nir_instr_type_alu) { - nir_alu_instr *tmp_alu = nir_instr_as_alu(src->parent_instr); - if (is_supported_terminator_condition(tmp_alu)) { - *alu = tmp_alu; - *limit_rhs = get_induction_and_limit_vars(*alu, ind, limit, state); - } + /* If we've found one with a constant limit, stop. */ + if (nir_ssa_scalar_is_const(*limit)) + return true; } } + + return found_induction_var; } /* Run through each of the terminators of the loop and try to infer a possible @@ -877,8 +915,10 @@ find_trip_count(loop_info_state *state) list_for_each_entry(nir_loop_terminator, terminator, &state->loop->info->loop_terminator_list, loop_terminator_link) { + assert(terminator->nif->condition.is_ssa); + nir_ssa_scalar cond = { terminator->nif->condition.ssa, 0 }; - if (terminator->conditional_instr->type != nir_instr_type_alu) { + if (!nir_ssa_scalar_is_alu(cond)) { /* If we get here the loop is dead and will get cleaned up by the * nir_opt_dead_cf pass. */ @@ -886,43 +926,35 @@ find_trip_count(loop_info_state *state) continue; } - nir_alu_instr *alu = nir_instr_as_alu(terminator->conditional_instr); - nir_op alu_op = alu->op; + nir_op alu_op = nir_ssa_scalar_alu_op(cond); bool limit_rhs; - nir_loop_variable *basic_ind = NULL; - nir_loop_variable *limit; - if (alu->op == nir_op_inot || alu->op == nir_op_ieq) { - nir_alu_instr *new_alu = alu; - try_find_trip_count_vars_in_iand(&new_alu, &basic_ind, &limit, - &limit_rhs, state); + nir_ssa_scalar basic_ind = { NULL, 0 }; + nir_ssa_scalar limit; + if ((alu_op == nir_op_inot || alu_op == nir_op_ieq) && + try_find_trip_count_vars_in_iand(&cond, &basic_ind, &limit, + &limit_rhs, state)) { /* The loop is exiting on (x && y) == 0 so we need to get the * inverse of x or y (i.e. which ever contained the induction var) in * order to compute the trip count. 
*/ - if (basic_ind && basic_ind->type == basic_induction) { - alu = new_alu; - alu_op = inverse_comparison(alu); - trip_count_known = false; - terminator->exact_trip_count_unknown = true; - } + alu_op = inverse_comparison(nir_ssa_scalar_alu_op(cond)); + trip_count_known = false; + terminator->exact_trip_count_unknown = true; } - if (!basic_ind) { - if (!is_supported_terminator_condition(alu)) { - trip_count_known = false; - continue; + if (!basic_ind.def) { + if (is_supported_terminator_condition(cond)) { + get_induction_and_limit_vars(cond, &basic_ind, + &limit, &limit_rhs, state); } - - limit_rhs = get_induction_and_limit_vars(alu, &basic_ind, &limit, - state); } /* The comparison has to have a basic induction variable for us to be * able to find trip counts. */ - if (basic_ind->type != basic_induction) { + if (!basic_ind.def) { trip_count_known = false; continue; } @@ -931,9 +963,8 @@ find_trip_count(loop_info_state *state) /* Attempt to find a constant limit for the loop */ nir_const_value limit_val; - if (is_var_constant(limit)) { - limit_val = - nir_instr_as_load_const(limit->def->parent_instr)->value[0]; + if (nir_ssa_scalar_is_const(limit)) { + limit_val = nir_ssa_scalar_as_const_value(limit); } else { trip_count_known = false; @@ -955,17 +986,38 @@ find_trip_count(loop_info_state *state) * Thats all thats needed to calculate the trip-count */ - nir_const_value *initial_val = - nir_instr_as_load_const(basic_ind->ind->def_outside_loop-> - def->parent_instr)->value; + nir_basic_induction_var *ind_var = + get_loop_var(basic_ind.def, state)->ind; + + /* The basic induction var might be a vector but, because we guarantee + * earlier that the phi source has a scalar swizzle, we can take the + * component from basic_ind. + */ + nir_ssa_scalar initial_s = { ind_var->def_outside_loop, basic_ind.comp }; + nir_ssa_scalar alu_s = { &ind_var->alu->dest.dest.ssa, basic_ind.comp }; + + nir_const_value initial_val = nir_ssa_scalar_as_const_value(initial_s); - nir_const_value *step_val = - nir_instr_as_load_const(basic_ind->ind->invariant->def-> - parent_instr)->value; + /* We are guaranteed by earlier code that at least one of these sources + * is a constant but we don't know which. 
+ */ + nir_const_value step_val; + memset(&step_val, 0, sizeof(step_val)); + UNUSED bool found_step_value = false; + assert(nir_op_infos[ind_var->alu->op].num_inputs == 2); + for (unsigned i = 0; i < 2; i++) { + nir_ssa_scalar alu_src = nir_ssa_scalar_chase_alu_src(alu_s, i); + if (nir_ssa_scalar_is_const(alu_src)) { + found_step_value = true; + step_val = nir_ssa_scalar_as_const_value(alu_src); + break; + } + } + assert(found_step_value); - int iterations = calculate_iterations(initial_val, step_val, + int iterations = calculate_iterations(&initial_val, &step_val, &limit_val, - basic_ind->ind->alu_def, alu, + ind_var->alu, cond, alu_op, limit_rhs, terminator->continue_from_then); diff --git a/src/compiler/nir/nir_lower_int64.c b/src/compiler/nir/nir_lower_int64.c index b3b78c6649a..84ec2a77f1e 100644 --- a/src/compiler/nir/nir_lower_int64.c +++ b/src/compiler/nir/nir_lower_int64.c @@ -629,6 +629,34 @@ lower_irem64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d) return nir_bcsel(b, n_is_neg, nir_ineg(b, r), r); } +static nir_ssa_def * +lower_extract(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *c) +{ + assert(op == nir_op_extract_u8 || op == nir_op_extract_i8 || + op == nir_op_extract_u16 || op == nir_op_extract_i16); + + const int chunk = nir_src_as_uint(nir_src_for_ssa(c)); + const int chunk_bits = + (op == nir_op_extract_u8 || op == nir_op_extract_i8) ? 8 : 16; + const int num_chunks_in_32 = 32 / chunk_bits; + + nir_ssa_def *extract32; + if (chunk < num_chunks_in_32) { + extract32 = nir_build_alu(b, op, nir_unpack_64_2x32_split_x(b, x), + nir_imm_int(b, chunk), + NULL, NULL); + } else { + extract32 = nir_build_alu(b, op, nir_unpack_64_2x32_split_y(b, x), + nir_imm_int(b, chunk - num_chunks_in_32), + NULL, NULL); + } + + if (op == nir_op_extract_i8 || op == nir_op_extract_i16) + return lower_i2i64(b, extract32); + else + return lower_u2u64(b, extract32); +} + nir_lower_int64_options nir_lower_int64_op_to_options_mask(nir_op opcode) { @@ -685,6 +713,11 @@ nir_lower_int64_op_to_options_mask(nir_op opcode) case nir_op_ishr: case nir_op_ushr: return nir_lower_shift64; + case nir_op_extract_u8: + case nir_op_extract_i8: + case nir_op_extract_u16: + case nir_op_extract_i16: + return nir_lower_extract64; default: return 0; } @@ -779,6 +812,11 @@ lower_int64_alu_instr(nir_builder *b, nir_alu_instr *alu) return lower_ishr64(b, src[0], src[1]); case nir_op_ushr: return lower_ushr64(b, src[0], src[1]); + case nir_op_extract_u8: + case nir_op_extract_i8: + case nir_op_extract_u16: + case nir_op_extract_i16: + return lower_extract(b, alu->op, src[0], src[1]); default: unreachable("Invalid ALU opcode to lower"); } diff --git a/src/compiler/nir/nir_lower_non_uniform_access.c b/src/compiler/nir/nir_lower_non_uniform_access.c index 6aa11f9bebe..0ab32100ef2 100644 --- a/src/compiler/nir/nir_lower_non_uniform_access.c +++ b/src/compiler/nir/nir_lower_non_uniform_access.c @@ -34,6 +34,7 @@ read_first_invocation(nir_builder *b, nir_ssa_def *x) first->src[0] = nir_src_for_ssa(x); nir_ssa_dest_init(&first->instr, &first->dest, x->num_components, x->bit_size, NULL); + nir_builder_instr_insert(b, &first->instr); return &first->dest.ssa; } @@ -128,8 +129,8 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl, nir_builder b; nir_builder_init(&b, impl); - nir_foreach_block(block, impl) { - nir_foreach_instr(instr, block) { + nir_foreach_block_safe(block, impl) { + nir_foreach_instr_safe(instr, block) { switch (instr->type) { case nir_instr_type_tex: { nir_tex_instr *tex = nir_instr_as_tex(instr); 
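lower_extract() above splits a 64-bit extract_u8/i8/u16/i16 into a 32-bit extract on either the low or the high word of the source, then widens the result back to 64 bits (zero-extending for the unsigned forms, sign-extending for the signed ones). A standalone model of the chunk re-indexing for the 16-bit case, with made-up values:

#include <assert.h>
#include <stdint.h>

/* Reference semantics of extract_u16 on a 64-bit value: chunk N is bit range
 * [16*N, 16*N + 15]. */
static uint64_t
extract_u16_ref(uint64_t x, unsigned chunk)
{
   return (x >> (chunk * 16)) & 0xffff;
}

/* Model of the lowering: pick the low or high 32-bit word (the x or y
 * component of unpack_64_2x32), then extract a re-indexed 16-bit chunk. */
static uint64_t
extract_u16_lowered(uint64_t x, unsigned chunk)
{
   uint32_t word = chunk < 2 ? (uint32_t)x : (uint32_t)(x >> 32);
   unsigned sub_chunk = chunk < 2 ? chunk : chunk - 2;
   return (word >> (sub_chunk * 16)) & 0xffff;
}

int
main(void)
{
   uint64_t x = 0x1122334455667788ull;
   for (unsigned chunk = 0; chunk < 4; chunk++)
      assert(extract_u16_ref(x, chunk) == extract_u16_lowered(x, chunk));
   return 0;
}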
diff --git a/src/compiler/nir/nir_lower_regs_to_ssa.c b/src/compiler/nir/nir_lower_regs_to_ssa.c index 0db11ff1d1c..76ed1287379 100644 --- a/src/compiler/nir/nir_lower_regs_to_ssa.c +++ b/src/compiler/nir/nir_lower_regs_to_ssa.c @@ -251,9 +251,17 @@ nir_lower_regs_to_ssa_impl(nir_function_impl *impl) nir_foreach_block(block, impl) { nir_foreach_instr(instr, block) { - if (instr->type == nir_instr_type_alu) { + switch (instr->type) { + case nir_instr_type_alu: rewrite_alu_instr(nir_instr_as_alu(instr), &state); - } else { + break; + + case nir_instr_type_phi: + /* We rewrite sources as a separate pass */ + nir_foreach_dest(instr, rewrite_dest, &state); + break; + + default: nir_foreach_src(instr, rewrite_src, &state); nir_foreach_dest(instr, rewrite_dest, &state); } @@ -262,6 +270,28 @@ nir_lower_regs_to_ssa_impl(nir_function_impl *impl) nir_if *following_if = nir_block_get_following_if(block); if (following_if) rewrite_if_condition(following_if, &state); + + /* Handle phi sources that source from this block. We have to do this + * as a separate pass because the phi builder assumes that uses and + * defs are processed in an order that respects dominance. When we have + * loops, a phi source may be a back-edge so we have to handle it as if + * it were one of the last instructions in the predecessor block. + */ + for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) { + if (block->successors[i] == NULL) + continue; + + nir_foreach_instr(instr, block->successors[i]) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_foreach_phi_src(phi_src, phi) { + if (phi_src->pred == block) + rewrite_src(&phi_src->src, &state); + } + } + } } nir_phi_builder_finish(phi_build); diff --git a/src/compiler/nir/nir_lower_var_copies.c b/src/compiler/nir/nir_lower_var_copies.c index 0ba398698f0..e6ade733eba 100644 --- a/src/compiler/nir/nir_lower_var_copies.c +++ b/src/compiler/nir/nir_lower_var_copies.c @@ -56,7 +56,9 @@ emit_deref_copy_load_store(nir_builder *b, nir_deref_instr *dst_deref, nir_deref_instr **dst_deref_arr, nir_deref_instr *src_deref, - nir_deref_instr **src_deref_arr) + nir_deref_instr **src_deref_arr, + enum gl_access_qualifier dst_access, + enum gl_access_qualifier src_access) { if (dst_deref_arr || src_deref_arr) { assert(dst_deref_arr && src_deref_arr); @@ -79,14 +81,16 @@ emit_deref_copy_load_store(nir_builder *b, nir_build_deref_array_imm(b, dst_deref, i), dst_deref_arr + 1, nir_build_deref_array_imm(b, src_deref, i), - src_deref_arr + 1); + src_deref_arr + 1, dst_access, src_access); } } else { assert(glsl_get_bare_type(dst_deref->type) == glsl_get_bare_type(src_deref->type)); assert(glsl_type_is_vector_or_scalar(dst_deref->type)); - nir_store_deref(b, dst_deref, nir_load_deref(b, src_deref), ~0); + nir_store_deref_with_access(b, dst_deref, + nir_load_deref_with_access(b, src_deref, src_access), + ~0, src_access); } } @@ -106,7 +110,9 @@ nir_lower_deref_copy_instr(nir_builder *b, nir_intrinsic_instr *copy) b->cursor = nir_before_instr(&copy->instr); emit_deref_copy_load_store(b, dst_path.path[0], &dst_path.path[1], - src_path.path[0], &src_path.path[1]); + src_path.path[0], &src_path.path[1], + nir_intrinsic_dst_access(copy), + nir_intrinsic_src_access(copy)); nir_deref_path_finish(&dst_path); nir_deref_path_finish(&src_path); diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 55e46b04466..ce1298ccab1 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ 
b/src/compiler/nir/nir_opt_algebraic.py @@ -985,7 +985,7 @@ def bitfield_reverse(u): return step5 -optimizations += [(bitfield_reverse('x@32'), ('bitfield_reverse', 'x'))] +optimizations += [(bitfield_reverse('x@32'), ('bitfield_reverse', 'x'), '!options->lower_bitfield_reverse')] # For any float comparison operation, "cmp", if you have "a == a && a cmp b" # then the "a == a" is redundant because it's equivalent to "a is not NaN" @@ -1086,9 +1086,6 @@ def bitfield_reverse(u): (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'), (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'), - (('b2f(is_used_more_than_once)', ('inot', 'a@1')), ('bcsel', a, 0.0, 1.0)), - (('fneg(is_used_more_than_once)', ('b2f', ('inot', 'a@1'))), ('bcsel', a, -0.0, -1.0)), - # we do these late so that we don't get in the way of creating ffmas (('fmin', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmin', a, b))), (('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmax', a, b))), diff --git a/src/compiler/nir/nir_opt_comparison_pre.c b/src/compiler/nir/nir_opt_comparison_pre.c index eee496251a7..a7a227ce371 100644 --- a/src/compiler/nir/nir_opt_comparison_pre.c +++ b/src/compiler/nir/nir_opt_comparison_pre.c @@ -346,7 +346,7 @@ comparison_pre_block(nir_block *block, struct block_queue *bq, nir_builder *bld) return progress; } -static bool +bool nir_opt_comparison_pre_impl(nir_function_impl *impl) { struct block_queue bq; diff --git a/src/compiler/nir/nir_opt_dead_cf.c b/src/compiler/nir/nir_opt_dead_cf.c index b0e9723d36c..33f3565c564 100644 --- a/src/compiler/nir/nir_opt_dead_cf.c +++ b/src/compiler/nir/nir_opt_dead_cf.c @@ -216,7 +216,7 @@ node_is_dead(nir_cf_node *node) nir_foreach_instr(instr, block) { if (instr->type == nir_instr_type_call) - return true; + return false; /* Return instructions can cause us to skip over other side-effecting * instructions after the loop, so consider them to have side effects diff --git a/src/compiler/nir/nir_opt_gcm.c b/src/compiler/nir/nir_opt_gcm.c index e7d3f8ec424..aeae2ad6401 100644 --- a/src/compiler/nir/nir_opt_gcm.c +++ b/src/compiler/nir/nir_opt_gcm.c @@ -152,11 +152,7 @@ gcm_pin_instructions_block(nir_block *block, struct gcm_state *state) break; case nir_instr_type_intrinsic: { - const nir_intrinsic_info *info = - &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic]; - - if ((info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && - (info->flags & NIR_INTRINSIC_CAN_REORDER)) { + if (nir_intrinsic_can_reorder(nir_instr_as_intrinsic(instr))) { instr->pass_flags = 0; } else { instr->pass_flags = GCM_INSTR_PINNED; diff --git a/src/compiler/nir/nir_opt_idiv_const.c b/src/compiler/nir/nir_opt_idiv_const.c index 8f70821ca2f..688186779e6 100644 --- a/src/compiler/nir/nir_opt_idiv_const.c +++ b/src/compiler/nir/nir_opt_idiv_const.c @@ -65,15 +65,17 @@ build_umod(nir_builder *b, nir_ssa_def *n, uint64_t d) static nir_ssa_def * build_idiv(nir_builder *b, nir_ssa_def *n, int64_t d) { + uint64_t abs_d = d < 0 ? -d : d; + if (d == 0) { return nir_imm_intN_t(b, 0, n->bit_size); } else if (d == 1) { return n; } else if (d == -1) { return nir_ineg(b, n); - } else if (util_is_power_of_two_or_zero64(d)) { - uint64_t abs_d = d < 0 ? 
-d : d; - nir_ssa_def *uq = nir_ishr(b, n, nir_imm_int(b, util_logbase2_64(abs_d))); + } else if (util_is_power_of_two_or_zero64(abs_d)) { + nir_ssa_def *uq = nir_ushr(b, nir_iabs(b, n), + nir_imm_int(b, util_logbase2_64(abs_d))); nir_ssa_def *n_neg = nir_ilt(b, n, nir_imm_intN_t(b, 0, n->bit_size)); nir_ssa_def *neg = d < 0 ? nir_inot(b, n_neg) : n_neg; return nir_bcsel(b, neg, nir_ineg(b, uq), uq); diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c index f674185f1e2..912580be840 100644 --- a/src/compiler/nir/nir_opt_if.c +++ b/src/compiler/nir/nir_opt_if.c @@ -1040,6 +1040,13 @@ opt_if_loop_terminator(nir_if *nif) if (!nir_is_trivial_loop_if(nif, break_blk)) return false; + /* Even though this if statement has a jump on one side, we may still have + * phis afterwards. Single-source phis can be produced by loop unrolling + * or dead control-flow passes and are perfectly legal. Run a quick phi + * removal on the block after the if to clean up any such phis. + */ + nir_opt_remove_phis_block(nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node))); + /* Finally, move the continue from branch after the if-statement. */ nir_cf_list tmp; nir_cf_extract(&tmp, nir_before_block(first_continue_from_blk), diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c index c0198390749..9e697fc4b45 100644 --- a/src/compiler/nir/nir_opt_loop_unroll.c +++ b/src/compiler/nir/nir_opt_loop_unroll.c @@ -560,31 +560,7 @@ wrapper_unroll(nir_loop *loop) nir_after_block(nir_if_last_else_block(terminator->nif)); } } else { - nir_block *blk_after_loop = - nir_cursor_current_block(nir_after_cf_node(&loop->cf_node)); - - /* There may still be some single src phis following the loop that - * have not yet been cleaned up by another pass. Tidy those up - * before unrolling the loop. - */ - nir_foreach_instr_safe(instr, blk_after_loop) { - if (instr->type != nir_instr_type_phi) - break; - - nir_phi_instr *phi = nir_instr_as_phi(instr); - assert(exec_list_length(&phi->srcs) == 1); - - nir_phi_src *phi_src = - exec_node_data(nir_phi_src, exec_list_get_head(&phi->srcs), node); - - nir_ssa_def_rewrite_uses(&phi->dest.ssa, phi_src->src); - nir_instr_remove(instr); - } - - /* Remove break at end of the loop */ - nir_block *last_loop_blk = nir_loop_last_block(loop); - nir_instr *break_instr = nir_block_last_instr(last_loop_blk); - nir_instr_remove(break_instr); + loop_prepare_for_unroll(loop); } /* Pluck out the loop body. 
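The nir_opt_idiv_const change above tests the power-of-two case on |d|, so negative divisors are handled, and shifts |n| with ushr instead of arithmetically shifting the signed numerator, so the lowered quotient truncates toward zero the way C division does. A standalone model of the new sequence, with made-up values:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Model of the lowered power-of-two path:
 *   uq  = |n| >> log2(|d|)
 *   neg = (d < 0) ? !(n < 0) : (n < 0)
 *   q   = neg ? -uq : uq
 */
static int64_t
model_idiv_pot(int64_t n, int64_t d, unsigned log2_abs_d)
{
   uint64_t abs_n = n < 0 ? -(uint64_t)n : (uint64_t)n;
   uint64_t uq = abs_n >> log2_abs_d;
   bool n_neg = n < 0;
   bool neg = d < 0 ? !n_neg : n_neg;
   return neg ? -(int64_t)uq : (int64_t)uq;
}

int
main(void)
{
   /* Truncation toward zero, matching C: -7 / 4 == -1 and 7 / -4 == -1. */
   assert(model_idiv_pot(-7, 4, 2) == -7 / 4);
   assert(model_idiv_pot( 7, -4, 2) == 7 / -4);
   assert(model_idiv_pot(-8, 4, 2) == -8 / 4);
   return 0;
}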
*/ diff --git a/src/compiler/nir/nir_opt_move_load_ubo.c b/src/compiler/nir/nir_opt_move_load_ubo.c index a32f1704427..f36a62a5308 100644 --- a/src/compiler/nir/nir_opt_move_load_ubo.c +++ b/src/compiler/nir/nir_opt_move_load_ubo.c @@ -91,7 +91,7 @@ move_load_ubo(nir_block *block) } } - return false; + return progress; } bool diff --git a/src/compiler/nir/nir_opt_remove_phis.c b/src/compiler/nir/nir_opt_remove_phis.c index 9efbf422624..b03a0ab41b3 100644 --- a/src/compiler/nir/nir_opt_remove_phis.c +++ b/src/compiler/nir/nir_opt_remove_phis.c @@ -139,6 +139,14 @@ remove_phis_block(nir_block *block, nir_builder *b) return progress; } +bool +nir_opt_remove_phis_block(nir_block *block) +{ + nir_builder b; + nir_builder_init(&b, nir_cf_node_get_function(&block->cf_node)); + return remove_phis_block(block, &b); +} + static bool nir_opt_remove_phis_impl(nir_function_impl *impl) { diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 4056bd77455..92707d72990 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -771,6 +771,8 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) [NIR_INTRINSIC_IMAGE_DIM] = "image_dim", [NIR_INTRINSIC_IMAGE_ARRAY] = "image_array", [NIR_INTRINSIC_ACCESS] = "access", + [NIR_INTRINSIC_SRC_ACCESS] = "src-access", + [NIR_INTRINSIC_DST_ACCESS] = "dst-access", [NIR_INTRINSIC_FORMAT] = "format", [NIR_INTRINSIC_ALIGN_MUL] = "align_mul", [NIR_INTRINSIC_ALIGN_OFFSET] = "align_offset", diff --git a/src/compiler/nir/nir_propagate_invariant.c b/src/compiler/nir/nir_propagate_invariant.c index 103b2422b83..a0cfde67891 100644 --- a/src/compiler/nir/nir_propagate_invariant.c +++ b/src/compiler/nir/nir_propagate_invariant.c @@ -65,12 +65,21 @@ add_cf_node(nir_cf_node *cf, struct set *invariants) static void add_var(nir_variable *var, struct set *invariants) { - _mesa_set_add(invariants, var); + /* Because we pass the result of nir_intrinsic_get_var directly to this + * function, it's possible for var to be NULL if, for instance, there's a + * cast somewhere in the chain. + */ + if (var != NULL) + _mesa_set_add(invariants, var); } static bool var_is_invariant(nir_variable *var, struct set * invariants) { + /* Because we pass the result of nir_intrinsic_get_var directly to this + * function, it's possible for var to be NULL if, for instance, there's a + * cast somewhere in the chain. 
+ */ return var && (var->data.invariant || _mesa_set_search(invariants, var)); } diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h index 1624508993d..e3fdc08c4d7 100644 --- a/src/compiler/nir/nir_search_helpers.h +++ b/src/compiler/nir/nir_search_helpers.h @@ -143,22 +143,6 @@ is_not_const(nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components, return !nir_src_is_const(instr->src[src].src); } -static inline bool -is_used_more_than_once(nir_alu_instr *instr) -{ - bool zero_if_use = list_empty(&instr->dest.dest.ssa.if_uses); - bool zero_use = list_empty(&instr->dest.dest.ssa.uses); - - if (zero_use && zero_if_use) - return false; - else if (zero_use && list_is_singular(&instr->dest.dest.ssa.if_uses)) - return false; - else if (zero_if_use && list_is_singular(&instr->dest.dest.ssa.uses)) - return false; - - return true; -} - static inline bool is_used_once(nir_alu_instr *instr) { diff --git a/src/compiler/nir/nir_split_var_copies.c b/src/compiler/nir/nir_split_var_copies.c index 355a4e56d01..10b71c16c17 100644 --- a/src/compiler/nir/nir_split_var_copies.c +++ b/src/compiler/nir/nir_split_var_copies.c @@ -64,21 +64,25 @@ static void split_deref_copy_instr(nir_builder *b, - nir_deref_instr *dst, nir_deref_instr *src) + nir_deref_instr *dst, nir_deref_instr *src, + enum gl_access_qualifier dst_access, + enum gl_access_qualifier src_access) { assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type)); if (glsl_type_is_vector_or_scalar(src->type)) { - nir_copy_deref(b, dst, src); + nir_copy_deref_with_access(b, dst, src, dst_access, src_access); } else if (glsl_type_is_struct_or_ifc(src->type)) { for (unsigned i = 0; i < glsl_get_length(src->type); i++) { split_deref_copy_instr(b, nir_build_deref_struct(b, dst, i), - nir_build_deref_struct(b, src, i)); + nir_build_deref_struct(b, src, i), + dst_access, src_access); } } else { assert(glsl_type_is_matrix(src->type) || glsl_type_is_array(src->type)); split_deref_copy_instr(b, nir_build_deref_array_wildcard(b, dst), - nir_build_deref_array_wildcard(b, src)); + nir_build_deref_array_wildcard(b, src), + dst_access, src_access); } } @@ -105,7 +109,9 @@ split_var_copies_impl(nir_function_impl *impl) nir_instr_as_deref(copy->src[0].ssa->parent_instr); nir_deref_instr *src = nir_instr_as_deref(copy->src[1].ssa->parent_instr); - split_deref_copy_instr(&b, dst, src); + split_deref_copy_instr(&b, dst, src, + nir_intrinsic_dst_access(copy), + nir_intrinsic_src_access(copy)); progress = true; } diff --git a/src/compiler/nir/nir_to_lcssa.c b/src/compiler/nir/nir_to_lcssa.c index 7948b117927..6057c66586d 100644 --- a/src/compiler/nir/nir_to_lcssa.c +++ b/src/compiler/nir/nir_to_lcssa.c @@ -111,9 +111,6 @@ convert_loop_exit_for_ssa(nir_ssa_def *def, void *void_state) if (all_uses_inside_loop) return true; - /* We don't want derefs ending up in phi sources */ - assert(def->parent_instr->type != nir_instr_type_deref); - /* Initialize a phi-instruction */ nir_phi_instr *phi = nir_phi_instr_create(state->shader); nir_ssa_dest_init(&phi->instr, &phi->dest, @@ -131,6 +128,25 @@ convert_loop_exit_for_ssa(nir_ssa_def *def, void *void_state) } nir_instr_insert_before_block(block_after_loop, &phi->instr); + nir_ssa_def *dest = &phi->dest.ssa; + + /* deref instructions need a cast after the phi */ + if (def->parent_instr->type == nir_instr_type_deref) { + nir_deref_instr *cast = + nir_deref_instr_create(state->shader, nir_deref_type_cast); + + nir_deref_instr *instr = nir_instr_as_deref(def->parent_instr); + 
cast->mode = instr->mode; + cast->type = instr->type; + cast->parent = nir_src_for_ssa(&phi->dest.ssa); + cast->cast.ptr_stride = nir_deref_instr_ptr_as_array_stride(instr); + + nir_ssa_dest_init(&cast->instr, &cast->dest, + phi->dest.ssa.num_components, + phi->dest.ssa.bit_size, NULL); + nir_instr_insert(nir_after_phis(block_after_loop), &cast->instr); + dest = &cast->dest.ssa; + } /* Run through all uses and rewrite those outside the loop to point to * the phi instead of pointing to the ssa-def. @@ -142,15 +158,13 @@ convert_loop_exit_for_ssa(nir_ssa_def *def, void *void_state) } if (!is_use_inside_loop(use, state->loop)) { - nir_instr_rewrite_src(use->parent_instr, use, - nir_src_for_ssa(&phi->dest.ssa)); + nir_instr_rewrite_src(use->parent_instr, use, nir_src_for_ssa(dest)); } } nir_foreach_if_use_safe(use, def) { if (!is_if_use_inside_loop(use, state->loop)) { - nir_if_rewrite_condition(use->parent_if, - nir_src_for_ssa(&phi->dest.ssa)); + nir_if_rewrite_condition(use->parent_if, nir_src_for_ssa(dest)); } } diff --git a/src/compiler/nir/tests/comparison_pre_tests.cpp b/src/compiler/nir/tests/comparison_pre_tests.cpp new file mode 100644 index 00000000000..fe1cc23fb3b --- /dev/null +++ b/src/compiler/nir/tests/comparison_pre_tests.cpp @@ -0,0 +1,531 @@ +/* + * Copyright © 2019 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include "nir.h" +#include "nir_builder.h" + +class comparison_pre_test : public ::testing::Test { +protected: + comparison_pre_test() + { + static const nir_shader_compiler_options options = { }; + nir_builder_init_simple_shader(&bld, NULL, MESA_SHADER_VERTEX, &options); + + v1 = nir_imm_vec4(&bld, -2.0, -1.0, 1.0, 2.0); + v2 = nir_imm_vec4(&bld, 2.0, 1.0, -1.0, -2.0); + v3 = nir_imm_vec4(&bld, 3.0, 4.0, 5.0, 6.0); + } + + ~comparison_pre_test() + { + ralloc_free(bld.shader); + } + + struct nir_builder bld; + + nir_ssa_def *v1; + nir_ssa_def *v2; + nir_ssa_def *v3; + + const uint8_t xxxx[4] = { 0, 0, 0, 0 }; + const uint8_t wwww[4] = { 3, 3, 3, 3 }; +}; + +TEST_F(comparison_pre_test, a_lt_b_vs_neg_a_plus_b) +{ + /* Before: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec4 32 ssa_4 = fadd ssa_0, ssa_2 + * vec1 32 ssa_5 = mov ssa_4.x + * vec1 1 ssa_6 = flt ssa_5, ssa_3 + * + * if ssa_6 { + * vec1 32 ssa_7 = fneg ssa_5 + * vec1 32 ssa_8 = fadd ssa_7, ssa_3 + * } else { + * } + * + * After: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec4 32 ssa_4 = fadd ssa_0, ssa_2 + * vec1 32 ssa_5 = mov ssa_4.x + * vec1 32 ssa_9 = fneg ssa_5 + * vec1 32 ssa_10 = fadd ssa_3, ssa_9 + * vec1 32 ssa_11 = load_const (0.0) + * vec1 1 ssa_12 = flt ssa_11, ssa_10 + * vec1 32 ssa_13 = mov ssa_10 + * vec1 1 ssa_14 = mov ssa_12 + * + * if ssa_14 { + * vec1 32 ssa_7 = fneg ssa_5 + * } else { + * } + */ + nir_ssa_def *one = nir_imm_float(&bld, 1.0f); + nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0); + + nir_ssa_def *flt = nir_flt(&bld, a, one); + + nir_if *nif = nir_push_if(&bld, flt); + + nir_fadd(&bld, nir_fneg(&bld, a), one); + + nir_pop_if(&bld, nif); + + EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl)); +} + +TEST_F(comparison_pre_test, a_lt_b_vs_a_minus_b) +{ + /* Before: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec4 32 ssa_4 = fadd ssa_0, ssa_2 + * vec1 32 ssa_5 = mov ssa_4.x + * vec1 1 ssa_6 = flt ssa_3, ssa_5 + * + * if ssa_6 { + * vec1 32 ssa_7 = fneg ssa_5 + * vec1 32 ssa_8 = fadd ssa_3, ssa_7 + * } else { + * } + * + * After: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec4 32 ssa_4 = fadd ssa_0, ssa_2 + * vec1 32 ssa_5 = mov ssa_4.x + * vec1 32 ssa_9 = fneg ssa_5 + * vec1 32 ssa_10 = fadd ssa_3, ssa_9 + * vec1 32 ssa_11 = load_const (0.0) + * vec1 1 ssa_12 = flt ssa_10, ssa_11 + * vec1 32 ssa_13 = mov ssa_10 + * vec1 1 ssa_14 = mov ssa_12 + * + * if ssa_14 { + * vec1 32 ssa_7 = fneg ssa_5 + * } else { + * } + */ + nir_ssa_def *one = nir_imm_float(&bld, 1.0f); + nir_ssa_def *b = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0); + + nir_ssa_def *flt = nir_flt(&bld, one, b); + + nir_if *nif = nir_push_if(&bld, flt); + + nir_fadd(&bld, one, nir_fneg(&bld, b)); + + nir_pop_if(&bld, nif); + + EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl)); +} + +TEST_F(comparison_pre_test, neg_a_lt_b_vs_a_plus_b) +{ + /* Before: + * + * vec4 32 ssa_0 
= load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec4 32 ssa_4 = fadd ssa_0, ssa_2 + * vec1 32 ssa_5 = mov ssa_4.x + * vec1 32 ssa_6 = fneg ssa_5 + * vec1 1 ssa_7 = flt ssa_6, ssa_3 + * + * if ssa_7 { + * vec1 32 ssa_8 = fadd ssa_5, ssa_3 + * } else { + * } + * + * After: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec4 32 ssa_4 = fadd ssa_0, ssa_2 + * vec1 32 ssa_5 = mov ssa_4.x + * vec1 32 ssa_9 = fneg ssa_5 + * vec1 32 ssa_9 = fneg ssa_6 + * vec1 32 ssa_10 = fadd ssa_3, ssa_9 + * vec1 32 ssa_11 = load_const ( 0.0) + * vec1 1 ssa_12 = flt ssa_11, ssa_10 + * vec1 32 ssa_13 = mov ssa_10 + * vec1 1 ssa_14 = mov ssa_12 + * + * if ssa_14 { + * } else { + * } + */ + + nir_ssa_def *one = nir_imm_float(&bld, 1.0f); + nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0); + + nir_ssa_def *flt = nir_flt(&bld, nir_fneg(&bld, a), one); + + nir_if *nif = nir_push_if(&bld, flt); + + nir_fadd(&bld, a, one); + + nir_pop_if(&bld, nif); + + EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl)); +} + +TEST_F(comparison_pre_test, a_lt_neg_b_vs_a_plus_b) +{ + /* Before: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec4 32 ssa_4 = fadd ssa_0, ssa_2 + * vec1 32 ssa_5 = mov ssa_4.x + * vec1 32 ssa_6 = fneg ssa_5 + * vec1 1 ssa_7 = flt ssa_3, ssa_6 + * + * if ssa_7 { + * vec1 32 ssa_8 = fadd ssa_3, ssa_5 + * } else { + * } + * + * After: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec4 32 ssa_4 = fadd ssa_0, ssa_2 + * vec1 32 ssa_5 = mov ssa_4.x + * vec1 32 ssa_9 = fneg ssa_5 + * vec1 32 ssa_9 = fneg ssa_6 + * vec1 32 ssa_10 = fadd ssa_3, ssa_9 + * vec1 32 ssa_11 = load_const ( 0.0) + * vec1 1 ssa_12 = flt ssa_10, ssa_11 + * vec1 32 ssa_13 = mov ssa_10 + * vec1 1 ssa_14 = mov ssa_12 + * + * if ssa_14 { + * } else { + * } + */ + nir_ssa_def *one = nir_imm_float(&bld, 1.0f); + nir_ssa_def *b = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0); + + nir_ssa_def *flt = nir_flt(&bld, one, nir_fneg(&bld, b)); + + nir_if *nif = nir_push_if(&bld, flt); + + nir_fadd(&bld, one, b); + + nir_pop_if(&bld, nif); + + EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl)); +} + +TEST_F(comparison_pre_test, imm_lt_b_vs_neg_imm_plus_b) +{ + /* Before: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec1 32 ssa_4 = load_const (-1.0) + * vec4 32 ssa_5 = fadd ssa_0, ssa_2 + * vec1 32 ssa_6 = mov ssa_5.x + * vec1 1 ssa_7 = flt ssa_3, ssa_6 + * + * if ssa_7 { + * vec1 32 ssa_8 = fadd ssa_4, ssa_6 + * } else { + * } + * + * After: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec1 32 ssa_4 = load_const (-1.0) + * vec4 32 ssa_5 = fadd ssa_0, ssa_2 + * vec1 32 ssa_6 = mov ssa_5.x + * vec1 32 ssa_9 = fneg ssa_3 
+ * vec1 32 ssa_10 = fadd ssa_6, ssa_9 + * vec1 32 ssa_11 = load_const ( 0.0) + * vec1 1 ssa_12 = flt ssa_11, ssa_10 + * vec1 32 ssa_13 = mov ssa_10 + * vec1 1 ssa_14 = mov ssa_12 + * + * if ssa_14 { + * } else { + * } + */ + nir_ssa_def *one = nir_imm_float(&bld, 1.0f); + nir_ssa_def *neg_one = nir_imm_float(&bld, -1.0f); + nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0); + + nir_ssa_def *flt = nir_flt(&bld, one, a); + + nir_if *nif = nir_push_if(&bld, flt); + + nir_fadd(&bld, neg_one, a); + + nir_pop_if(&bld, nif); + + EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl)); +} + +TEST_F(comparison_pre_test, a_lt_imm_vs_a_minus_imm) +{ + /* Before: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec1 32 ssa_4 = load_const (-1.0) + * vec4 32 ssa_5 = fadd ssa_0, ssa_2 + * vec1 32 ssa_6 = mov ssa_5.x + * vec1 1 ssa_7 = flt ssa_6, ssa_3 + * + * if ssa_6 { + * vec1 32 ssa_8 = fadd ssa_6, ssa_4 + * } else { + * } + * + * After: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec1 32 ssa_4 = load_const (-1.0) + * vec4 32 ssa_5 = fadd ssa_0, ssa_2 + * vec1 32 ssa_6 = mov ssa_5.x + * vec1 32 ssa_9 = fneg ssa_3 + * vec1 32 ssa_10 = fadd ssa_6, ssa_9 + * vec1 32 ssa_11 = load_const ( 0.0) + * vec1 1 ssa_12 = flt ssa_10, ssa_11 + * vec1 32 ssa_13 = mov ssa_10 + * vec1 1 ssa_14 = mov ssa_12 + * + * if ssa_14 { + * } else { + * } + */ + nir_ssa_def *one = nir_imm_float(&bld, 1.0f); + nir_ssa_def *neg_one = nir_imm_float(&bld, -1.0f); + nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0); + + nir_ssa_def *flt = nir_flt(&bld, a, one); + + nir_if *nif = nir_push_if(&bld, flt); + + nir_fadd(&bld, a, neg_one); + + nir_pop_if(&bld, nif); + + EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl)); +} + +TEST_F(comparison_pre_test, neg_imm_lt_a_vs_a_plus_imm) +{ + /* Before: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec1 32 ssa_4 = load_const (-1.0) + * vec4 32 ssa_5 = fadd ssa_0, ssa_2 + * vec1 32 ssa_6 = mov ssa_5.x + * vec1 1 ssa_7 = flt ssa_4, ssa_6 + * + * if ssa_7 { + * vec1 32 ssa_8 = fadd ssa_6, ssa_3 + * } else { + * } + * + * After: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec1 32 ssa_4 = load_const (-1.0) + * vec4 32 ssa_5 = fadd ssa_0, ssa_2 + * vec1 32 ssa_6 = mov ssa_5.x + * vec1 32 ssa_9 = fneg ssa_4 + * vec1 32 ssa_10 = fadd ssa_6, ssa_9 + * vec1 32 ssa_11 = load_const ( 0.0) + * vec1 1 ssa_12 = flt ssa_11, ssa_10 + * vec1 32 ssa_13 = mov ssa_10 + * vec1 1 ssa_14 = mov ssa_12 + * + * if ssa_14 { + * } else { + * } + */ + + nir_ssa_def *one = nir_imm_float(&bld, 1.0f); + nir_ssa_def *neg_one = nir_imm_float(&bld, -1.0f); + nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0); + + nir_ssa_def *flt = nir_flt(&bld, neg_one, a); + + nir_if *nif = nir_push_if(&bld, flt); + + nir_fadd(&bld, a, one); + + nir_pop_if(&bld, nif); + + EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl)); +} + +TEST_F(comparison_pre_test, 
a_lt_neg_imm_vs_a_plus_imm) +{ + /* Before: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec1 32 ssa_4 = load_const (-1.0) + * vec4 32 ssa_5 = fadd ssa_0, ssa_2 + * vec1 32 ssa_6 = mov ssa_5.x + * vec1 1 ssa_7 = flt ssa_6, ssa_4 + * + * if ssa_7 { + * vec1 32 ssa_8 = fadd ssa_6, ssa_3 + * } else { + * } + * + * After: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec1 32 ssa_3 = load_const ( 1.0) + * vec1 32 ssa_4 = load_const (-1.0) + * vec4 32 ssa_5 = fadd ssa_0, ssa_2 + * vec1 32 ssa_6 = mov ssa_5.x + * vec1 32 ssa_9 = fneg ssa_4 + * vec1 32 ssa_10 = fadd ssa_6, ssa_9 + * vec1 32 ssa_11 = load_const ( 0.0) + * vec1 1 ssa_12 = flt ssa_10, ssa_11 + * vec1 32 ssa_13 = mov ssa_10 + * vec1 1 ssa_14 = mov ssa_12 + * + * if ssa_14 { + * } else { + * } + */ + nir_ssa_def *one = nir_imm_float(&bld, 1.0f); + nir_ssa_def *neg_one = nir_imm_float(&bld, -1.0f); + nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0); + + nir_ssa_def *flt = nir_flt(&bld, a, neg_one); + + nir_if *nif = nir_push_if(&bld, flt); + + nir_fadd(&bld, a, one); + + nir_pop_if(&bld, nif); + + EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl)); +} + +TEST_F(comparison_pre_test, non_scalar_add_result) +{ + /* The optimization pass should not do anything because the result of the + * fadd is not a scalar. + * + * Before: + * + * vec4 32 ssa_0 = load_const (-2.0, -1.0, 1.0, 2.0) + * vec4 32 ssa_1 = load_const ( 2.0, 1.0, -1.0, -2.0) + * vec4 32 ssa_2 = load_const ( 3.0, 4.0, 5.0, 6.0) + * vec4 32 ssa_3 = fadd ssa_0, ssa_2 + * vec1 1 ssa_4 = flt ssa_0.x, ssa_3.x + * + * if ssa_4 { + * vec2 32 ssa_5 = fadd ssa_1.xx, ssa_3.xx + * } else { + * } + * + * After: + * + * No change. + */ + nir_ssa_def *a = nir_fadd(&bld, v1, v3); + + nir_alu_instr *flt = nir_alu_instr_create(bld.shader, nir_op_flt); + + flt->src[0].src = nir_src_for_ssa(v1); + flt->src[1].src = nir_src_for_ssa(a); + + memcpy(&flt->src[0].swizzle, xxxx, sizeof(xxxx)); + memcpy(&flt->src[1].swizzle, xxxx, sizeof(xxxx)); + + nir_builder_alu_instr_finish_and_insert(&bld, flt); + + flt->dest.dest.ssa.num_components = 1; + flt->dest.write_mask = 1; + + nir_if *nif = nir_push_if(&bld, &flt->dest.dest.ssa); + + nir_alu_instr *fadd = nir_alu_instr_create(bld.shader, nir_op_fadd); + + fadd->src[0].src = nir_src_for_ssa(v2); + fadd->src[1].src = nir_src_for_ssa(a); + + memcpy(&fadd->src[0].swizzle, xxxx, sizeof(xxxx)); + memcpy(&fadd->src[1].swizzle, xxxx, sizeof(xxxx)); + + nir_builder_alu_instr_finish_and_insert(&bld, fadd); + + fadd->dest.dest.ssa.num_components = 2; + fadd->dest.write_mask = 3; + + nir_pop_if(&bld, nif); + + EXPECT_FALSE(nir_opt_comparison_pre_impl(bld.impl)); +} diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 5e91f8815e8..e993ec32382 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -1422,15 +1422,17 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode, default: break; } - } - - if (storage_class == SpvStorageClassWorkgroup && - b->options->lower_workgroup_access_to_offsets) { + } else if (storage_class == SpvStorageClassWorkgroup && + b->options->lower_workgroup_access_to_offsets) { + /* Workgroup is laid out by the implementation. 
*/ uint32_t size, align; val->type->deref = vtn_type_layout_std430(b, val->type->deref, &size, &align); val->type->length = size; val->type->align = align; + + /* Override any ArrayStride previously set. */ + val->type->stride = vtn_align_u32(size, align); } } break; @@ -2089,19 +2091,17 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, vtn_value(b, w[4], vtn_value_type_pointer)->pointer; return; } else if (opcode == SpvOpImage) { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_pointer); struct vtn_value *src_val = vtn_untyped_value(b, w[3]); if (src_val->value_type == vtn_value_type_sampled_image) { - val->pointer = src_val->sampled_image->image; + vtn_push_value_pointer(b, w[2], src_val->sampled_image->image); } else { vtn_assert(src_val->value_type == vtn_value_type_pointer); - val->pointer = src_val->pointer; + vtn_push_value_pointer(b, w[2], src_val->pointer); } return; } struct vtn_type *ret_type = vtn_value(b, w[1], vtn_value_type_type)->type; - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); struct vtn_sampled_image sampled; struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]); @@ -2415,8 +2415,9 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, } } - val->ssa = vtn_create_ssa_value(b, ret_type->type); - val->ssa->def = &instr->dest.ssa; + struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, ret_type->type); + ssa->def = &instr->dest.ssa; + vtn_push_ssa(b, w[2], ret_type, ssa); nir_builder_instr_insert(&b->nb, &instr->instr); } @@ -2606,6 +2607,8 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, intrin->src[2] = nir_src_for_ssa(image.sample); } + nir_intrinsic_set_access(intrin, image.image->access); + switch (opcode) { case SpvOpAtomicLoad: case SpvOpImageQuerySize: @@ -2644,7 +2647,6 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, } if (opcode != SpvOpImageWrite && opcode != SpvOpAtomicStore) { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; unsigned dest_components = glsl_get_vector_elements(type->type); @@ -2661,7 +2663,8 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode, if (intrin->num_components != dest_components) result = nir_channels(&b->nb, result, (1 << dest_components) - 1); - val->ssa = vtn_create_ssa_value(b, type->type); + struct vtn_value *val = + vtn_push_ssa(b, w[2], type, vtn_create_ssa_value(b, type->type)); val->ssa->def = result; } else { nir_builder_instr_insert(&b->nb, &intrin->instr); @@ -2972,10 +2975,10 @@ vtn_handle_atomics(struct vtn_builder *b, SpvOp opcode, glsl_get_vector_elements(type->type), glsl_get_bit_size(type->type), NULL); - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - val->ssa = rzalloc(b, struct vtn_ssa_value); - val->ssa->def = &atomic->dest.ssa; - val->ssa->type = type->type; + struct vtn_ssa_value *ssa = rzalloc(b, struct vtn_ssa_value); + ssa->def = &atomic->dest.ssa; + ssa->type = type->type; + vtn_push_ssa(b, w[2], type, ssa); } nir_builder_instr_insert(&b->nb, &atomic->instr); @@ -3215,65 +3218,65 @@ static void vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, const uint32_t *w, unsigned count) { - struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); - const struct glsl_type *type = - vtn_value(b, w[1], vtn_value_type_type)->type->type; - val->ssa = vtn_create_ssa_value(b, type); + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, 
type->type); switch (opcode) { case SpvOpVectorExtractDynamic: - val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def, - vtn_ssa_value(b, w[4])->def); + ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def); break; case SpvOpVectorInsertDynamic: - val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def, - vtn_ssa_value(b, w[4])->def, - vtn_ssa_value(b, w[5])->def); + ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + vtn_ssa_value(b, w[5])->def); break; case SpvOpVectorShuffle: - val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type), - vtn_ssa_value(b, w[3])->def, - vtn_ssa_value(b, w[4])->def, - w + 5); + ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type->type), + vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + w + 5); break; case SpvOpCompositeConstruct: { unsigned elems = count - 3; assume(elems >= 1); - if (glsl_type_is_vector_or_scalar(type)) { + if (glsl_type_is_vector_or_scalar(type->type)) { nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS]; for (unsigned i = 0; i < elems; i++) srcs[i] = vtn_ssa_value(b, w[3 + i])->def; - val->ssa->def = - vtn_vector_construct(b, glsl_get_vector_elements(type), + ssa->def = + vtn_vector_construct(b, glsl_get_vector_elements(type->type), elems, srcs); } else { - val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); for (unsigned i = 0; i < elems; i++) - val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); + ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); } break; } case SpvOpCompositeExtract: - val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), - w + 4, count - 4); + ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), + w + 4, count - 4); break; case SpvOpCompositeInsert: - val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), - vtn_ssa_value(b, w[3]), - w + 5, count - 5); + ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), + vtn_ssa_value(b, w[3]), + w + 5, count - 5); break; case SpvOpCopyObject: - val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); + ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); break; default: vtn_fail_with_opcode("unknown composite operation", opcode); } + + vtn_push_ssa(b, w[2], type, ssa); } static void @@ -3389,13 +3392,13 @@ vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode, } case SpvOpControlBarrier: { - SpvScope execution_scope = vtn_constant_uint(b, w[1]); - if (execution_scope == SpvScopeWorkgroup) - vtn_emit_barrier(b, nir_intrinsic_barrier); - SpvScope memory_scope = vtn_constant_uint(b, w[2]); SpvMemorySemanticsMask memory_semantics = vtn_constant_uint(b, w[3]); vtn_emit_memory_barrier(b, memory_scope, memory_semantics); + + SpvScope execution_scope = vtn_constant_uint(b, w[1]); + if (execution_scope == SpvScopeWorkgroup) + vtn_emit_barrier(b, nir_intrinsic_barrier); break; } diff --git a/src/compiler/spirv/vtn_cfg.c b/src/compiler/spirv/vtn_cfg.c index 323e3da52c4..b5d4aeb5210 100644 --- a/src/compiler/spirv/vtn_cfg.c +++ b/src/compiler/spirv/vtn_cfg.c @@ -328,17 +328,12 @@ vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode, } else if (type->base_type == vtn_base_type_pointer && type->type != NULL) { /* This is a pointer with an actual storage type */ - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_pointer); nir_ssa_def *ssa_ptr = nir_load_param(&b->nb, b->func_param_idx++); - val->pointer = 
vtn_pointer_from_ssa(b, ssa_ptr, type); + vtn_push_value_pointer(b, w[2], vtn_pointer_from_ssa(b, ssa_ptr, type)); } else if (type->base_type == vtn_base_type_pointer || type->base_type == vtn_base_type_image || type->base_type == vtn_base_type_sampler) { - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_pointer); - val->pointer = - vtn_load_param_pointer(b, type, b->func_param_idx++); + vtn_push_value_pointer(b, w[2], vtn_load_param_pointer(b, type, b->func_param_idx++)); } else { /* We're a regular SSA value. */ struct vtn_ssa_value *value = vtn_create_ssa_value(b, type->type); diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h index cfe2893e04f..0f1f30e3d0a 100644 --- a/src/compiler/spirv/vtn_private.h +++ b/src/compiler/spirv/vtn_private.h @@ -269,6 +269,9 @@ struct vtn_ssa_value { struct vtn_ssa_value *transposed; const struct glsl_type *type; + + /* Access qualifiers */ + enum gl_access_qualifier access; }; enum vtn_base_type { @@ -416,6 +419,9 @@ struct vtn_access_chain { */ bool ptr_as_array; + /* Access qualifiers */ + enum gl_access_qualifier access; + /** Struct elements and array offsets. * * This is an array of 1 so that it can conveniently be created on the @@ -645,6 +651,10 @@ vtn_untyped_value(struct vtn_builder *b, uint32_t value_id) return &b->values[value_id]; } +/* Consider not using this function directly and instead use + * vtn_push_ssa/vtn_push_value_pointer so that appropriate applying of + * decorations is handled by common code. + */ static inline struct vtn_value * vtn_push_value(struct vtn_builder *b, uint32_t value_id, enum vtn_value_type value_type) @@ -656,22 +666,8 @@ vtn_push_value(struct vtn_builder *b, uint32_t value_id, value_id); val->value_type = value_type; - return &b->values[value_id]; -} -static inline struct vtn_value * -vtn_push_ssa(struct vtn_builder *b, uint32_t value_id, - struct vtn_type *type, struct vtn_ssa_value *ssa) -{ - struct vtn_value *val; - if (type->base_type == vtn_base_type_pointer) { - val = vtn_push_value(b, value_id, vtn_value_type_pointer); - val->pointer = vtn_pointer_from_ssa(b, ssa->def, type); - } else { - val = vtn_push_value(b, value_id, vtn_value_type_ssa); - val->ssa = ssa; - } - return val; + return &b->values[value_id]; } static inline struct vtn_value * @@ -706,8 +702,43 @@ vtn_constant_uint(struct vtn_builder *b, uint32_t value_id) } } +static inline enum gl_access_qualifier vtn_value_access(struct vtn_value *value) +{ + switch (value->value_type) { + case vtn_value_type_invalid: + case vtn_value_type_undef: + case vtn_value_type_string: + case vtn_value_type_decoration_group: + case vtn_value_type_constant: + case vtn_value_type_function: + case vtn_value_type_block: + case vtn_value_type_extension: + return 0; + case vtn_value_type_type: + return value->type->access; + case vtn_value_type_pointer: + return value->pointer->access; + case vtn_value_type_ssa: + return value->ssa->access; + case vtn_value_type_image_pointer: + return value->image->image->access; + case vtn_value_type_sampled_image: + return value->sampled_image->image->access | + value->sampled_image->sampler->access; + } + + unreachable("invalid type"); +} + struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id); +struct vtn_value *vtn_push_value_pointer(struct vtn_builder *b, + uint32_t value_id, + struct vtn_pointer *ptr); + +struct vtn_value *vtn_push_ssa(struct vtn_builder *b, uint32_t value_id, + struct vtn_type *type, struct vtn_ssa_value *ssa); + struct vtn_ssa_value 
*vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type); diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c index 6fbe6900e48..aa9ee39ba78 100644 --- a/src/compiler/spirv/vtn_variables.c +++ b/src/compiler/spirv/vtn_variables.c @@ -30,6 +30,52 @@ #include "nir_deref.h" #include +static void ptr_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, + void *void_ptr); + +struct vtn_value * +vtn_push_value_pointer(struct vtn_builder *b, uint32_t value_id, + struct vtn_pointer *ptr) +{ + struct vtn_value *val = vtn_push_value(b, value_id, vtn_value_type_pointer); + val->pointer = ptr; + vtn_foreach_decoration(b, val, ptr_decoration_cb, ptr); + return val; +} + +static void +ssa_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_ssa) +{ + struct vtn_ssa_value *ssa = void_ssa; + + switch (dec->decoration) { + case SpvDecorationNonUniformEXT: + ssa->access |= ACCESS_NON_UNIFORM; + break; + + default: + break; + } +} + +struct vtn_value * +vtn_push_ssa(struct vtn_builder *b, uint32_t value_id, + struct vtn_type *type, struct vtn_ssa_value *ssa) +{ + struct vtn_value *val; + if (type->base_type == vtn_base_type_pointer) { + val = vtn_push_value_pointer(b, value_id, vtn_pointer_from_ssa(b, ssa->def, type)); + } else { + val = vtn_push_value(b, value_id, vtn_value_type_ssa); + val->ssa = ssa; + vtn_foreach_decoration(b, val, ssa_decoration_cb, val->ssa); + } + return val; +} + static struct vtn_access_chain * vtn_access_chain_create(struct vtn_builder *b, unsigned length) { @@ -189,7 +235,7 @@ vtn_nir_deref_pointer_dereference(struct vtn_builder *b, struct vtn_access_chain *deref_chain) { struct vtn_type *type = base->type; - enum gl_access_qualifier access = base->access; + enum gl_access_qualifier access = base->access | deref_chain->access; unsigned idx = 0; nir_deref_instr *tail; @@ -2349,6 +2395,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, case SpvOpInBoundsAccessChain: case SpvOpInBoundsPtrAccessChain: { struct vtn_access_chain *chain = vtn_access_chain_create(b, count - 4); + enum gl_access_qualifier access = 0; chain->ptr_as_array = (opcode == SpvOpPtrAccessChain || opcode == SpvOpInBoundsPtrAccessChain); unsigned idx = 0; @@ -2376,8 +2423,8 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, } else { chain->link[idx].mode = vtn_access_mode_id; chain->link[idx].id = w[i]; - } + access |= vtn_value_access(link_val); idx++; } @@ -2404,11 +2451,11 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, val->sampled_image->sampler); } else { vtn_assert(base_val->value_type == vtn_value_type_pointer); - struct vtn_value *val = - vtn_push_value(b, w[2], vtn_value_type_pointer); - val->pointer = vtn_pointer_dereference(b, base_val->pointer, chain); - val->pointer->ptr_type = ptr_type; - vtn_foreach_decoration(b, val, ptr_decoration_cb, val->pointer); + struct vtn_pointer *ptr = + vtn_pointer_dereference(b, base_val->pointer, chain); + ptr->ptr_type = ptr_type; + ptr->access |= access; + vtn_push_value_pointer(b, w[2], ptr); } break; } @@ -2433,7 +2480,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, if (glsl_type_is_image(res_type->type) || glsl_type_is_sampler(res_type->type)) { - vtn_push_value(b, w[2], vtn_value_type_pointer)->pointer = src; + vtn_push_value_pointer(b, w[2], src); return; } @@ -2545,10 +2592,11 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, 
"scalar type"); /* The pointer will be converted to an SSA value automatically */ - nir_ssa_def *ptr_ssa = vtn_ssa_value(b, w[3])->def; + struct vtn_ssa_value *ptr_ssa = vtn_ssa_value(b, w[3]); u_val->ssa = vtn_create_ssa_value(b, u_val->type->type); - u_val->ssa->def = nir_sloppy_bitcast(&b->nb, ptr_ssa, u_val->type->type); + u_val->ssa->def = nir_sloppy_bitcast(&b->nb, ptr_ssa->def, u_val->type->type); + u_val->ssa->access |= ptr_ssa->access; break; } @@ -2568,6 +2616,8 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, nir_ssa_def *ptr_ssa = nir_sloppy_bitcast(&b->nb, u_val->ssa->def, ptr_val->type->type); ptr_val->pointer = vtn_pointer_from_ssa(b, ptr_ssa, ptr_val->type); + vtn_foreach_decoration(b, ptr_val, ptr_decoration_cb, ptr_val->pointer); + ptr_val->pointer->access |= u_val->ssa->access; break; } diff --git a/src/egl/Android.mk b/src/egl/Android.mk index a9319f56ae7..d69be3350bd 100644 --- a/src/egl/Android.mk +++ b/src/egl/Android.mk @@ -71,6 +71,10 @@ ifeq ($(BOARD_USES_DRM_GRALLOC),true) LOCAL_SHARED_LIBRARIES += libgralloc_drm endif +ifeq ($(strip $(BOARD_USES_GRALLOC1)),true) +LOCAL_CFLAGS += -DHAVE_GRALLOC1 +endif + ifeq ($(filter $(MESA_ANDROID_MAJOR_VERSION), 4 5 6 7),) LOCAL_SHARED_LIBRARIES += libnativewindow endif @@ -88,6 +92,6 @@ endif LOCAL_MODULE := libGLES_mesa LOCAL_MODULE_RELATIVE_PATH := egl - +LOCAL_CFLAGS += -Wno-error include $(MESA_COMMON_MK) include $(BUILD_SHARED_LIBRARY) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index d584bccdebe..0efd30a144f 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -65,6 +65,7 @@ #include "util/u_atomic.h" #include "util/u_vector.h" #include "mapi/glapi/glapi.h" +#include "util/bitscan.h" /* Additional definitions not yet in the drm_fourcc.h. 
*/ @@ -152,11 +153,7 @@ const __DRIuseInvalidateExtension use_invalidate = { static const EGLint dri2_to_egl_attribute_map[__DRI_ATTRIB_MAX] = { [__DRI_ATTRIB_BUFFER_SIZE ] = EGL_BUFFER_SIZE, [__DRI_ATTRIB_LEVEL] = EGL_LEVEL, - [__DRI_ATTRIB_RED_SIZE] = EGL_RED_SIZE, - [__DRI_ATTRIB_GREEN_SIZE] = EGL_GREEN_SIZE, - [__DRI_ATTRIB_BLUE_SIZE] = EGL_BLUE_SIZE, [__DRI_ATTRIB_LUMINANCE_SIZE] = EGL_LUMINANCE_SIZE, - [__DRI_ATTRIB_ALPHA_SIZE] = EGL_ALPHA_SIZE, [__DRI_ATTRIB_DEPTH_SIZE] = EGL_DEPTH_SIZE, [__DRI_ATTRIB_STENCIL_SIZE] = EGL_STENCIL_SIZE, [__DRI_ATTRIB_SAMPLE_BUFFERS] = EGL_SAMPLE_BUFFERS, @@ -191,10 +188,50 @@ dri2_match_config(const _EGLConfig *conf, const _EGLConfig *criteria) return EGL_TRUE; } +void +dri2_get_shifts_and_sizes(const __DRIcoreExtension *core, + const __DRIconfig *config, int *shifts, + unsigned int *sizes) +{ + unsigned int mask; + + if (core->getConfigAttrib(config, __DRI_ATTRIB_RED_SHIFT, (unsigned int *)&shifts[0])) { + core->getConfigAttrib(config, __DRI_ATTRIB_GREEN_SHIFT, (unsigned int *)&shifts[1]); + core->getConfigAttrib(config, __DRI_ATTRIB_BLUE_SHIFT, (unsigned int *)&shifts[2]); + core->getConfigAttrib(config, __DRI_ATTRIB_ALPHA_SHIFT, (unsigned int *)&shifts[3]); + } else { + /* Driver isn't exposing shifts, so convert masks to shifts */ + core->getConfigAttrib(config, __DRI_ATTRIB_RED_MASK, &mask); + shifts[0] = ffs(mask) - 1; + core->getConfigAttrib(config, __DRI_ATTRIB_GREEN_MASK, &mask); + shifts[1] = ffs(mask) - 1; + core->getConfigAttrib(config, __DRI_ATTRIB_BLUE_MASK, &mask); + shifts[2] = ffs(mask) - 1; + core->getConfigAttrib(config, __DRI_ATTRIB_ALPHA_MASK, &mask); + shifts[3] = ffs(mask) - 1; + } + + core->getConfigAttrib(config, __DRI_ATTRIB_RED_SIZE, &sizes[0]); + core->getConfigAttrib(config, __DRI_ATTRIB_GREEN_SIZE, &sizes[1]); + core->getConfigAttrib(config, __DRI_ATTRIB_BLUE_SIZE, &sizes[2]); + core->getConfigAttrib(config, __DRI_ATTRIB_ALPHA_SIZE, &sizes[3]); +} + +void +dri2_get_render_type_float(const __DRIcoreExtension *core, + const __DRIconfig *config, + bool *is_float) +{ + unsigned int render_type; + + core->getConfigAttrib(config, __DRI_ATTRIB_RENDER_TYPE, &render_type); + *is_float = (render_type & __DRI_ATTRIB_FLOAT_BIT) ? 
true : false; +} + struct dri2_egl_config * dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, EGLint surface_type, const EGLint *attr_list, - const unsigned int *rgba_masks) + const int *rgba_shifts, const unsigned int *rgba_sizes) { struct dri2_egl_config *conf; struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); @@ -202,7 +239,8 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, unsigned int attrib, value, double_buffer; bool srgb = false; EGLint key, bind_to_texture_rgb, bind_to_texture_rgba; - unsigned int dri_masks[4] = { 0, 0, 0, 0 }; + int dri_shifts[4] = { -1, -1, -1, -1 }; + unsigned int dri_sizes[4] = { 0, 0, 0, 0 }; _EGLConfig *matching_config; EGLint num_configs = 0; EGLint config_id; @@ -219,6 +257,9 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, switch (attrib) { case __DRI_ATTRIB_RENDER_TYPE: + if (value & __DRI_ATTRIB_FLOAT_BIT) + _eglSetConfigKey(&base, EGL_COLOR_COMPONENT_TYPE_EXT, + EGL_COLOR_COMPONENT_TYPE_FLOAT_EXT); if (value & __DRI_ATTRIB_RGBA_BIT) value = EGL_RGB_BUFFER; else if (value & __DRI_ATTRIB_LUMINANCE_BIT) @@ -250,20 +291,56 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, double_buffer = value; break; + case __DRI_ATTRIB_RED_SIZE: + dri_sizes[0] = value; + _eglSetConfigKey(&base, EGL_RED_SIZE, value); + break; + case __DRI_ATTRIB_RED_MASK: - dri_masks[0] = value; + dri_shifts[0] = ffs(value) - 1; + break; + + case __DRI_ATTRIB_RED_SHIFT: + dri_shifts[0] = value; + break; + + case __DRI_ATTRIB_GREEN_SIZE: + dri_sizes[1] = value; + _eglSetConfigKey(&base, EGL_GREEN_SIZE, value); break; case __DRI_ATTRIB_GREEN_MASK: - dri_masks[1] = value; + dri_shifts[1] = ffs(value) - 1; + break; + + case __DRI_ATTRIB_GREEN_SHIFT: + dri_shifts[1] = value; + break; + + case __DRI_ATTRIB_BLUE_SIZE: + dri_sizes[2] = value; + _eglSetConfigKey(&base, EGL_BLUE_SIZE, value); break; case __DRI_ATTRIB_BLUE_MASK: - dri_masks[2] = value; + dri_shifts[2] = ffs(value) - 1; + break; + + case __DRI_ATTRIB_BLUE_SHIFT: + dri_shifts[2] = value; + break; + + case __DRI_ATTRIB_ALPHA_SIZE: + dri_sizes[3] = value; + _eglSetConfigKey(&base, EGL_ALPHA_SIZE, value); break; case __DRI_ATTRIB_ALPHA_MASK: - dri_masks[3] = value; + dri_shifts[3] = ffs(value) - 1; + break; + + case __DRI_ATTRIB_ALPHA_SHIFT: + dri_shifts[3] = value; break; case __DRI_ATTRIB_ACCUM_RED_SIZE: @@ -305,7 +382,10 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, for (int i = 0; attr_list[i] != EGL_NONE; i += 2) _eglSetConfigKey(&base, attr_list[i], attr_list[i+1]); - if (rgba_masks && memcmp(rgba_masks, dri_masks, sizeof(dri_masks))) + if (rgba_shifts && memcmp(rgba_shifts, dri_shifts, sizeof(dri_shifts))) + return NULL; + + if (rgba_sizes && memcmp(rgba_sizes, dri_sizes, sizeof(dri_sizes))) return NULL; base.NativeRenderable = EGL_TRUE; @@ -1424,6 +1504,37 @@ dri2_surf_update_fence_fd(_EGLContext *ctx, dri2_surface_set_out_fence_fd(surf, fence_fd); } +EGLBoolean +dri2_create_drawable(struct dri2_egl_display *dri2_dpy, + const __DRIconfig *config, + struct dri2_egl_surface *dri2_surf) +{ + __DRIcreateNewDrawableFunc createNewDrawable; + void *loaderPrivate = dri2_surf; + + if (dri2_dpy->image_driver) + createNewDrawable = dri2_dpy->image_driver->createNewDrawable; + else if (dri2_dpy->dri2) + createNewDrawable = dri2_dpy->dri2->createNewDrawable; + else if (dri2_dpy->swrast) + createNewDrawable = dri2_dpy->swrast->createNewDrawable; + else + return _eglError(EGL_BAD_ALLOC, "no 
createNewDrawable"); + + /* As always gbm is a bit special.. */ +#ifdef HAVE_DRM_PLATFORM + if (dri2_surf->gbm_surf) + loaderPrivate = dri2_surf->gbm_surf; +#endif + + dri2_surf->dri_drawable = (*createNewDrawable)(dri2_dpy->dri_screen, + config, loaderPrivate); + if (dri2_surf->dri_drawable == NULL) + return _eglError(EGL_BAD_ALLOC, "createNewDrawable"); + + return EGL_TRUE; +} + /** * Called via eglMakeCurrent(), drv->API.MakeCurrent(). */ @@ -2244,6 +2355,8 @@ dri2_num_fourcc_format_planes(EGLint format) case DRM_FORMAT_ABGR2101010: case DRM_FORMAT_RGBA1010102: case DRM_FORMAT_BGRA1010102: + case DRM_FORMAT_XBGR16161616F: + case DRM_FORMAT_ABGR16161616F: case DRM_FORMAT_YUYV: case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: @@ -2627,21 +2740,39 @@ dri2_export_dma_buf_image_query_mesa(_EGLDriver *drv, _EGLDisplay *disp, { struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); struct dri2_egl_image *dri2_img = dri2_egl_image(img); + int num_planes; (void) drv; if (!dri2_can_export_dma_buf_image(disp, img)) return EGL_FALSE; + dri2_dpy->image->queryImage(dri2_img->dri_image, + __DRI_IMAGE_ATTRIB_NUM_PLANES, &num_planes); if (nplanes) - dri2_dpy->image->queryImage(dri2_img->dri_image, - __DRI_IMAGE_ATTRIB_NUM_PLANES, nplanes); + *nplanes = num_planes; + if (fourcc) dri2_dpy->image->queryImage(dri2_img->dri_image, __DRI_IMAGE_ATTRIB_FOURCC, fourcc); - if (modifiers) - *modifiers = 0; + if (modifiers) { + int mod_hi, mod_lo; + uint64_t modifier = DRM_FORMAT_MOD_INVALID; + bool query; + + query = dri2_dpy->image->queryImage(dri2_img->dri_image, + __DRI_IMAGE_ATTRIB_MODIFIER_UPPER, + &mod_hi); + query &= dri2_dpy->image->queryImage(dri2_img->dri_image, + __DRI_IMAGE_ATTRIB_MODIFIER_LOWER, + &mod_lo); + if (query) + modifier = combine_u32_into_u64 (mod_hi, mod_lo); + + for (int i = 0; i < num_planes; i++) + modifiers[i] = modifier; + } return EGL_TRUE; } diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index aa143deb867..2d47b3a195a 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -69,6 +69,10 @@ struct zwp_linux_dmabuf_v1; #include #endif /* HAVE_ANDROID_PLATFORM */ +#ifdef HAVE_GRALLOC1 +#include +#endif + #include "eglconfig.h" #include "eglcontext.h" #include "egldevice.h" @@ -239,6 +243,13 @@ struct dri2_egl_display #ifdef HAVE_ANDROID_PLATFORM const gralloc_module_t *gralloc; +#ifdef HAVE_GRALLOC1 + uint16_t gralloc_version; + gralloc1_device_t *gralloc1_dvc; + GRALLOC1_PFN_LOCK_FLEX pfn_lockflex; + GRALLOC1_PFN_GET_FORMAT pfn_getFormat; + GRALLOC1_PFN_UNLOCK pfn_unlock; +#endif #endif bool is_render_node; @@ -402,10 +413,20 @@ dri2_surface_get_dri_drawable(_EGLSurface *surf); __DRIimage * dri2_lookup_egl_image(__DRIscreen *screen, void *image, void *data); +void +dri2_get_shifts_and_sizes(const __DRIcoreExtension *core, + const __DRIconfig *config, int *shifts, + unsigned int *sizes); + +void +dri2_get_render_type_float(const __DRIcoreExtension *core, + const __DRIconfig *config, + bool *is_float); + struct dri2_egl_config * dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, EGLint surface_type, const EGLint *attr_list, - const unsigned int *rgba_masks); + const int *rgba_shifts, const unsigned int *rgba_sizes); _EGLImage * dri2_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp, @@ -540,6 +561,11 @@ dri2_init_surface(_EGLSurface *surf, _EGLDisplay *disp, EGLint type, void dri2_fini_surface(_EGLSurface *surf); +EGLBoolean +dri2_create_drawable(struct dri2_egl_display *dri2_dpy, + const __DRIconfig 
*config, + struct dri2_egl_surface *dri2_surf); + static inline uint64_t combine_u32_into_u64(uint32_t hi, uint32_t lo) { diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c index e9ea9e6002b..6e2e0fd3125 100644 --- a/src/egl/drivers/dri2/platform_android.c +++ b/src/egl/drivers/dri2/platform_android.c @@ -64,14 +64,26 @@ struct droid_yuv_format { int fourcc; /* __DRI_IMAGE_FOURCC_ */ }; +/* This enumeration can be deleted if Android defined it in + * system/core/include/system/graphics.h + */ +enum { + HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL = 0x100, + HAL_PIXEL_FORMAT_NV12 = 0x10F, + HAL_PIXEL_FORMAT_P010_INTEL = 0x110 +}; + /* The following table is used to look up a DRI image FourCC based * on native format and information contained in android_ycbcr struct. */ static const struct droid_yuv_format droid_yuv_formats[] = { /* Native format, YCrCb, Chroma step, DRI image FourCC */ { HAL_PIXEL_FORMAT_YCbCr_420_888, YCbCr, 2, __DRI_IMAGE_FOURCC_NV12 }, + { HAL_PIXEL_FORMAT_P010_INTEL, YCbCr, 4, __DRI_IMAGE_FOURCC_P010 }, { HAL_PIXEL_FORMAT_YCbCr_420_888, YCbCr, 1, __DRI_IMAGE_FOURCC_YUV420 }, { HAL_PIXEL_FORMAT_YCbCr_420_888, YCrCb, 1, __DRI_IMAGE_FOURCC_YVU420 }, { HAL_PIXEL_FORMAT_YV12, YCrCb, 1, __DRI_IMAGE_FOURCC_YVU420 }, + { HAL_PIXEL_FORMAT_NV12, YCbCr, 2, __DRI_IMAGE_FOURCC_NV12 }, + { HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL, YCbCr, 2, __DRI_IMAGE_FOURCC_NV12 }, /* HACK: See droid_create_image_from_prime_fd() and * https://issuetracker.google.com/32077885. */ { HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED, YCbCr, 2, __DRI_IMAGE_FOURCC_NV12 }, @@ -109,6 +121,9 @@ get_format_bpp(int native) int bpp; switch (native) { + case HAL_PIXEL_FORMAT_RGBA_FP16: + bpp = 8; + break; case HAL_PIXEL_FORMAT_RGBA_8888: case HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED: /* @@ -117,6 +132,7 @@ get_format_bpp(int native) */ case HAL_PIXEL_FORMAT_RGBX_8888: case HAL_PIXEL_FORMAT_BGRA_8888: + case HAL_PIXEL_FORMAT_RGBA_1010102: bpp = 4; break; case HAL_PIXEL_FORMAT_RGB_565: @@ -143,6 +159,8 @@ static int get_fourcc(int native) * TODO: Remove this once https://issuetracker.google.com/32077885 is fixed. */ case HAL_PIXEL_FORMAT_RGBX_8888: return __DRI_IMAGE_FOURCC_XBGR8888; + case HAL_PIXEL_FORMAT_RGBA_FP16: return __DRI_IMAGE_FOURCC_ABGR16161616F; + case HAL_PIXEL_FORMAT_RGBA_1010102: return __DRI_IMAGE_FOURCC_ABGR2101010; default: _eglLog(_EGL_WARNING, "unsupported native buffer format 0x%x", native); } @@ -161,6 +179,8 @@ static int get_format(int format) * TODO: Revert this once https://issuetracker.google.com/32077885 is fixed. 
*/ case HAL_PIXEL_FORMAT_RGBX_8888: return __DRI_IMAGE_FORMAT_XBGR8888; + case HAL_PIXEL_FORMAT_RGBA_FP16: return __DRI_IMAGE_FORMAT_ABGR16161616F; + case HAL_PIXEL_FORMAT_RGBA_1010102: return __DRI_IMAGE_FORMAT_ABGR2101010; default: _eglLog(_EGL_WARNING, "unsupported native buffer format 0x%x", format); } @@ -255,6 +275,47 @@ droid_window_dequeue_buffer(struct dri2_egl_surface *dri2_surf) return EGL_TRUE; } +static int +droid_resolve_format(struct dri2_egl_display *dri2_dpy, + struct ANativeWindowBuffer *buf) +{ + int format = -1; + int ret; + + if (buf->format != HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED) + return buf->format; +#ifdef HAVE_GRALLOC1 + if(dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) { + if (!dri2_dpy->pfn_getFormat) { + _eglLog(_EGL_WARNING, "Gralloc does not support getFormat"); + return -1; + } + ret = dri2_dpy->pfn_getFormat(dri2_dpy->gralloc1_dvc, buf->handle, + &format); + if (ret) { + _eglLog(_EGL_WARNING, "gralloc->getFormat failed: %d", ret); + return -1; + } + } else { +#else + if (!dri2_dpy->gralloc->perform) { + _eglLog(_EGL_WARNING, "gralloc->perform not supported"); + return -1; + } + ret = dri2_dpy->gralloc->perform(dri2_dpy->gralloc, + GRALLOC_DRM_GET_FORMAT, + buf->handle, &format); + if (ret){ + _eglLog(_EGL_WARNING, "gralloc->perform failed with error: %d", ret); + return -1; + } +#endif +#ifdef HAVE_GRALLOC1 + } +#endif + return format; +} + static EGLBoolean droid_window_enqueue_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_surf) { @@ -341,7 +402,6 @@ droid_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, _EGLConfig *conf, void *native_window, const EGLint *attrib_list) { - __DRIcreateNewDrawableFunc createNewDrawable; struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); struct dri2_egl_config *dri2_conf = dri2_egl_config(conf); struct dri2_egl_surface *dri2_surf; @@ -385,17 +445,8 @@ droid_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, goto cleanup_surface; } - if (dri2_dpy->image_driver) - createNewDrawable = dri2_dpy->image_driver->createNewDrawable; - else - createNewDrawable = dri2_dpy->dri2->createNewDrawable; - - dri2_surf->dri_drawable = (*createNewDrawable)(dri2_dpy->dri_screen, config, - dri2_surf); - if (dri2_surf->dri_drawable == NULL) { - _eglError(EGL_BAD_ALLOC, "createNewDrawable"); + if (!dri2_create_drawable(dri2_dpy, config, dri2_surf)) goto cleanup_surface; - } if (window) { window->common.incRef(&window->common); @@ -469,7 +520,7 @@ droid_swap_interval(_EGLDriver *drv, _EGLDisplay *disp, struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf); struct ANativeWindow *window = dri2_surf->window; - if (window->setSwapInterval(window, interval)) + if (window && window->setSwapInterval(window, interval)) return EGL_FALSE; surf->SwapInterval = interval; @@ -670,11 +721,18 @@ droid_query_buffer_age(_EGLDriver *drv, { struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surface); + /* To avoid blocking other EGL calls, release the display mutex before + * we enter droid_window_dequeue_buffer() and re-acquire the mutex upon + * return. + */ + mtx_unlock(&disp->Mutex); if (update_buffers(dri2_surf) < 0) { _eglError(EGL_BAD_ALLOC, "droid_query_buffer_age"); + mtx_lock(&disp->Mutex); return -1; } + mtx_lock(&disp->Mutex); return dri2_surf->back ? 
dri2_surf->back->age : 0; } @@ -737,6 +795,31 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw) return EGL_TRUE; } +static int get_ycbcr_from_flexlayout(struct android_flex_layout *outFlexLayout, struct android_ycbcr *ycbcr) +{ + + for( int i = 0; i < outFlexLayout->num_planes; i++) { + switch(outFlexLayout->planes[i].component){ + case FLEX_COMPONENT_Y: + ycbcr->y = outFlexLayout->planes[i].top_left; + ycbcr->ystride = outFlexLayout->planes[i].v_increment; + break; + case FLEX_COMPONENT_Cb: + ycbcr->cb = outFlexLayout->planes[i].top_left; + ycbcr->cstride = outFlexLayout->planes[i].v_increment; + break; + case FLEX_COMPONENT_Cr: + ycbcr->cr = outFlexLayout->planes[i].top_left; + ycbcr->chroma_step = outFlexLayout->planes[i].h_increment; + break; + default: + _eglLog(_EGL_WARNING,"unknown component 0x%x", __func__, outFlexLayout->planes[i].component); + break; + } + } + return 0; +} + #if ANDROID_API_LEVEL >= 23 static EGLBoolean droid_set_damage_region(_EGLDriver *drv, @@ -786,12 +869,45 @@ droid_create_image_from_prime_fd_yuv(_EGLDisplay *disp, _EGLContext *ctx, int fourcc; int ret; - if (!dri2_dpy->gralloc->lock_ycbcr) { - _eglLog(_EGL_WARNING, "Gralloc does not support lock_ycbcr"); + int format = droid_resolve_format(dri2_dpy, buf); + if (format < 0) { + _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR"); return NULL; } +#ifdef HAVE_GRALLOC1 + struct android_flex_layout outFlexLayout; + gralloc1_rect_t accessRegion; + memset(&ycbcr, 0, sizeof(ycbcr)); + + if (dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) { + if (!dri2_dpy->pfn_lockflex) { + _eglLog(_EGL_WARNING, "Gralloc does not support lockflex"); + return NULL; + } + ret = dri2_dpy->pfn_lockflex(dri2_dpy->gralloc1_dvc, buf->handle, + 0, 0, &accessRegion, &outFlexLayout, -1); + if (ret) { + _eglLog(_EGL_WARNING, "gralloc->lockflex failed: %d", ret); + return NULL; + } + ret = get_ycbcr_from_flexlayout(&outFlexLayout, &ycbcr); + if (ret) { + _eglLog(_EGL_WARNING, "gralloc->lockflex failed: %d", ret); + return NULL; + } + int outReleaseFence = 0; + dri2_dpy->pfn_unlock(dri2_dpy->gralloc1_dvc, buf->handle, &outReleaseFence); + } else { +#endif + if (!dri2_dpy->gralloc->lock_ycbcr) { + _eglLog(_EGL_WARNING, "Gralloc does not support lock_ycbcr"); + return NULL; + } + + memset(&ycbcr, 0, sizeof(ycbcr)); + ret = dri2_dpy->gralloc->lock_ycbcr(dri2_dpy->gralloc, buf->handle, 0, 0, 0, 0, 0, &ycbcr); if (ret) { @@ -804,6 +920,9 @@ droid_create_image_from_prime_fd_yuv(_EGLDisplay *disp, _EGLContext *ctx, return NULL; } dri2_dpy->gralloc->unlock(dri2_dpy->gralloc, buf->handle); +#ifdef HAVE_GRALLOC1 + } +#endif /* When lock_ycbcr's usage argument contains no SW_READ/WRITE flags * it will return the .y/.cb/.cr pointers based on a NULL pointer, @@ -829,14 +948,15 @@ droid_create_image_from_prime_fd_yuv(_EGLDisplay *disp, _EGLContext *ctx, /* .chroma_step is the byte distance between the same chroma channel * values of subsequent pixels, assumed to be the same for Cb and Cr. */ - fourcc = get_fourcc_yuv(buf->format, chroma_order, ycbcr.chroma_step); + fourcc = get_fourcc_yuv(format, chroma_order, ycbcr.chroma_step); if (fourcc == -1) { _eglLog(_EGL_WARNING, "unsupported YUV format, native = %x, chroma_order = %s, chroma_step = %d", - buf->format, chroma_order == YCbCr ? "YCbCr" : "YCrCb", ycbcr.chroma_step); + format, chroma_order == YCbCr ? "YCbCr" : "YCrCb", ycbcr.chroma_step); return NULL; } - if (ycbcr.chroma_step == 2) { + /* FIXME? 
we should not rely on chroma_step */ + if (ycbcr.chroma_step == 2 || ycbcr.chroma_step == 4) { /* Semi-planar Y + CbCr or Y + CrCb format. */ const EGLint attr_list_2plane[] = { EGL_WIDTH, buf->width, @@ -878,9 +998,16 @@ static _EGLImage * droid_create_image_from_prime_fd(_EGLDisplay *disp, _EGLContext *ctx, struct ANativeWindowBuffer *buf, int fd) { + struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); unsigned int pitch; - if (is_yuv(buf->format)) { + int format = droid_resolve_format(dri2_dpy, buf); + if (format < 0) { + _eglLog(_EGL_WARNING, "Could not resolve buffer format"); + return NULL; + } + + if (is_yuv(format)) { _EGLImage *image; image = droid_create_image_from_prime_fd_yuv(disp, ctx, buf, fd); @@ -895,13 +1022,13 @@ droid_create_image_from_prime_fd(_EGLDisplay *disp, _EGLContext *ctx, return image; } - const int fourcc = get_fourcc(buf->format); + const int fourcc = get_fourcc(format); if (fourcc == -1) { _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR"); return NULL; } - pitch = buf->stride * get_format_bpp(buf->format); + pitch = buf->stride * get_format_bpp(format); if (pitch == 0) { _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR"); return NULL; @@ -1143,12 +1270,13 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp) struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); static const struct { int format; - unsigned int rgba_masks[4]; + int rgba_shifts[4]; + unsigned int rgba_sizes[4]; } visuals[] = { - { HAL_PIXEL_FORMAT_RGBA_8888, { 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 } }, - { HAL_PIXEL_FORMAT_RGBX_8888, { 0x000000ff, 0x0000ff00, 0x00ff0000, 0x00000000 } }, - { HAL_PIXEL_FORMAT_RGB_565, { 0x0000f800, 0x000007e0, 0x0000001f, 0x00000000 } }, - { HAL_PIXEL_FORMAT_BGRA_8888, { 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 } }, + { HAL_PIXEL_FORMAT_RGBA_8888, { 0, 8, 16, 24 }, { 8, 8, 8, 8 } }, + { HAL_PIXEL_FORMAT_RGBX_8888, { 0, 8, 16, -1 }, { 8, 8, 8, 0 } }, + { HAL_PIXEL_FORMAT_RGB_565, { 11, 5, 0, -1 }, { 5, 6, 5, 0 } }, + { HAL_PIXEL_FORMAT_BGRA_8888, { 16, 8, 0, 24 }, { 8, 8, 8, 8 } }, }; unsigned int format_count[ARRAY_SIZE(visuals)] = { 0 }; @@ -1187,7 +1315,7 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp) struct dri2_egl_config *dri2_conf = dri2_add_config(disp, dri2_dpy->driver_configs[j], config_count + 1, surface_type, config_attrs, - visuals[i].rgba_masks); + visuals[i].rgba_shifts, visuals[i].rgba_sizes); if (dri2_conf) { if (dri2_conf->base.ConfigID == config_count + 1) config_count++; @@ -1567,6 +1695,22 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *disp) err = "DRI2: failed to get gralloc module"; goto cleanup; } +#ifdef HAVE_GRALLOC1 + hw_device_t *device; + dri2_dpy->gralloc_version = dri2_dpy->gralloc->common.module_api_version; + if (dri2_dpy->gralloc_version == HARDWARE_MODULE_API_VERSION(1, 0)) { + ret = dri2_dpy->gralloc->common.methods->open(&dri2_dpy->gralloc->common, GRALLOC_HARDWARE_MODULE_ID, &device); + if (ret) { + err = "Failed to open hw_device device"; + goto cleanup; + } else { + dri2_dpy->gralloc1_dvc = (gralloc1_device_t *)device; + dri2_dpy->pfn_lockflex = (GRALLOC1_PFN_LOCK_FLEX)dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_LOCK_FLEX); + dri2_dpy->pfn_getFormat = (GRALLOC1_PFN_GET_FORMAT)dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_GET_FORMAT); + dri2_dpy->pfn_unlock = (GRALLOC1_PFN_UNLOCK)dri2_dpy->gralloc1_dvc->getFunction(dri2_dpy->gralloc1_dvc, GRALLOC1_FUNCTION_UNLOCK); + } + } +#endif 
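
Note on the mask-to-shift/size conversion used throughout the EGL hunks above: where a driver or X visual only reports legacy channel masks (__DRI_ATTRIB_*_MASK, visual red/green/blue masks), the updated code derives the channel shift as ffs(mask) - 1 and the channel size as the mask's population count (util_bitcount). The standalone C sketch below is illustration only and not part of the patch; it shows how an old rgba_masks table entry maps onto the new rgba_shifts/rgba_sizes pair, substituting the compiler builtin __builtin_popcount for Mesa's util_bitcount helper.

#include <stdio.h>
#include <strings.h>   /* ffs() */

/* Illustration only: convert a channel bit-mask (e.g. 0x00ff0000 for the red
 * bits of an ARGB8888 visual) into the shift/size pair the reworked EGL
 * config code stores.  The shift is the index of the lowest set bit and the
 * size is the number of set bits; an absent channel (mask == 0) maps to
 * shift -1, size 0, matching the rewritten visual tables above.
 */
static void
mask_to_shift_and_size(unsigned int mask, int *shift, unsigned int *size)
{
   *shift = mask ? ffs(mask) - 1 : -1;
   *size = (unsigned int) __builtin_popcount(mask); /* stand-in for util_bitcount() */
}

int
main(void)
{
   /* ARGB8888 channel masks, as the old rgba_masks tables spelled them. */
   const unsigned int masks[4] = { 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 };
   const char *names[4] = { "red", "green", "blue", "alpha" };

   for (int i = 0; i < 4; i++) {
      int shift;
      unsigned int size;

      mask_to_shift_and_size(masks[i], &shift, &size);
      printf("%-5s mask 0x%08x -> shift %2d, size %u\n",
             names[i], masks[i], shift, size);
   }

   /* Prints shifts 16/8/0/24 and sizes 8/8/8/8, i.e. the { 16, 8, 0, 24 },
    * { 8, 8, 8, 8 } entries used for ARGB8888-style visuals in the updated
    * platform tables.
    */
   return 0;
}
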
disp->DriverData = (void *) dri2_dpy; if (!disp->Options.ForceSoftware) diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c index c1ab1c9b0f6..1da521ada3b 100644 --- a/src/egl/drivers/dri2/platform_drm.c +++ b/src/egl/drivers/dri2/platform_drm.c @@ -96,7 +96,9 @@ dri2_drm_config_is_compatible(struct dri2_egl_display *dri2_dpy, struct gbm_surface *surface) { const struct gbm_dri_visual *visual = NULL; - unsigned int red, green, blue, alpha; + int shifts[4]; + unsigned int sizes[4]; + bool is_float; int i; /* Check that the EGLConfig being used to render to the surface is @@ -104,10 +106,9 @@ dri2_drm_config_is_compatible(struct dri2_egl_display *dri2_dpy, * otherwise-compatible formats is relatively common, explicitly allow * this. */ - dri2_dpy->core->getConfigAttrib(config, __DRI_ATTRIB_RED_MASK, &red); - dri2_dpy->core->getConfigAttrib(config, __DRI_ATTRIB_GREEN_MASK, &green); - dri2_dpy->core->getConfigAttrib(config, __DRI_ATTRIB_BLUE_MASK, &blue); - dri2_dpy->core->getConfigAttrib(config, __DRI_ATTRIB_ALPHA_MASK, &alpha); + dri2_get_shifts_and_sizes(dri2_dpy->core, config, shifts, sizes); + + dri2_get_render_type_float(dri2_dpy->core, config, &is_float); for (i = 0; i < dri2_dpy->gbm_dri->num_visuals; i++) { visual = &dri2_dpy->gbm_dri->visual_table[i]; @@ -118,10 +119,15 @@ dri2_drm_config_is_compatible(struct dri2_egl_display *dri2_dpy, if (i == dri2_dpy->gbm_dri->num_visuals) return false; - if (red != visual->rgba_masks.red || - green != visual->rgba_masks.green || - blue != visual->rgba_masks.blue || - (alpha && visual->rgba_masks.alpha && alpha != visual->rgba_masks.alpha)) { + if (shifts[0] != visual->rgba_shifts.red || + shifts[1] != visual->rgba_shifts.green || + shifts[2] != visual->rgba_shifts.blue || + (shifts[3] > -1 && shifts[3] != visual->rgba_shifts.alpha) || + sizes[0] != visual->rgba_sizes.red || + sizes[1] != visual->rgba_sizes.green || + sizes[2] != visual->rgba_sizes.blue || + (sizes[3] > 0 && sizes[3] != visual->rgba_sizes.alpha) || + is_float != visual->is_float) { return false; } @@ -171,23 +177,8 @@ dri2_drm_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, dri2_surf->base.Height = surf->base.height; surf->dri_private = dri2_surf; - if (dri2_dpy->dri2) { - dri2_surf->dri_drawable = - dri2_dpy->dri2->createNewDrawable(dri2_dpy->dri_screen, config, - dri2_surf->gbm_surf); - - } else { - assert(dri2_dpy->swrast != NULL); - - dri2_surf->dri_drawable = - dri2_dpy->swrast->createNewDrawable(dri2_dpy->dri_screen, config, - dri2_surf->gbm_surf); - - } - if (dri2_surf->dri_drawable == NULL) { - _eglError(EGL_BAD_ALLOC, "createNewDrawable()"); + if (!dri2_create_drawable(dri2_dpy, config, dri2_surf)) goto cleanup_surf; - } return &dri2_surf->base; @@ -627,24 +618,27 @@ drm_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp) memset(format_count, 0, num_visuals * sizeof(unsigned int)); for (unsigned i = 0; dri2_dpy->driver_configs[i]; i++) { - unsigned int red, green, blue, alpha; + const __DRIconfig *config = dri2_dpy->driver_configs[i]; + int shifts[4]; + unsigned int sizes[4]; + bool is_float; + + dri2_get_shifts_and_sizes(dri2_dpy->core, config, shifts, sizes); - dri2_dpy->core->getConfigAttrib(dri2_dpy->driver_configs[i], - __DRI_ATTRIB_RED_MASK, &red); - dri2_dpy->core->getConfigAttrib(dri2_dpy->driver_configs[i], - __DRI_ATTRIB_GREEN_MASK, &green); - dri2_dpy->core->getConfigAttrib(dri2_dpy->driver_configs[i], - __DRI_ATTRIB_BLUE_MASK, &blue); - dri2_dpy->core->getConfigAttrib(dri2_dpy->driver_configs[i], - 
__DRI_ATTRIB_ALPHA_MASK, &alpha); + dri2_get_render_type_float(dri2_dpy->core, config, &is_float); for (unsigned j = 0; j < num_visuals; j++) { struct dri2_egl_config *dri2_conf; - if (visuals[j].rgba_masks.red != red || - visuals[j].rgba_masks.green != green || - visuals[j].rgba_masks.blue != blue || - visuals[j].rgba_masks.alpha != alpha) + if (visuals[j].rgba_shifts.red != shifts[0] || + visuals[j].rgba_shifts.green != shifts[1] || + visuals[j].rgba_shifts.blue != shifts[2] || + visuals[j].rgba_shifts.alpha != shifts[3] || + visuals[j].rgba_sizes.red != sizes[0] || + visuals[j].rgba_sizes.green != sizes[1] || + visuals[j].rgba_sizes.blue != sizes[2] || + visuals[j].rgba_sizes.alpha != sizes[3] || + visuals[j].is_float != is_float) continue; const EGLint attr_list[] = { @@ -653,7 +647,7 @@ drm_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp) }; dri2_conf = dri2_add_config(disp, dri2_dpy->driver_configs[i], - config_count + 1, EGL_WINDOW_BIT, attr_list, NULL); + config_count + 1, EGL_WINDOW_BIT, attr_list, NULL, NULL); if (dri2_conf) { if (dri2_conf->base.ConfigID == config_count + 1) config_count++; @@ -730,6 +724,7 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp) goto cleanup; } } + dri2_dpy->gbm_dri = gbm_dri_device(gbm); if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) { err = "DRI2: gbm device using incorrect/incompatible backend"; @@ -744,7 +739,6 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp) disp->Device = dev; - dri2_dpy->gbm_dri = gbm_dri_device(gbm); dri2_dpy->driver_name = strdup(dri2_dpy->gbm_dri->driver_name); dri2_dpy->dri_screen = dri2_dpy->gbm_dri->screen; diff --git a/src/egl/drivers/dri2/platform_surfaceless.c b/src/egl/drivers/dri2/platform_surfaceless.c index fefb2b449d0..4d594e7e336 100644 --- a/src/egl/drivers/dri2/platform_surfaceless.c +++ b/src/egl/drivers/dri2/platform_surfaceless.c @@ -135,13 +135,8 @@ dri2_surfaceless_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, goto cleanup_surface; } - dri2_surf->dri_drawable = - dri2_dpy->image_driver->createNewDrawable(dri2_dpy->dri_screen, config, - dri2_surf); - if (dri2_surf->dri_drawable == NULL) { - _eglError(EGL_BAD_ALLOC, "image->createNewDrawable"); + if (!dri2_create_drawable(dri2_dpy, config, dri2_surf)) goto cleanup_surface; - } if (conf->RedSize == 5) dri2_surf->visual = __DRI_IMAGE_FORMAT_RGB565; @@ -186,11 +181,12 @@ surfaceless_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp) struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); static const struct { const char *format_name; - unsigned int rgba_masks[4]; + int rgba_shifts[4]; + unsigned int rgba_sizes[4]; } visuals[] = { - { "ARGB8888", { 0xff0000, 0xff00, 0xff, 0xff000000 } }, - { "RGB888", { 0xff0000, 0xff00, 0xff, 0x0 } }, - { "RGB565", { 0x00f800, 0x07e0, 0x1f, 0x0 } }, + { "ARGB8888", { 16, 8, 0, 24 }, { 8, 8, 8, 8 } }, + { "RGB888", { 16, 8, 0, -1 }, { 8, 8, 8, 0 } }, + { "RGB565", { 11, 5, 0, -1 }, { 5, 6, 5, 0 } }, }; unsigned int format_count[ARRAY_SIZE(visuals)] = { 0 }; unsigned int config_count = 0; @@ -201,7 +197,7 @@ surfaceless_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp) dri2_conf = dri2_add_config(disp, dri2_dpy->driver_configs[i], config_count + 1, EGL_PBUFFER_BIT, NULL, - visuals[j].rgba_masks); + visuals[j].rgba_shifts, visuals[j].rgba_sizes); if (dri2_conf) { if (dri2_conf->base.ConfigID == config_count + 1) diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index 2a2c8214169..46e637444be 100644 --- 
a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -68,49 +68,57 @@ static const struct dri2_wl_visual { */ int alt_dri_image_format; int bpp; - unsigned int rgba_masks[4]; + int rgba_shifts[4]; + unsigned int rgba_sizes[4]; } dri2_wl_visuals[] = { { "XRGB2101010", WL_DRM_FORMAT_XRGB2101010, WL_SHM_FORMAT_XRGB2101010, __DRI_IMAGE_FORMAT_XRGB2101010, __DRI_IMAGE_FORMAT_XBGR2101010, 32, - { 0x3ff00000, 0x000ffc00, 0x000003ff, 0x00000000 } + { 20, 10, 0, -1 }, + { 10, 10, 10, 0 }, }, { "ARGB2101010", WL_DRM_FORMAT_ARGB2101010, WL_SHM_FORMAT_ARGB2101010, __DRI_IMAGE_FORMAT_ARGB2101010, __DRI_IMAGE_FORMAT_ABGR2101010, 32, - { 0x3ff00000, 0x000ffc00, 0x000003ff, 0xc0000000 } + { 20, 10, 0, 30 }, + { 10, 10, 10, 2 }, }, { "XBGR2101010", WL_DRM_FORMAT_XBGR2101010, WL_SHM_FORMAT_XBGR2101010, __DRI_IMAGE_FORMAT_XBGR2101010, __DRI_IMAGE_FORMAT_XRGB2101010, 32, - { 0x000003ff, 0x000ffc00, 0x3ff00000, 0x00000000 } + { 0, 10, 20, -1 }, + { 10, 10, 10, 0 }, }, { "ABGR2101010", WL_DRM_FORMAT_ABGR2101010, WL_SHM_FORMAT_ABGR2101010, __DRI_IMAGE_FORMAT_ABGR2101010, __DRI_IMAGE_FORMAT_ARGB2101010, 32, - { 0x000003ff, 0x000ffc00, 0x3ff00000, 0xc0000000 } + { 0, 10, 20, 30 }, + { 10, 10, 10, 2 }, }, { "XRGB8888", WL_DRM_FORMAT_XRGB8888, WL_SHM_FORMAT_XRGB8888, __DRI_IMAGE_FORMAT_XRGB8888, __DRI_IMAGE_FORMAT_NONE, 32, - { 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000 } + { 16, 8, 0, -1 }, + { 8, 8, 8, 0 }, }, { "ARGB8888", WL_DRM_FORMAT_ARGB8888, WL_SHM_FORMAT_ARGB8888, __DRI_IMAGE_FORMAT_ARGB8888, __DRI_IMAGE_FORMAT_NONE, 32, - { 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 } + { 16, 8, 0, 24 }, + { 8, 8, 8, 8 }, }, { "RGB565", WL_DRM_FORMAT_RGB565, WL_SHM_FORMAT_RGB565, __DRI_IMAGE_FORMAT_RGB565, __DRI_IMAGE_FORMAT_NONE, 16, - { 0xf800, 0x07e0, 0x001f, 0x0000 } + { 11, 5, 0, -1 }, + { 5, 6, 5, 0 }, }, }; @@ -118,20 +126,22 @@ static int dri2_wl_visual_idx_from_config(struct dri2_egl_display *dri2_dpy, const __DRIconfig *config) { - unsigned int red, green, blue, alpha; + int shifts[4]; + unsigned int sizes[4]; - dri2_dpy->core->getConfigAttrib(config, __DRI_ATTRIB_RED_MASK, &red); - dri2_dpy->core->getConfigAttrib(config, __DRI_ATTRIB_GREEN_MASK, &green); - dri2_dpy->core->getConfigAttrib(config, __DRI_ATTRIB_BLUE_MASK, &blue); - dri2_dpy->core->getConfigAttrib(config, __DRI_ATTRIB_ALPHA_MASK, &alpha); + dri2_get_shifts_and_sizes(dri2_dpy->core, config, shifts, sizes); for (unsigned int i = 0; i < ARRAY_SIZE(dri2_wl_visuals); i++) { const struct dri2_wl_visual *wl_visual = &dri2_wl_visuals[i]; - if (red == wl_visual->rgba_masks[0] && - green == wl_visual->rgba_masks[1] && - blue == wl_visual->rgba_masks[2] && - alpha == wl_visual->rgba_masks[3]) { + if (shifts[0] == wl_visual->rgba_shifts[0] && + shifts[1] == wl_visual->rgba_shifts[1] && + shifts[2] == wl_visual->rgba_shifts[2] && + shifts[3] == wl_visual->rgba_shifts[3] && + sizes[0] == wl_visual->rgba_sizes[0] && + sizes[1] == wl_visual->rgba_sizes[1] && + sizes[2] == wl_visual->rgba_sizes[2] && + sizes[3] == wl_visual->rgba_sizes[3]) { return i; } } @@ -272,7 +282,6 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf, void *native_window, const EGLint *attrib_list) { - __DRIcreateNewDrawableFunc createNewDrawable; struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); struct dri2_egl_config *dri2_conf = dri2_egl_config(conf); struct wl_egl_window *window = native_window; @@ -349,19 +358,8 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp, if (dri2_dpy->flush) 
dri2_surf->wl_win->resize_callback = resize_callback; - if (dri2_dpy->image_driver) - createNewDrawable = dri2_dpy->image_driver->createNewDrawable; - else if (dri2_dpy->dri2) - createNewDrawable = dri2_dpy->dri2->createNewDrawable; - else - createNewDrawable = dri2_dpy->swrast->createNewDrawable; - - dri2_surf->dri_drawable = (*createNewDrawable)(dri2_dpy->dri_screen, config, - dri2_surf); - if (dri2_surf->dri_drawable == NULL) { - _eglError(EGL_BAD_ALLOC, "createNewDrawable"); + if (!dri2_create_drawable(dri2_dpy, config, dri2_surf)) goto cleanup_surf_wrapper; - } dri2_surf->base.SwapInterval = dri2_dpy->default_swap_interval; @@ -675,6 +673,15 @@ update_buffers(struct dri2_egl_surface *dri2_surf) return 0; } +static int +update_buffers_if_needed(struct dri2_egl_surface *dri2_surf) +{ + if (dri2_surf->back != NULL) + return 0; + + return update_buffers(dri2_surf); +} + static __DRIbuffer * dri2_wl_get_buffers_with_format(__DRIdrawable * driDrawable, int *width, int *height, @@ -992,7 +999,7 @@ dri2_wl_swap_buffers_with_damage(_EGLDriver *drv, /* Make sure we have a back buffer in case we're swapping without ever * rendering. */ - if (get_back_bo(dri2_surf) < 0) + if (update_buffers_if_needed(dri2_surf) < 0) return _eglError(EGL_BAD_ALLOC, "dri2_swap_buffers"); if (draw->SwapInterval > 0) { @@ -1078,7 +1085,7 @@ dri2_wl_query_buffer_age(_EGLDriver *drv, { struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surface); - if (get_back_bo(dri2_surf) < 0) { + if (update_buffers_if_needed(dri2_surf) < 0) { _eglError(EGL_BAD_ALLOC, "dri2_query_buffer_age"); return -1; } @@ -1353,7 +1360,7 @@ dri2_wl_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp) continue; dri2_conf = dri2_add_config(disp, dri2_dpy->driver_configs[i], - count + 1, EGL_WINDOW_BIT, NULL, dri2_wl_visuals[j].rgba_masks); + count + 1, EGL_WINDOW_BIT, NULL, dri2_wl_visuals[j].rgba_shifts, dri2_wl_visuals[j].rgba_sizes); if (dri2_conf) { if (dri2_conf->base.ConfigID == count + 1) count++; @@ -1386,7 +1393,8 @@ dri2_wl_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp) */ dri2_conf = dri2_add_config(disp, dri2_dpy->driver_configs[i], count + 1, EGL_WINDOW_BIT, NULL, - dri2_wl_visuals[c].rgba_masks); + dri2_wl_visuals[c].rgba_shifts, + dri2_wl_visuals[c].rgba_sizes); if (dri2_conf) { if (dri2_conf->base.ConfigID == count + 1) count++; diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c index c8c676d2f00..d88e55dbf23 100644 --- a/src/egl/drivers/dri2/platform_x11.c +++ b/src/egl/drivers/dri2/platform_x11.c @@ -42,6 +42,7 @@ #include #include "util/debug.h" #include "util/macros.h" +#include "util/bitscan.h" #include "egl_dri2.h" #include "egl_dri2_fallbacks.h" @@ -261,7 +262,7 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, (void) drv; - dri2_surf = malloc(sizeof *dri2_surf); + dri2_surf = calloc(1, sizeof *dri2_surf); if (!dri2_surf) { _eglError(EGL_BAD_ALLOC, "dri2_create_surface"); return NULL; @@ -289,21 +290,8 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type, goto cleanup_pixmap; } - if (dri2_dpy->dri2) { - dri2_surf->dri_drawable = - dri2_dpy->dri2->createNewDrawable(dri2_dpy->dri_screen, config, - dri2_surf); - } else { - assert(dri2_dpy->swrast); - dri2_surf->dri_drawable = - dri2_dpy->swrast->createNewDrawable(dri2_dpy->dri_screen, config, - dri2_surf); - } - - if (dri2_surf->dri_drawable == NULL) { - _eglError(EGL_BAD_ALLOC, "dri2->createNewDrawable"); + if (!dri2_create_drawable(dri2_dpy, config, dri2_surf)) goto 
cleanup_pixmap; - } if (type != EGL_PBUFFER_BIT) { cookie = xcb_get_geometry (dri2_dpy->conn, dri2_surf->drawable); @@ -778,9 +766,49 @@ dri2_x11_config_match_attrib(struct dri2_egl_display *dri2_dpy, return config_val == value; } +/** + * See if the X server can export a pixmap with the given color depth. + * + * Glamor in xorg-server 1.20 can't export pixmaps which have a different + * color depth than the root window as a DRI image. This makes it impossible + * to expose pbuffer-only visuals with, say, 16bpp on a 24bpp X display. + */ +static bool +x11_can_export_pixmap_with_bpp(struct dri2_egl_display *dri2_dpy, int bpp) +{ + bool supported = false; + +#ifdef HAVE_DRI3 + xcb_dri3_buffer_from_pixmap_cookie_t cookie; + xcb_dri3_buffer_from_pixmap_reply_t *reply; + + xcb_pixmap_t pixmap = xcb_generate_id(dri2_dpy->conn); + xcb_create_pixmap(dri2_dpy->conn, bpp, pixmap, dri2_dpy->screen->root, 1, 1); + cookie = xcb_dri3_buffer_from_pixmap(dri2_dpy->conn, pixmap); + reply = xcb_dri3_buffer_from_pixmap_reply(dri2_dpy->conn, cookie, NULL); + + if (reply) { + int *fds = xcb_dri3_buffer_from_pixmap_reply_fds(dri2_dpy->conn, reply); + + for (int i = 0; i < reply->nfd; i++) { + close(fds[i]); + } + + supported = true; + + free(reply); + } + + xcb_free_pixmap(dri2_dpy->conn, pixmap); +#endif + + return supported; +} + static EGLBoolean dri2_x11_add_configs_for_visuals(struct dri2_egl_display *dri2_dpy, - _EGLDisplay *disp, bool supports_preserved) + _EGLDisplay *disp, bool supports_preserved, + bool add_pbuffer_configs) { xcb_depth_iterator_t d; xcb_visualtype_t *visuals; @@ -818,16 +846,23 @@ dri2_x11_add_configs_for_visuals(struct dri2_egl_display *dri2_dpy, EGL_NONE }; - unsigned int rgba_masks[4] = { - visuals[i].red_mask, - visuals[i].green_mask, - visuals[i].blue_mask, + int rgba_shifts[4] = { + ffs(visuals[i].red_mask) - 1, + ffs(visuals[i].green_mask) - 1, + ffs(visuals[i].blue_mask) - 1, + -1, + }; + + unsigned int rgba_sizes[4] = { + util_bitcount(visuals[i].red_mask), + util_bitcount(visuals[i].green_mask), + util_bitcount(visuals[i].blue_mask), 0, }; dri2_conf = dri2_add_config(disp, config, config_count + 1, surface_type, config_attrs, - rgba_masks); + rgba_shifts, rgba_sizes); if (dri2_conf) if (dri2_conf->base.ConfigID == config_count + 1) config_count++; @@ -841,11 +876,14 @@ dri2_x11_add_configs_for_visuals(struct dri2_egl_display *dri2_dpy, * wants... especially on drivers that only have 32-bit RGBA * EGLConfigs! */ if (d.data->depth == 24 || d.data->depth == 30) { - rgba_masks[3] = - ~(rgba_masks[0] | rgba_masks[1] | rgba_masks[2]); + unsigned int rgba_mask = ~(visuals[i].red_mask | + visuals[i].green_mask | + visuals[i].blue_mask); + rgba_shifts[3] = ffs(rgba_mask) - 1; + rgba_sizes[3] = util_bitcount(rgba_mask); dri2_conf = dri2_add_config(disp, config, config_count + 1, surface_type, config_attrs, - rgba_masks); + rgba_shifts, rgba_sizes); if (dri2_conf) if (dri2_conf->base.ConfigID == config_count + 1) config_count++; @@ -859,35 +897,37 @@ dri2_x11_add_configs_for_visuals(struct dri2_egl_display *dri2_dpy, /* Add a 565-no-depth-no-stencil pbuffer-only config. If X11 is depth 24, * we wouldn't have 565 available, which the CTS demands. 
*/ - for (int j = 0; dri2_dpy->driver_configs[j]; j++) { - const __DRIconfig *config = dri2_dpy->driver_configs[j]; - const EGLint config_attrs[] = { - EGL_NATIVE_VISUAL_ID, 0, - EGL_NATIVE_VISUAL_TYPE, EGL_NONE, - EGL_NONE - }; - EGLint surface_type = EGL_PBUFFER_BIT; - unsigned int rgba_masks[4] = { - 0x1f << 11, - 0x3f << 5, - 0x1f << 0, - 0, - }; - - /* Check that we've found single-sample, no depth, no stencil. */ - if (!dri2_x11_config_match_attrib(dri2_dpy, config, - __DRI_ATTRIB_DEPTH_SIZE, 0) || - !dri2_x11_config_match_attrib(dri2_dpy, config, - __DRI_ATTRIB_STENCIL_SIZE, 0) || - !dri2_x11_config_match_attrib(dri2_dpy, config, - __DRI_ATTRIB_SAMPLES, 0)) { - continue; - } - - if (dri2_add_config(disp, config, config_count + 1, surface_type, - config_attrs, rgba_masks)) { - config_count++; - break; + if (add_pbuffer_configs && x11_can_export_pixmap_with_bpp(dri2_dpy, 16)) { + for (int j = 0; dri2_dpy->driver_configs[j]; j++) { + const __DRIconfig *config = dri2_dpy->driver_configs[j]; + const EGLint config_attrs[] = { + EGL_NATIVE_VISUAL_ID, 0, + EGL_NATIVE_VISUAL_TYPE, EGL_NONE, + EGL_NONE + }; + EGLint surface_type = EGL_PBUFFER_BIT; + int rgba_shifts[4] = { 11, 5, 0, -1 }; + unsigned int rgba_sizes[4] = { 5, 6, 5, 0 }; + + /* Check that we've found single-sample, no depth, no stencil, + * and single-buffered. + */ + if (!dri2_x11_config_match_attrib(dri2_dpy, config, + __DRI_ATTRIB_DEPTH_SIZE, 0) || + !dri2_x11_config_match_attrib(dri2_dpy, config, + __DRI_ATTRIB_STENCIL_SIZE, 0) || + !dri2_x11_config_match_attrib(dri2_dpy, config, + __DRI_ATTRIB_SAMPLES, 0) || + !dri2_x11_config_match_attrib(dri2_dpy, config, + __DRI_ATTRIB_DOUBLE_BUFFER, 0)) { + continue; + } + + if (dri2_add_config(disp, config, config_count + 1, surface_type, + config_attrs, rgba_shifts, rgba_sizes)) { + config_count++; + break; + } } } @@ -1355,7 +1395,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp) dri2_setup_screen(disp); - if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, true)) + if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, true, false)) goto cleanup; /* Fill vtbl last to prevent accidentally calling virtual function during @@ -1453,7 +1493,7 @@ dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp) dri2_set_WL_bind_wayland_display(drv, disp); - if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, false)) + if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, false, true)) goto cleanup; dri2_dpy->loader_dri3_ext.core = dri2_dpy->core; @@ -1563,7 +1603,7 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp) dri2_set_WL_bind_wayland_display(drv, disp); - if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, true)) + if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, true, false)) goto cleanup; /* Fill vtbl last to prevent accidentally calling virtual function during diff --git a/src/egl/drivers/dri2/platform_x11_dri3.c b/src/egl/drivers/dri2/platform_x11_dri3.c index 189212745ce..04c041cac9b 100644 --- a/src/egl/drivers/dri2/platform_x11_dri3.c +++ b/src/egl/drivers/dri2/platform_x11_dri3.c @@ -402,13 +402,15 @@ dri3_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp, static void dri3_flush_front_buffer(__DRIdrawable *driDrawable, void *loaderPrivate) { + struct loader_dri3_drawable *draw = loaderPrivate; + (void) driDrawable; + /* There does not seem to be any kind of consensus on whether we should * support front-buffer rendering or not: * http://lists.freedesktop.org/archives/mesa-dev/2013-June/040129.html */ - _eglLog(_EGL_WARNING, "FIXME: 
egl/x11 doesn't support front buffer rendering."); - (void) driDrawable; - (void) loaderPrivate; + if (!draw->is_pixmap) + _eglLog(_EGL_WARNING, "FIXME: egl/x11 doesn't support front buffer rendering."); } const __DRIimageLoaderExtension dri3_image_loader_extension = { diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 588c6a5f1eb..a87df71fd39 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -95,6 +95,7 @@ #include "c99_compat.h" #include "c11/threads.h" #include "util/macros.h" +#include "util/android_trace.h" #include "eglapi.h" #include "egldefines.h" @@ -411,6 +412,11 @@ _eglGetPlatformDisplayCommon(EGLenum platform, void *native_display, case EGL_PLATFORM_SURFACELESS_MESA: disp = _eglGetSurfacelessDisplay(native_display, attrib_list); break; +#endif +#ifdef HAVE_ANDROID_PLATFORM + case EGL_PLATFORM_ANDROID_KHR: + disp = _eglGetAndroidDisplay(native_display, attrib_list); + break; #endif default: RETURN_EGL_ERROR(NULL, EGL_BAD_PARAMETER, NULL); @@ -674,6 +680,10 @@ eglTerminate(EGLDisplay dpy) /* do not reset disp->Driver */ disp->ClientAPIsString[0] = 0; disp->Initialized = EGL_FALSE; + + /* Reset blob cache funcs on terminate. */ + disp->BlobCacheSet = NULL; + disp->BlobCacheGet = NULL; } RETURN_EGL_SUCCESS(disp, EGL_TRUE); @@ -1300,6 +1310,7 @@ static EGLBoolean _eglSwapBuffersWithDamageCommon(_EGLDisplay *disp, _EGLSurface *surf, EGLint *rects, EGLint n_rects) { + MTRACE_BEGIN(); _EGLContext *ctx = _eglGetCurrentContext(); _EGLDriver *drv; EGLBoolean ret; @@ -1328,6 +1339,7 @@ _eglSwapBuffersWithDamageCommon(_EGLDisplay *disp, _EGLSurface *surf, surf->BufferAgeRead = EGL_FALSE; } + MTRACE_END(); RETURN_EGL_EVAL(disp, ret); } @@ -1345,10 +1357,13 @@ static EGLBoolean EGLAPIENTRY eglSwapBuffersWithDamageKHR(EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects) { + MTRACE_BEGIN(); _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLSurface *surf = _eglLookupSurface(surface, disp); _EGL_FUNC_START(disp, EGL_OBJECT_SURFACE_KHR, surf, EGL_FALSE); - return _eglSwapBuffersWithDamageCommon(disp, surf, rects, n_rects); + EGLBoolean ret = _eglSwapBuffersWithDamageCommon(disp, surf, rects, n_rects); + MTRACE_END(); + return ret; } /** diff --git a/src/egl/main/egldevice.c b/src/egl/main/egldevice.c index c5c9a21273a..ece0f4075da 100644 --- a/src/egl/main/egldevice.c +++ b/src/egl/main/egldevice.c @@ -108,9 +108,9 @@ static int _eglAddDRMDevice(drmDevicePtr device, _EGLDevice **out_dev) { _EGLDevice *dev; + const int wanted_nodes = 1 << DRM_NODE_RENDER | 1 << DRM_NODE_PRIMARY; - if ((device->available_nodes & (1 << DRM_NODE_PRIMARY | - 1 << DRM_NODE_RENDER)) == 0) + if ((device->available_nodes & wanted_nodes) != wanted_nodes) return -1; dev = _eglGlobal.DeviceList; diff --git a/src/egl/main/egldisplay.c b/src/egl/main/egldisplay.c index ba5f84510fe..59e069641a6 100644 --- a/src/egl/main/egldisplay.c +++ b/src/egl/main/egldisplay.c @@ -542,3 +542,19 @@ _eglGetSurfacelessDisplay(void *native_display, return _eglFindDisplay(_EGL_PLATFORM_SURFACELESS, native_display); } #endif /* HAVE_SURFACELESS_PLATFORM */ + +#ifdef HAVE_ANDROID_PLATFORM +_EGLDisplay* +_eglGetAndroidDisplay(void *native_display, + const EGLAttrib *attrib_list) +{ + + /* This platform recognizes no display attributes. 
*/ + if (attrib_list != NULL && attrib_list[0] != EGL_NONE) { + _eglError(EGL_BAD_ATTRIBUTE, "eglGetPlatformDisplay"); + return NULL; + } + + return _eglFindDisplay(_EGL_PLATFORM_ANDROID, native_display); +} +#endif /* HAVE_ANDROID_PLATFORM */ diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index cfd0ff66d64..9f6a1d6f6aa 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -301,6 +301,12 @@ _eglGetSurfacelessDisplay(void *native_display, const EGLAttrib *attrib_list); #endif +#ifdef HAVE_ANDROID_PLATFORM +_EGLDisplay* +_eglGetAndroidDisplay(void *native_display, + const EGLAttrib *attrib_list); +#endif + #ifdef __cplusplus } #endif diff --git a/src/freedreno/Android.drm.mk b/src/freedreno/Android.drm.mk new file mode 100644 index 00000000000..dfa9bed7d2e --- /dev/null +++ b/src/freedreno/Android.drm.mk @@ -0,0 +1,41 @@ +# Mesa 3-D graphics library +# +# Copyright (C) +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Android.mk for libfreedreno_drm.a + +# --------------------------------------- +# Build libfreedreno_drm +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(drm_SOURCES) + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/gallium/include \ + $(MESA_TOP)/src/gallium/auxiliary + +LOCAL_MODULE := libfreedreno_drm + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/freedreno/Android.ir3.mk b/src/freedreno/Android.ir3.mk new file mode 100644 index 00000000000..c6a9d3288d7 --- /dev/null +++ b/src/freedreno/Android.ir3.mk @@ -0,0 +1,51 @@ +# Mesa 3-D graphics library +# +# Copyright (C) +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Android.mk for libfreedreno_ir3.a + +# --------------------------------------- +# Build libfreedreno_ir3 +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(ir3_SOURCES) + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/compiler/nir \ + $(MESA_TOP)/src/gallium/include \ + $(MESA_TOP)/src/gallium/auxiliary \ + $(MESA_TOP)/prebuilt-intermediates/nir \ + +# We need libmesa_nir to get NIR's generated include directories. +LOCAL_STATIC_LIBRARIES := \ + libmesa_nir + +LOCAL_MODULE := libfreedreno_ir3 + +LOCAL_GENERATED_SOURCES := \ + $(MESA_GEN_GLSL_H) \ + $(MESA_GEN_NIR_H) + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/freedreno/Android.mk b/src/freedreno/Android.mk new file mode 100644 index 00000000000..e46e2199dc1 --- /dev/null +++ b/src/freedreno/Android.mk @@ -0,0 +1,30 @@ +# Mesa 3-D graphics library +# +# Copyright (C) +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +# Android.mk for libfreedreno_* + +LOCAL_PATH := $(call my-dir) + +include $(LOCAL_PATH)/Makefile.sources +include $(MESA_TOP)/src/gallium/drivers/freedreno/Android.gen.mk +include $(LOCAL_PATH)/Android.drm.mk +include $(LOCAL_PATH)/Android.ir3.mk diff --git a/src/freedreno/Makefile.sources b/src/freedreno/Makefile.sources index aa8edec82f2..a46a2c30913 100644 --- a/src/freedreno/Makefile.sources +++ b/src/freedreno/Makefile.sources @@ -36,6 +36,8 @@ ir3_SOURCES := \ ir3/ir3_nir.c \ ir3/ir3_nir.h \ ir3/ir3_nir_analyze_ubo_ranges.c \ + ir3/ir3_nir_lower_load_barycentric_at_sample.c \ + ir3/ir3_nir_lower_load_barycentric_at_offset.c \ ir3/ir3_nir_lower_io_offsets.c \ ir3/ir3_nir_lower_tg4_to_tex.c \ ir3/ir3_nir_move_varying_inputs.c \ diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 97f4ae96cd9..5c049a5936f 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -48,7 +48,7 @@ void * ir3_alloc(struct ir3 *shader, int sz) struct ir3 * ir3_create(struct ir3_compiler *compiler, gl_shader_stage type, unsigned nin, unsigned nout) { - struct ir3 *shader = rzalloc(compiler, struct ir3); + struct ir3 *shader = rzalloc(NULL, struct ir3); shader->compiler = compiler; shader->type = type; diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index f1d9b53c7c4..353dfdfe252 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -1044,6 +1044,7 @@ emit_intrinsic_barrier(struct ir3_context *ctx, nir_intrinsic_instr *intr) barrier->cat7.g = true; barrier->cat7.r = true; barrier->cat7.w = true; + barrier->cat7.l = true; barrier->barrier_class = IR3_BARRIER_IMAGE_W | IR3_BARRIER_BUFFER_W; barrier->barrier_conflict = @@ -1304,7 +1305,8 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) idx += nir_src_as_uint(intr->src[1]); for (int i = 0; i < intr->num_components; i++) { unsigned inloc = idx * 4 + i + comp; - if (ctx->so->inputs[idx].bary) { + if (ctx->so->inputs[idx].bary && + !ctx->so->inputs[idx].use_ldlv) { dst[i] = ir3_BARY_F(b, create_immed(b, inloc), 0, coord, 0); } else { /* for non-varyings use the pre-setup input, since @@ -2375,6 +2377,7 @@ setup_input(struct ir3_context *ctx, nir_variable *in) so->inputs[n].compmask = (1 << (ncomp + frac)) - 1; so->inputs_count = MAX2(so->inputs_count, n + 1); so->inputs[n].interpolate = in->data.interpolation; + so->inputs[n].ncomp = ncomp; if (ctx->so->type == MESA_SHADER_FRAGMENT) { @@ -2402,8 +2405,6 @@ setup_input(struct ir3_context *ctx, nir_variable *in) so->inputs[n].bary = true; instr = create_frag_input(ctx, false, idx); } else { - bool use_ldlv = false; - /* detect the special case for front/back colors where * we need to do flat vs smooth shading depending on * rast state: @@ -2424,12 +2425,12 @@ setup_input(struct ir3_context *ctx, nir_variable *in) if (ctx->compiler->flat_bypass) { if ((so->inputs[n].interpolate == INTERP_MODE_FLAT) || (so->inputs[n].rasterflat && ctx->so->key.rasterflat)) - use_ldlv = true; + so->inputs[n].use_ldlv = true; } so->inputs[n].bary = true; - instr = create_frag_input(ctx, use_ldlv, idx); + instr = create_frag_input(ctx, so->inputs[n].use_ldlv, idx); } compile_assert(ctx, idx < ctx->ir->ninputs); diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c index 312c0644623..46216a6f862 100644 --- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c +++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c @@ -32,11 +32,11 @@ 
get_ubo_load_range(nir_intrinsic_instr *instr) { struct ir3_ubo_range r; - const int bytes = nir_intrinsic_dest_components(instr) * - (nir_dest_bit_size(instr->dest) / 8); + const int offset = nir_src_as_uint(instr->src[1]); + const int bytes = nir_intrinsic_dest_components(instr) * 4; - r.start = ROUND_DOWN_TO(nir_src_as_uint(instr->src[1]), 16 * 4); - r.end = ALIGN(r.start + bytes, 16 * 4); + r.start = ROUND_DOWN_TO(offset, 16 * 4); + r.end = ALIGN(offset + bytes, 16 * 4); return r; } diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 7c1dc38de23..be4e93b9a8f 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -414,6 +414,7 @@ struct ir3_shader_variant { /* fragment shader specific: */ bool bary : 1; /* fetched varying (vs one loaded into reg) */ bool rasterflat : 1; /* special handling for emit->rasterflat */ + bool use_ldlv : 1; /* internal to ir3_compiler_nir */ bool half : 1; enum glsl_interp_mode interpolate; } inputs[16 + 2]; /* +POSITION +FACE */ diff --git a/src/freedreno/vulkan/meson.build b/src/freedreno/vulkan/meson.build index f58aff2cbdb..a25193a8147 100644 --- a/src/freedreno/vulkan/meson.build +++ b/src/freedreno/vulkan/meson.build @@ -90,12 +90,10 @@ libvulkan_freedreno = shared_library( include_directories : [ inc_common, inc_compiler, - inc_vulkan_util, inc_vulkan_wsi, inc_freedreno, ], link_with : [ - libvulkan_util, libvulkan_wsi, libmesa_util, libfreedreno_drm, # required by ir3_shader_get_variant, which we don't use @@ -111,6 +109,7 @@ libvulkan_freedreno = shared_library( dep_valgrind, idep_nir, tu_deps, + idep_vulkan_util, ], c_args : [c_vis_args, no_override_init_args, tu_flags], link_args : [ld_args_bsymbolic, ld_args_gc_sections], diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index aae72c7a843..b2cf3fe7168 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -897,7 +897,7 @@ static const VkQueueFamilyProperties tu_queue_family_properties = { VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT, .queueCount = 1, .timestampValidBits = 64, - .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, + .minImageTransferGranularity = { 1, 1, 1 }, }; void diff --git a/src/gallium/Android.common.mk b/src/gallium/Android.common.mk index 782510ff0f4..0d55f04ac94 100644 --- a/src/gallium/Android.common.mk +++ b/src/gallium/Android.common.mk @@ -27,6 +27,9 @@ LOCAL_C_INCLUDES += \ $(GALLIUM_TOP)/include \ $(GALLIUM_TOP)/auxiliary \ $(GALLIUM_TOP)/winsys \ - $(GALLIUM_TOP)/drivers + $(GALLIUM_TOP)/drivers \ + $(MESA_TOP)/src/freedreno \ + $(MESA_TOP)/src/freedreno/ir3 \ + $(MESA_TOP)/src/freedreno/registers include $(MESA_COMMON_MK) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 3fc096789c0..f8c69585e6a 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -950,6 +950,8 @@ draw_set_mapped_so_targets(struct draw_context *draw, { int i; + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + for (i = 0; i < num_targets; i++) draw->so.targets[i] = targets[i]; for (i = num_targets; i < PIPE_MAX_SO_BUFFERS; i++) diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 6420cfbb261..c752163ee05 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -753,8 +753,10 @@ void draw_gs_destroy( struct draw_context *draw ) { int i; if (draw->gs.tgsi.machine) { 
- for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) + for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) { align_free(draw->gs.tgsi.machine->Primitives[i]); + align_free(draw->gs.tgsi.machine->PrimitiveOffsets[i]); + } tgsi_exec_machine_destroy(draw->gs.tgsi.machine); } } diff --git a/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h b/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h index 9db0dc01117..f043bcf1ab3 100644 --- a/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h +++ b/src/gallium/auxiliary/pipe-loader/driinfo_gallium.h @@ -37,4 +37,5 @@ DRI_CONF_SECTION_MISCELLANEOUS DRI_CONF_ALWAYS_HAVE_DEPTH_BUFFER("false") DRI_CONF_GLSL_ZERO_INIT("false") DRI_CONF_ALLOW_RGB10_CONFIGS("true") + DRI_CONF_ALLOW_FP16_CONFIGS("false") DRI_CONF_SECTION_END diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 15edf2f48dc..b6758fc2f9b 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -213,8 +213,10 @@ set_vertex_shader(struct blit_state *ctx) * fragment shader input semantics and vertex_element/buffers. */ if (!ctx->vs) { - const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC }; + const enum tgsi_semantic semantic_names[] = { + TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC + }; const uint semantic_indexes[] = { 0, 0 }; ctx->vs = util_make_vertex_passthrough_shader(ctx->pipe, 2, semantic_names, diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index e19fde9873d..3dc49cd0958 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -2059,7 +2059,8 @@ void util_blitter_generate_mipmap(struct blitter_context *blitter, target = PIPE_TEXTURE_2D_ARRAY; assert(tex->nr_samples <= 1); - assert(!util_format_has_stencil(desc)); + /* Disallow stencil formats without depth. 
*/ + assert(!util_format_has_stencil(desc) || util_format_has_depth(desc)); is_depth = desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS; diff --git a/src/gallium/auxiliary/util/u_debug_symbol.c b/src/gallium/auxiliary/util/u_debug_symbol.c index 22e6c8ce771..d8380b76bf6 100644 --- a/src/gallium/auxiliary/util/u_debug_symbol.c +++ b/src/gallium/auxiliary/util/u_debug_symbol.c @@ -219,7 +219,7 @@ debug_symbol_name_dbghelp(const void *addr, char* buf, unsigned size) #endif /* PIPE_OS_WINDOWS */ -#if defined(__GLIBC__) && !defined(__UCLIBC__) +#if defined(HAVE_EXECINFO_H) #include @@ -240,7 +240,7 @@ debug_symbol_name_glibc(const void *addr, char* buf, unsigned size) return TRUE; } -#endif /* defined(__GLIBC__) && !defined(__UCLIBC__) */ +#endif /* defined(HAVE_EXECINFO_H) */ void @@ -252,11 +252,11 @@ debug_symbol_name(const void *addr, char* buf, unsigned size) } #endif -#if defined(__GLIBC__) && !defined(__UCLIBC__) +#if defined(HAVE_EXECINFO_H) if (debug_symbol_name_glibc(addr, buf, size)) { return; } -#endif +#endif /* defined(HAVE_EXECINFO_H) */ util_snprintf(buf, size, "%p", addr); buf[size - 1] = 0; diff --git a/src/gallium/auxiliary/util/u_dump_defines.c b/src/gallium/auxiliary/util/u_dump_defines.c index 50dfa37bab7..41108c7248a 100644 --- a/src/gallium/auxiliary/util/u_dump_defines.c +++ b/src/gallium/auxiliary/util/u_dump_defines.c @@ -418,6 +418,7 @@ static const char * util_query_type_short_names[] = { "occlusion_counter", "occlusion_predicate", + "occlusion_predicate_conservative", "timestamp", "timestamp_disjoint", "time_elapsed", @@ -425,6 +426,7 @@ util_query_type_short_names[] = { "primitives_emitted", "so_statistics", "so_overflow_predicate", + "so_overflow_any_predicate", "gpu_finished", "pipeline_statistics", }; diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index 567d3d0c542..21b8c162b30 100644 --- a/src/gallium/auxiliary/util/u_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -668,11 +668,13 @@ util_copy_image_view(struct pipe_image_view *dst, pipe_resource_reference(&dst->resource, src->resource); dst->format = src->format; dst->access = src->access; + dst->shader_access = src->shader_access; dst->u = src->u; } else { pipe_resource_reference(&dst->resource, NULL); dst->format = PIPE_FORMAT_NONE; dst->access = 0; + dst->shader_access = 0; memset(&dst->u, 0, sizeof(dst->u)); } } diff --git a/src/gallium/auxiliary/util/u_screen.c b/src/gallium/auxiliary/util/u_screen.c index 410f17421e6..3f64e466d7f 100644 --- a/src/gallium/auxiliary/util/u_screen.c +++ b/src/gallium/auxiliary/util/u_screen.c @@ -282,6 +282,7 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, case PIPE_CAP_TGSI_BALLOT: case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT: case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: + case PIPE_CAP_TGSI_DIV: return 0; case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: @@ -325,7 +326,7 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, case PIPE_CAP_TGSI_ATOMFADD: case PIPE_CAP_TGSI_SKIP_SHRINK_IO_ARRAYS: case PIPE_CAP_IMAGE_LOAD_FORMATTED: - case PIPE_CAP_PREFER_COMPUTE_BLIT_FOR_MULTIMEDIA: + case PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA: return 0; case PIPE_CAP_MAX_GS_INVOCATIONS: @@ -359,7 +360,7 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, return 1; case PIPE_CAP_DMABUF: -#ifdef PIPE_OS_LINUX +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) return 1; #else return 0; diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index 
12c58ff7436..a5ab3e06b5e 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -70,7 +70,7 @@ init_shaders(struct vl_compositor *c) return false; } - if (c->pipe_compute_supported) { + if (c->pipe_cs_composit_supported) { c->cs_video_buffer = vl_compositor_cs_create_shader(c, compute_shader_video_buffer); if (!c->cs_video_buffer) { debug_printf("Unable to create video_buffer compute shader.\n"); @@ -125,7 +125,7 @@ static void cleanup_shaders(struct vl_compositor *c) c->pipe->delete_fs_state(c->pipe, c->fs_rgb_yuv.y); c->pipe->delete_fs_state(c->pipe, c->fs_rgb_yuv.uv); - if (c->pipe_compute_supported) { + if (c->pipe_cs_composit_supported) { c->pipe->delete_compute_state(c->pipe, c->cs_video_buffer); c->pipe->delete_compute_state(c->pipe, c->cs_weave_rgb); c->pipe->delete_compute_state(c->pipe, c->cs_rgba); @@ -561,7 +561,7 @@ vl_compositor_set_buffer_layer(struct vl_compositor_state *s, float half_a_line = 0.5f / s->layers[layer].zw.y; switch(deinterlace) { case VL_COMPOSITOR_WEAVE: - if (c->pipe_compute_supported) + if (c->pipe_cs_composit_supported) s->layers[layer].cs = c->cs_weave_rgb; else s->layers[layer].fs = c->fs_weave_rgb; @@ -571,7 +571,7 @@ vl_compositor_set_buffer_layer(struct vl_compositor_state *s, s->layers[layer].zw.x = 0.0f; s->layers[layer].src.tl.y += half_a_line; s->layers[layer].src.br.y += half_a_line; - if (c->pipe_compute_supported) + if (c->pipe_cs_composit_supported) s->layers[layer].cs = c->cs_video_buffer; else s->layers[layer].fs = c->fs_video_buffer; @@ -581,7 +581,7 @@ vl_compositor_set_buffer_layer(struct vl_compositor_state *s, s->layers[layer].zw.x = 1.0f; s->layers[layer].src.tl.y -= half_a_line; s->layers[layer].src.br.y -= half_a_line; - if (c->pipe_compute_supported) + if (c->pipe_cs_composit_supported) s->layers[layer].cs = c->cs_video_buffer; else s->layers[layer].fs = c->fs_video_buffer; @@ -589,7 +589,7 @@ vl_compositor_set_buffer_layer(struct vl_compositor_state *s, } } else { - if (c->pipe_compute_supported) + if (c->pipe_cs_composit_supported) s->layers[layer].cs = c->cs_video_buffer; else s->layers[layer].fs = c->fs_video_buffer; @@ -757,7 +757,10 @@ vl_compositor_init(struct vl_compositor *c, struct pipe_context *pipe) memset(c, 0, sizeof(*c)); - c->pipe_compute_supported = pipe->screen->get_param(pipe->screen, PIPE_CAP_COMPUTE); + c->pipe_cs_composit_supported = pipe->screen->get_param(pipe->screen, PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA) && + pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_TEX_TXF_LZ) && + pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_DIV); + c->pipe = pipe; if (!init_pipe_state(c)) { diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h index d42618824a7..ae06c153d5b 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.h +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -121,7 +121,7 @@ struct vl_compositor void *cs_weave_rgb; void *cs_rgba; - bool pipe_compute_supported; + bool pipe_cs_composit_supported; struct { struct { diff --git a/src/gallium/auxiliary/vl/vl_compositor_cs.c b/src/gallium/auxiliary/vl/vl_compositor_cs.c index 485b4174b8e..abd2aa63f32 100644 --- a/src/gallium/auxiliary/vl/vl_compositor_cs.c +++ b/src/gallium/auxiliary/vl/vl_compositor_cs.c @@ -61,7 +61,7 @@ const char *compute_shader_video_buffer = "IMM[0] UINT32 { 8, 8, 1, 0}\n" "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n" - "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n" + "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n" /* Drawn area check */ "USGE 
TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n" @@ -70,20 +70,20 @@ const char *compute_shader_video_buffer = "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n" "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n" - "UIF TEMP[1]\n" + "UIF TEMP[1].xxxx\n" /* Translate */ - "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n" - "U2F TEMP[2], TEMP[2]\n" - "DIV TEMP[3], TEMP[2], IMM[1].yyyy\n" + "UADD TEMP[2].xy, TEMP[0].xyyy, -CONST[5].xyxy\n" + "U2F TEMP[2].xy, TEMP[2].xyyy\n" + "DIV TEMP[3].xy, TEMP[2].xyyy, IMM[1].yyyy\n" /* Scale */ - "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n" - "DIV TEMP[3], TEMP[3], CONST[3].zwzw\n" + "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwww\n" + "DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwww\n" /* Fetch texels */ - "TEX_LZ TEMP[4].x, TEMP[2], SAMP[0], RECT\n" - "TEX_LZ TEMP[4].y, TEMP[3], SAMP[1], RECT\n" - "TEX_LZ TEMP[4].z, TEMP[3], SAMP[2], RECT\n" + "TEX_LZ TEMP[4].x, TEMP[2].xyyy, SAMP[0], RECT\n" + "TEX_LZ TEMP[4].y, TEMP[3].xyyy, SAMP[1], RECT\n" + "TEX_LZ TEMP[4].z, TEMP[3].xyyy, SAMP[2], RECT\n" "MOV TEMP[4].w, IMM[1].xxxx\n" @@ -93,12 +93,12 @@ const char *compute_shader_video_buffer = "DP4 TEMP[7].z, CONST[2], TEMP[4]\n" "MOV TEMP[5].w, TEMP[4].zzzz\n" - "SLE TEMP[6].w, TEMP[5], CONST[3].xxxx\n" - "SGT TEMP[5].w, TEMP[5], CONST[3].yyyy\n" + "SLE TEMP[6].w, TEMP[5].wwww, CONST[3].xxxx\n" + "SGT TEMP[5].w, TEMP[5].wwww, CONST[3].yyyy\n" - "MAX TEMP[7].w, TEMP[5], TEMP[6]\n" + "MAX TEMP[7].w, TEMP[5].wwww, TEMP[6].wwww\n" - "STORE IMAGE[0], TEMP[0], TEMP[7], 2D\n" + "STORE IMAGE[0], TEMP[0].xyyy, TEMP[7], 2D\n" "ENDIF\n" "END\n"; @@ -124,7 +124,7 @@ const char *compute_shader_weave = "IMM[2] UINT32 { 1, 2, 4, 0}\n" "IMM[3] FLT32 { 0.25, 0.5, 0.125, 0.125}\n" - "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n" + "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n" /* Drawn area check */ "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n" @@ -133,22 +133,22 @@ const char *compute_shader_weave = "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n" "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n" - "UIF TEMP[1]\n" - "MOV TEMP[2], TEMP[0]\n" + "UIF TEMP[1].xxxx\n" + "MOV TEMP[2].xy, TEMP[0].xyyy\n" /* Translate */ - "UADD TEMP[2].xy, TEMP[2], -CONST[5].xyxy\n" + "UADD TEMP[2].xy, TEMP[2].xyyy, -CONST[5].xyxy\n" /* Top Y */ - "U2F TEMP[2], TEMP[2]\n" + "U2F TEMP[2].xy, TEMP[2].xyyy\n" "DIV TEMP[2].y, TEMP[2].yyyy, IMM[1].yyyy\n" /* Down Y */ - "MOV TEMP[12], TEMP[2]\n" + "MOV TEMP[12].xy, TEMP[2].xyyy\n" /* Top UV */ - "MOV TEMP[3], TEMP[2]\n" + "MOV TEMP[3].xy, TEMP[2].xyyy\n" "DIV TEMP[3].xy, TEMP[3], IMM[1].yyyy\n" /* Down UV */ - "MOV TEMP[13], TEMP[3]\n" + "MOV TEMP[13].xy, TEMP[3].xyyy\n" /* Texture offset */ "ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy\n" @@ -162,10 +162,10 @@ const char *compute_shader_weave = "ADD TEMP[13].y, TEMP[13].yyyy, IMM[3].wwww\n" /* Scale */ - "DIV TEMP[2].xy, TEMP[2], CONST[3].zwzw\n" - "DIV TEMP[12].xy, TEMP[12], CONST[3].zwzw\n" - "DIV TEMP[3].xy, TEMP[3], CONST[3].zwzw\n" - "DIV TEMP[13].xy, TEMP[13], CONST[3].zwzw\n" + "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwzw\n" + "DIV TEMP[12].xy, TEMP[12].xyyy, CONST[3].zwzw\n" + "DIV TEMP[3].xy, TEMP[3].xyyy, CONST[3].zwzw\n" + "DIV TEMP[13].xy, TEMP[13].xyyy, CONST[3].zwzw\n" /* Weave offset */ "ADD TEMP[2].y, TEMP[2].yyyy, IMM[3].xxxx\n" @@ -176,32 +176,32 @@ const char *compute_shader_weave = /* Texture layer */ "MOV TEMP[14].x, TEMP[2].yyyy\n" "MOV TEMP[14].yz, TEMP[3].yyyy\n" - "ROUND TEMP[15], TEMP[14]\n" - "ADD TEMP[14], TEMP[14], -TEMP[15]\n" - "MOV TEMP[14], |TEMP[14]|\n" - "MUL TEMP[14], TEMP[14], IMM[1].yyyy\n" + "ROUND 
TEMP[15].xyz, TEMP[14].xyzz\n" + "ADD TEMP[14].xyz, TEMP[14].xyzz, -TEMP[15].xyzz\n" + "MOV TEMP[14].xyz, |TEMP[14].xyzz|\n" + "MUL TEMP[14].xyz, TEMP[14].xyzz, IMM[1].yyyy\n" /* Normalize */ - "DIV TEMP[2].xy, TEMP[2], CONST[5].zwzw\n" - "DIV TEMP[12].xy, TEMP[12], CONST[5].zwzw\n" + "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[5].zwzw\n" + "DIV TEMP[12].xy, TEMP[12].xyyy, CONST[5].zwzw\n" "DIV TEMP[15].xy, CONST[5].zwzw, IMM[1].yyyy\n" - "DIV TEMP[3].xy, TEMP[3], TEMP[15].xyxy\n" - "DIV TEMP[13].xy, TEMP[13], TEMP[15].xyxy\n" + "DIV TEMP[3].xy, TEMP[3].xyyy, TEMP[15].xyxy\n" + "DIV TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xyxy\n" /* Fetch texels */ "MOV TEMP[2].z, IMM[1].wwww\n" "MOV TEMP[3].z, IMM[1].wwww\n" - "TEX_LZ TEMP[10].x, TEMP[2], SAMP[0], 2D_ARRAY\n" - "TEX_LZ TEMP[10].y, TEMP[3], SAMP[1], 2D_ARRAY\n" - "TEX_LZ TEMP[10].z, TEMP[3], SAMP[2], 2D_ARRAY\n" + "TEX_LZ TEMP[10].x, TEMP[2].xyzz, SAMP[0], 2D_ARRAY\n" + "TEX_LZ TEMP[10].y, TEMP[3].xyzz, SAMP[1], 2D_ARRAY\n" + "TEX_LZ TEMP[10].z, TEMP[3].xyzz, SAMP[2], 2D_ARRAY\n" "MOV TEMP[12].z, IMM[1].xxxx\n" "MOV TEMP[13].z, IMM[1].xxxx\n" - "TEX_LZ TEMP[11].x, TEMP[12], SAMP[0], 2D_ARRAY\n" - "TEX_LZ TEMP[11].y, TEMP[13], SAMP[1], 2D_ARRAY\n" - "TEX_LZ TEMP[11].z, TEMP[13], SAMP[2], 2D_ARRAY\n" + "TEX_LZ TEMP[11].x, TEMP[12].xyzz, SAMP[0], 2D_ARRAY\n" + "TEX_LZ TEMP[11].y, TEMP[13].xyzz, SAMP[1], 2D_ARRAY\n" + "TEX_LZ TEMP[11].z, TEMP[13].xyzz, SAMP[2], 2D_ARRAY\n" - "LRP TEMP[6], TEMP[14], TEMP[10], TEMP[11]\n" + "LRP TEMP[6].xyz, TEMP[14].xyzz, TEMP[10].xyzz, TEMP[11].xyzz\n" "MOV TEMP[6].w, IMM[1].xxxx\n" /* Color Space Conversion */ @@ -210,12 +210,12 @@ const char *compute_shader_weave = "DP4 TEMP[9].z, CONST[2], TEMP[6]\n" "MOV TEMP[7].w, TEMP[6].zzzz\n" - "SLE TEMP[8].w, TEMP[7], CONST[3].xxxx\n" - "SGT TEMP[7].w, TEMP[7], CONST[3].yyyy\n" + "SLE TEMP[8].w, TEMP[7].wwww, CONST[3].xxxx\n" + "SGT TEMP[7].w, TEMP[7].wwww, CONST[3].yyyy\n" - "MAX TEMP[9].w, TEMP[7], TEMP[8]\n" + "MAX TEMP[9].w, TEMP[7].wwww, TEMP[8].wwww\n" - "STORE IMAGE[0], TEMP[0], TEMP[9], 2D\n" + "STORE IMAGE[0], TEMP[0].xyyy, TEMP[9], 2D\n" "ENDIF\n" "END\n"; @@ -239,7 +239,7 @@ const char *compute_shader_rgba = "IMM[0] UINT32 { 8, 8, 1, 0}\n" "IMM[1] FLT32 { 1.0, 2.0, 0.0, 0.0}\n" - "UMAD TEMP[0], SV[1], IMM[0], SV[0]\n" + "UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n" /* Drawn area check */ "USGE TEMP[1].xy, TEMP[0].xyxy, CONST[4].xyxy\n" @@ -248,18 +248,18 @@ const char *compute_shader_rgba = "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].zzzz\n" "AND TEMP[1].x, TEMP[1].xxxx, TEMP[1].wwww\n" - "UIF TEMP[1]\n" + "UIF TEMP[1].xxxx\n" /* Translate */ - "UADD TEMP[2].xy, TEMP[0], -CONST[5].xyxy\n" - "U2F TEMP[2], TEMP[2]\n" + "UADD TEMP[2].xy, TEMP[0].xyyy, -CONST[5].xyxy\n" + "U2F TEMP[2].xy, TEMP[2].xyyy\n" /* Scale */ - "DIV TEMP[2], TEMP[2], CONST[3].zwzw\n" + "DIV TEMP[2].xy, TEMP[2].xyyy, CONST[3].zwzw\n" /* Fetch texels */ - "TEX_LZ TEMP[3], TEMP[2], SAMP[0], RECT\n" + "TEX_LZ TEMP[3], TEMP[2].xyyy, SAMP[0], RECT\n" - "STORE IMAGE[0], TEMP[0], TEMP[3], 2D\n" + "STORE IMAGE[0], TEMP[0].xyyy, TEMP[3], 2D\n" "ENDIF\n" "END\n"; diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 57554d0681d..690e534d206 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -521,8 +521,9 @@ The integer capabilities: execution. 0 = throttling is disabled. * ``PIPE_CAP_DMABUF``: Whether Linux DMABUF handles are supported by resource_from_handle and resource_get_handle. 
-* ``PIPE_CAP_PREFER_COMPUTE_BLIT_FOR_MULTIMEDIA``: Whether VDPAU, VAAPI, and - OpenMAX should use a compute-based blit instead of pipe_context::blit. +* ``PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA``: Whether VDPAU, VAAPI, and + OpenMAX should use a compute-based blit instead of pipe_context::blit and compute pipeline for compositing images. +* ``PIPE_CAP_TGSI_DIV``: Whether opcode DIV is supported .. _pipe_capf: diff --git a/src/gallium/drivers/etnaviv/etnaviv_emit.c b/src/gallium/drivers/etnaviv/etnaviv_emit.c index ed7b7ee3cb8..ee54daabf3e 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_emit.c +++ b/src/gallium/drivers/etnaviv/etnaviv_emit.c @@ -577,12 +577,12 @@ etna_emit_state(struct etna_context *ctx) static const uint32_t uniform_dirty_bits = ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF; - if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits)) + if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits)) etna_uniforms_write( ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX], ctx->shader_state.VS_UNIFORMS, &ctx->shader_state.vs_uniforms_size); - if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits)) + if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits)) etna_uniforms_write( ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT], ctx->shader_state.PS_UNIFORMS, &ctx->shader_state.ps_uniforms_size); diff --git a/src/gallium/drivers/etnaviv/etnaviv_resource.c b/src/gallium/drivers/etnaviv/etnaviv_resource.c index 83179d3cd08..ab77a80c72b 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_resource.c +++ b/src/gallium/drivers/etnaviv/etnaviv_resource.c @@ -622,6 +622,7 @@ etna_resource_get_handle(struct pipe_screen *pscreen, rsc = etna_resource(rsc->external); handle->stride = rsc->levels[0].stride; + handle->offset = rsc->levels[0].offset; handle->modifier = layout_to_modifier(rsc->layout); if (handle->type == WINSYS_HANDLE_TYPE_SHARED) { diff --git a/src/gallium/drivers/freedreno/Android.gen.mk b/src/gallium/drivers/freedreno/Android.gen.mk index 17b6fbe1b7e..d29ba159d5c 100644 --- a/src/gallium/drivers/freedreno/Android.gen.mk +++ b/src/gallium/drivers/freedreno/Android.gen.mk @@ -25,7 +25,7 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES endif ir3_nir_trig_deps := \ - $(LOCAL_PATH)/ir3/ir3_nir_trig.py \ + $(MESA_TOP)/src/freedreno/ir3/ir3_nir_trig.py \ $(MESA_TOP)/src/compiler/nir/nir_algebraic.py intermediates := $(call local-generated-sources-dir) diff --git a/src/gallium/drivers/freedreno/Android.mk b/src/gallium/drivers/freedreno/Android.mk index ccd88a7d16c..f0ae361cd6a 100644 --- a/src/gallium/drivers/freedreno/Android.mk +++ b/src/gallium/drivers/freedreno/Android.mk @@ -44,7 +44,7 @@ LOCAL_C_INCLUDES := \ LOCAL_GENERATED_SOURCES := $(MESA_GEN_NIR_H) LOCAL_SHARED_LIBRARIES := libdrm -LOCAL_STATIC_LIBRARIES := libmesa_glsl libmesa_nir +LOCAL_STATIC_LIBRARIES := libmesa_glsl libmesa_nir libfreedreno_drm libfreedreno_ir3 LOCAL_MODULE := libmesa_pipe_freedreno include $(LOCAL_PATH)/Android.gen.mk diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c index 59dcaa4bf6b..878f67afba0 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c @@ -79,6 +79,7 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) return NULL; pctx = &fd3_ctx->base.base; + pctx->screen = pscreen; fd3_ctx->base.dev = fd_device_ref(screen->dev); fd3_ctx->base.screen = fd_screen(pscreen); diff --git 
a/src/gallium/drivers/freedreno/a4xx/fd4_context.c b/src/gallium/drivers/freedreno/a4xx/fd4_context.c index e9730e9c209..896050918dc 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_context.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.c @@ -79,6 +79,7 @@ fd4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) return NULL; pctx = &fd4_ctx->base.base; + pctx->screen = pscreen; fd4_ctx->base.dev = fd_device_ref(screen->dev); fd4_ctx->base.screen = fd_screen(pscreen); diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c b/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c index da76afdfa60..8a829759cdc 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c @@ -482,6 +482,7 @@ fd5_blitter_blit(struct fd_context *ctx, const struct pipe_blit_info *info) batch->needs_flush = true; fd_batch_flush(batch, false, false); + fd_batch_reference(&batch, NULL); return true; } diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_context.c b/src/gallium/drivers/freedreno/a5xx/fd5_context.c index 37e02c61b1f..211d2b5fee5 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_context.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_context.c @@ -78,6 +78,7 @@ fd5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) return NULL; pctx = &fd5_ctx->base.base; + pctx->screen = pscreen; fd5_ctx->base.dev = fd_device_ref(screen->dev); fd5_ctx->base.screen = fd_screen(pscreen); diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_draw.h b/src/gallium/drivers/freedreno/a5xx/fd5_draw.h index c0d50b29cfd..119ecd4d3ad 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_draw.h +++ b/src/gallium/drivers/freedreno/a5xx/fd5_draw.h @@ -98,8 +98,7 @@ fd5_draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring, if (info->index_size) { struct pipe_resource *idx = info->index.resource; - unsigned max_indicies = (idx->width0 - info->indirect->offset) / - info->index_size; + unsigned max_indicies = idx->width0 / info->index_size; OUT_PKT7(ring, CP_DRAW_INDX_INDIRECT, 6); OUT_RINGP(ring, DRAW4(primtype, DI_SRC_SEL_DMA, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.c b/src/gallium/drivers/freedreno/a6xx/fd6_context.c index 7ba0926e3a0..4f696ef092c 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.c @@ -84,6 +84,7 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) return NULL; pctx = &fd6_ctx->base.base; + pctx->screen = pscreen; fd6_ctx->base.dev = fd_device_ref(screen->dev); fd6_ctx->base.screen = fd_screen(pscreen); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index dfa0944cedb..f70963ef9bd 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -227,15 +227,16 @@ setup_border_colors(struct fd_texture_stateobj *tex, struct bcolor_entry *entrie /* * HACK: for PIPE_FORMAT_X24S8_UINT we end up w/ the * stencil border color value in bc->ui[0] but according - * to desc->swizzle and desc->channel, the .x component + * to desc->swizzle and desc->channel, the .x/.w component * is NONE and the stencil value is in the y component. - * Meanwhile the hardware wants this in the .x componetn. + * Meanwhile the hardware wants this in the .w component + * for x24s8 and the .x component for x32_s8x24. 
*/ if ((format == PIPE_FORMAT_X24S8_UINT) || (format == PIPE_FORMAT_X32_S8X24_UINT)) { if (j == 0) { c = 1; - cd = 0; + cd = (format == PIPE_FORMAT_X32_S8X24_UINT) ? 0 : 3; } else { continue; } @@ -1168,14 +1169,14 @@ t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) WRITE(REG_A6XX_VPC_UNKNOWN_9600, 0); WRITE(REG_A6XX_GRAS_UNKNOWN_8600, 0x880); - WRITE(REG_A6XX_HLSQ_UNKNOWN_BE04, 0); - WRITE(REG_A6XX_SP_UNKNOWN_AE03, 0x00000410); + WRITE(REG_A6XX_HLSQ_UNKNOWN_BE04, 0x80000); + WRITE(REG_A6XX_SP_UNKNOWN_AE03, 0x1430); WRITE(REG_A6XX_SP_IBO_COUNT, 0); WRITE(REG_A6XX_SP_UNKNOWN_B182, 0); WRITE(REG_A6XX_HLSQ_UNKNOWN_BB11, 0); WRITE(REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000); WRITE(REG_A6XX_UCHE_CLIENT_PF, 4); - WRITE(REG_A6XX_RB_UNKNOWN_8E01, 0x0); + WRITE(REG_A6XX_RB_UNKNOWN_8E01, 0x1); WRITE(REG_A6XX_SP_UNKNOWN_AB00, 0x5); WRITE(REG_A6XX_VFD_UNKNOWN_A009, 0x00000001); WRITE(REG_A6XX_RB_UNKNOWN_8811, 0x00000010); @@ -1186,7 +1187,7 @@ t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) WRITE(REG_A6XX_GRAS_UNKNOWN_8101, 0); WRITE(REG_A6XX_GRAS_SAMPLE_CNTL, 0); - WRITE(REG_A6XX_GRAS_UNKNOWN_8110, 0); + WRITE(REG_A6XX_GRAS_UNKNOWN_8110, 0x2); WRITE(REG_A6XX_RB_RENDER_CONTROL0, 0x401); WRITE(REG_A6XX_RB_RENDER_CONTROL1, 0); @@ -1232,7 +1233,10 @@ t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) WRITE(REG_A6XX_PC_UNKNOWN_9E72, 0); WRITE(REG_A6XX_VPC_UNKNOWN_9108, 0x3); WRITE(REG_A6XX_SP_TP_UNKNOWN_B304, 0); - WRITE(REG_A6XX_SP_TP_UNKNOWN_B309, 0x000000a2); + /* NOTE blob seems to (mostly?) use 0xb2 for SP_TP_UNKNOWN_B309 + * but this seems to kill texture gather offsets. + */ + WRITE(REG_A6XX_SP_TP_UNKNOWN_B309, 0xa2); WRITE(REG_A6XX_RB_UNKNOWN_8804, 0); WRITE(REG_A6XX_GRAS_UNKNOWN_80A4, 0); WRITE(REG_A6XX_GRAS_UNKNOWN_80A5, 0); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_format.c b/src/gallium/drivers/freedreno/a6xx/fd6_format.c index 9448ff18e54..fb480266b0c 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_format.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_format.c @@ -216,7 +216,7 @@ static struct fd6_format formats[PIPE_FORMAT_COUNT] = { _T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX), _T(Z24X8_UNORM, X8Z24_UNORM, X8Z24_UNORM, WZYX), - _T(X24S8_UINT, 8_8_8_8_UINT, X8Z24_UNORM, XYZW), // XXX + _T(X24S8_UINT, 8_8_8_8_UINT, X8Z24_UNORM, WZYX), _T(Z24_UNORM_S8_UINT, X8Z24_UNORM, X8Z24_UNORM, WZYX), _T(Z32_FLOAT, 32_FLOAT, R32_FLOAT, WZYX), _T(Z32_FLOAT_S8X24_UINT, 32_FLOAT, R32_FLOAT, WZYX), @@ -450,7 +450,7 @@ fd6_tex_swiz(enum pipe_format format, unsigned char *swiz, */ if (format == PIPE_FORMAT_X24S8_UINT) { const unsigned char stencil_swiz[4] = { - PIPE_SWIZZLE_X, PIPE_SWIZZLE_X, PIPE_SWIZZLE_X, PIPE_SWIZZLE_X + PIPE_SWIZZLE_W, PIPE_SWIZZLE_W, PIPE_SWIZZLE_W, PIPE_SWIZZLE_W }; util_format_compose_swizzles(stencil_swiz, uswiz, swiz); } else if (fd6_pipe2swap(format) != WZYX) { diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c index a00e4446333..ebdfd5b8923 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c @@ -214,6 +214,12 @@ emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, OUT_RING(ring, 0x00000000); } + /* NOTE: blob emits GRAS_LRZ_CNTL plus GRAZ_LRZ_BUFFER_BASE + * plus this CP_EVENT_WRITE at the end in it's own IB.. 
+ */ + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(UNK_25)); + if (rsc->stencil) { struct fd_resource_slice *slice = fd_resource_slice(rsc->stencil, 0); stride = slice->pitch * rsc->stencil->cpp; @@ -402,7 +408,6 @@ set_bin_size(struct fd_ringbuffer *ring, uint32_t w, uint32_t h, uint32_t flag) static void emit_binning_pass(struct fd_batch *batch) { - struct fd_context *ctx = batch->ctx; struct fd_ringbuffer *ring = batch->gmem; struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; @@ -463,12 +468,22 @@ emit_binning_pass(struct fd_batch *batch) OUT_PKT7(ring, CP_EVENT_WRITE, 1); OUT_RING(ring, UNK_2D); - OUT_PKT7(ring, CP_EVENT_WRITE, 4); - OUT_RING(ring, CACHE_FLUSH_TS); - OUT_RELOCW(ring, fd6_context(ctx)->blit_mem, 0, 0, 0); /* ADDR_LO/HI */ - OUT_RING(ring, 0x00000000); - + fd6_cache_inv(batch, ring); + fd6_cache_flush(batch, ring); fd_wfi(batch, ring); + + OUT_PKT7(ring, CP_WAIT_FOR_ME, 0); + + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); + OUT_RING(ring, 0x0); + + OUT_PKT7(ring, CP_SET_MODE, 1); + OUT_RING(ring, 0x0); + + OUT_WFI5(ring); + + OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1); + OUT_RING(ring, 0x7c400004); /* RB_CCU_CNTL */ } static void @@ -544,6 +559,15 @@ fd6_emit_tile_init(struct fd_batch *batch) OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1); OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9805, 1); + OUT_RING(ring, 0x1); + + OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A0F8, 1); + OUT_RING(ring, 0x1); + + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x1); } else { set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000); patch_draws(batch, IGNORE_VISIBILITY); @@ -580,9 +604,6 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) struct fd6_context *fd6_ctx = fd6_context(ctx); struct fd_ringbuffer *ring = batch->gmem; - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x7)); - emit_marker6(ring, 7); OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_GMEM) | 0x10); @@ -595,8 +616,6 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) set_scissor(ring, x1, y1, x2, y2); - set_window_offset(ring, x1, y1); - OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1); OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE); @@ -620,7 +639,32 @@ fd6_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) (tile->p * 4) + (32 * A6XX_VSC_DATA_PITCH), 0, 0); OUT_RELOC(ring, fd6_ctx->vsc_data2, (tile->p * A6XX_VSC_DATA2_PITCH), 0, 0); + + set_window_offset(ring, x1, y1); + + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000); + + OUT_PKT4(ring, REG_A6XX_VPC_SO_OVERRIDE, 1); + OUT_RING(ring, A6XX_VPC_SO_OVERRIDE_SO_DISABLE); + + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); + OUT_RING(ring, 0x0); + + OUT_PKT7(ring, CP_SET_MODE, 1); + OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8804, 1); + OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_SP_TP_UNKNOWN_B304, 1); + OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_80A4, 1); + OUT_RING(ring, 0x0); } else { + set_window_offset(ring, x1, y1); + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); OUT_RING(ring, 0x1); @@ -640,6 +684,13 @@ set_blit_scissor(struct fd_batch *batch, struct fd_ringbuffer *ring) blit_scissor.maxx = MIN2(pfb->width, batch->max_scissor.maxx); blit_scissor.maxy = MIN2(pfb->height, batch->max_scissor.maxy); + /* NOTE: blob switches to CP_BLIT instead of CP_EVENT_WRITE:BLIT for + * small render targets. 
But since we align pitch to binw I think + * we can get away avoiding GPU hangs a simpler way, by just rounding + * up the blit scissor: + */ + blit_scissor.maxx = MAX2(blit_scissor.maxx, batch->ctx->screen->gmem_alignw); + OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2); OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_TL_X(blit_scissor.minx) | @@ -1021,26 +1072,6 @@ prepare_tile_fini_ib(struct fd_batch *batch) FD_RINGBUFFER_STREAMING); ring = batch->tile_fini; - if (use_hw_binning(batch)) { - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x5) | 0x10); - } - - OUT_PKT7(ring, CP_SET_DRAW_STATE, 3); - OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) | - CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS | - CP_SET_DRAW_STATE__0_GROUP_ID(0)); - OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0)); - OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0)); - - OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); - OUT_RING(ring, 0x0); - - emit_marker6(ring, 7); - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10); - emit_marker6(ring, 7); - set_blit_scissor(batch, ring); if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { @@ -1074,7 +1105,32 @@ prepare_tile_fini_ib(struct fd_batch *batch) static void fd6_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) { - fd6_emit_ib(batch->gmem, batch->tile_fini); + struct fd_ringbuffer *ring = batch->gmem; + + if (use_hw_binning(batch)) { + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x5) | 0x10); + } + + OUT_PKT7(ring, CP_SET_DRAW_STATE, 3); + OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) | + CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS | + CP_SET_DRAW_STATE__0_GROUP_ID(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0)); + + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1); + OUT_RING(ring, 0x0); + + emit_marker6(ring, 7); + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10); + emit_marker6(ring, 7); + + fd6_emit_ib(ring, batch->tile_fini); + + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, A2XX_CP_SET_MARKER_0_MODE(0x7)); } static void diff --git a/src/gallium/drivers/iris/iris_fence.c b/src/gallium/drivers/iris/iris_fence.c index 06452f70966..f94a52724cf 100644 --- a/src/gallium/drivers/iris/iris_fence.c +++ b/src/gallium/drivers/iris/iris_fence.c @@ -205,24 +205,25 @@ iris_fence_await(struct pipe_context *ctx, #define MSEC_PER_SEC (1000) static uint64_t -rel2abs(uint64_t timeout) +gettime_ns(void) { - struct timespec ts; - uint64_t now; + struct timespec current; + clock_gettime(CLOCK_MONOTONIC, ¤t); + return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec; +} - if (!timeout) +static uint64_t +rel2abs(uint64_t timeout) +{ + if (timeout == 0) return 0; - if (timeout == PIPE_TIMEOUT_INFINITE) - return INT64_MAX; - - clock_gettime(CLOCK_MONOTONIC, &ts); - now = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; + uint64_t current_time = gettime_ns(); + uint64_t max_timeout = (uint64_t) INT64_MAX - current_time; - if (now > INT64_MAX - timeout) - return INT64_MAX; + timeout = MIN2(max_timeout, timeout); - return now + timeout; + return current_time + timeout; } static boolean @@ -243,7 +244,7 @@ iris_fence_finish(struct pipe_screen *p_screen, struct drm_syncobj_wait args = { .handles = (uintptr_t)handles, .count_handles = fence->count, - .timeout_nsec = rel2abs(timeout), /* XXX */ + .timeout_nsec = rel2abs(timeout), .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL }; return 
drm_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args) == 0; diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index 30ec3f1ff86..7512889c47b 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -468,7 +468,8 @@ iris_setup_uniforms(const struct brw_compiler *compiler, if (load->src[0].ssa == temp_ubo_name) { nir_instr_rewrite_src(instr, &load->src[0], nir_src_for_ssa(nir_imm_int(&b, 0))); - } else if (nir_src_as_uint(load->src[0]) == 0) { + } else if (nir_src_is_const(load->src[0]) && + nir_src_as_uint(load->src[0]) == 0) { nir_ssa_def *offset = nir_iadd(&b, load->src[1].ssa, nir_imm_int(&b, 4 * num_system_values)); diff --git a/src/gallium/drivers/iris/iris_program_cache.c b/src/gallium/drivers/iris/iris_program_cache.c index 849f96906ca..e82375f0825 100644 --- a/src/gallium/drivers/iris/iris_program_cache.c +++ b/src/gallium/drivers/iris/iris_program_cache.c @@ -45,7 +45,7 @@ #include "iris_resource.h" struct keybox { - uint8_t size; + uint16_t size; enum iris_program_cache_id cache_id; uint8_t data[0]; }; diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c index fd50139defc..343339685b7 100644 --- a/src/gallium/drivers/iris/iris_resource.c +++ b/src/gallium/drivers/iris/iris_resource.c @@ -1003,6 +1003,7 @@ iris_map_copy_region(struct iris_transfer *map) .nr_samples = xfer->resource->nr_samples, .nr_storage_samples = xfer->resource->nr_storage_samples, .array_size = box->depth, + .format = res->internal_format, }; if (xfer->resource->target == PIPE_BUFFER) @@ -1012,22 +1013,6 @@ iris_map_copy_region(struct iris_transfer *map) else templ.target = PIPE_TEXTURE_2D; - /* Depth, stencil, and ASTC can't be linear surfaces, so we can't use - * xfer->resource->format directly. Pick a bpb compatible format so - * resource creation will succeed; blorp_copy will override it anyway. - */ - switch (util_format_get_blocksizebits(res->internal_format)) { - case 8: templ.format = PIPE_FORMAT_R8_UINT; break; - case 16: templ.format = PIPE_FORMAT_R8G8_UINT; break; - case 24: templ.format = PIPE_FORMAT_R8G8B8_UINT; break; - case 32: templ.format = PIPE_FORMAT_R8G8B8A8_UINT; break; - case 48: templ.format = PIPE_FORMAT_R16G16B16_UINT; break; - case 64: templ.format = PIPE_FORMAT_R16G16B16A16_UINT; break; - case 96: templ.format = PIPE_FORMAT_R32G32B32_UINT; break; - case 128: templ.format = PIPE_FORMAT_R32G32B32A32_UINT; break; - default: unreachable("Invalid bpb"); - } - map->staging = iris_resource_create(pscreen, &templ); assert(map->staging); @@ -1443,6 +1428,10 @@ iris_transfer_map(struct pipe_context *ctx, no_gpu = true; } + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + if (fmtl->txc == ISL_TXC_ASTC) + no_gpu = true; + if ((map_would_stall || res->aux.usage == ISL_AUX_USAGE_CCS_E) && !no_gpu) { /* If we need a synchronous mapping and the resource is busy, * we copy to/from a linear temporary buffer using the GPU. 
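/* A minimal standalone sketch, not part of the patch, of the overflow-safe
 * relative-to-absolute timeout conversion that the new gettime_ns()/rel2abs()
 * pair in iris_fence.c above performs: PIPE_TIMEOUT_INFINITE is ~0ull, so a
 * plain "now + timeout" would wrap, and clamping the relative value to
 * INT64_MAX - now pins the absolute deadline at INT64_MAX instead.
 * NSEC_PER_SEC and MIN2 are spelled out locally (SKETCH_*) to keep the sketch
 * self-contained; the real code uses Mesa's macros.
 */
#include <stdint.h>
#include <time.h>

#define SKETCH_NSEC_PER_SEC 1000000000ull
#define SKETCH_MIN2(a, b)   ((a) < (b) ? (a) : (b))

static uint64_t
sketch_rel2abs_ns(uint64_t rel_timeout_ns)
{
   if (rel_timeout_ns == 0)
      return 0;                       /* poll once, do not wait */

   struct timespec ts;
   clock_gettime(CLOCK_MONOTONIC, &ts);
   uint64_t now = (uint64_t)ts.tv_sec * SKETCH_NSEC_PER_SEC +
                  (uint64_t)ts.tv_nsec;

   /* The kernel deadline is a signed 64-bit value, so never exceed INT64_MAX. */
   uint64_t max_rel = (uint64_t)INT64_MAX - now;
   return now + SKETCH_MIN2(rel_timeout_ns, max_rel);
}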
@@ -1586,6 +1575,8 @@ iris_flush_and_dirty_for_history(struct iris_context *ice, flush |= PIPE_CONTROL_RENDER_TARGET_FLUSH; iris_emit_pipe_control_flush(batch, flush); + + iris_dirty_for_history(ice, res); } bool diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c index 5b321a6f862..30d32b2ed43 100644 --- a/src/gallium/drivers/iris/iris_screen.c +++ b/src/gallium/drivers/iris/iris_screen.c @@ -179,6 +179,7 @@ iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES: case PIPE_CAP_INVALIDATE_BUFFER: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: return true; case PIPE_CAP_CONSERVATIVE_RASTER_INNER_COVERAGE: case PIPE_CAP_TGSI_FS_FBFETCH: diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 677fa5aba53..f1b7e631892 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -631,6 +631,7 @@ iris_emit_l3_config(struct iris_batch *batch, const struct gen_l3_config *cfg, * desirable behavior. */ reg.ErrorDetectionBehaviorControl = true; + reg.UseFullWays = true; #endif reg.URBAllocation = cfg->n[GEN_L3P_URB]; reg.ROAllocation = cfg->n[GEN_L3P_RO]; @@ -733,20 +734,12 @@ iris_init_render_context(struct iris_screen *screen, } iris_emit_lri(batch, HALF_SLICE_CHICKEN7, reg_val); - /* WA_2204188704: Pixel Shader Panic dispatch must be disabled. */ - iris_pack_state(GENX(COMMON_SLICE_CHICKEN3), ®_val, reg) { - reg.PSThreadPanicDispatch = 0x3; - reg.PSThreadPanicDispatchMask = 0x3; - } - iris_emit_lri(batch, COMMON_SLICE_CHICKEN3, reg_val); - iris_pack_state(GENX(SLICE_COMMON_ECO_CHICKEN1), ®_val, reg) { reg.StateCacheRedirectToCSSectionEnable = true; reg.StateCacheRedirectToCSSectionEnableMask = true; } iris_emit_lri(batch, SLICE_COMMON_ECO_CHICKEN1, reg_val); - // XXX: 3D_MODE? #endif @@ -1901,7 +1894,8 @@ iris_create_surface(struct pipe_context *ctx, return NULL; } - surf->view = (struct isl_view) { + struct isl_view *view = &surf->view; + *view = (struct isl_view) { .format = fmt.fmt, .base_level = tmpl->u.tex.level, .levels = 1, @@ -1925,15 +1919,98 @@ iris_create_surface(struct pipe_context *ctx, if (!unlikely(map)) return NULL; - unsigned aux_modes = res->aux.possible_usages; - while (aux_modes) { - enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes); + if (!isl_format_is_compressed(res->surf.format)) { + /* This is a normal surface. Fill out a SURFACE_STATE for each possible + * auxiliary surface mode and return the pipe_surface. + */ + unsigned aux_modes = res->aux.possible_usages; + while (aux_modes) { + enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes); + + fill_surface_state(&screen->isl_dev, map, res, view, aux_usage); - fill_surface_state(&screen->isl_dev, map, res, &surf->view, aux_usage); + map += SURFACE_STATE_ALIGNMENT; + } - map += SURFACE_STATE_ALIGNMENT; + return psurf; + } + + /* The resource has a compressed format, which is not renderable, but we + * have a renderable view format. We must be attempting to upload blocks + * of compressed data via an uncompressed view. + * + * In this case, we can assume there are no auxiliary buffers, a single + * miplevel, and that the resource is single-sampled. Gallium may try + * and create an uncompressed view with multiple layers, however. 
+ */ + assert(!isl_format_is_compressed(fmt.fmt)); + assert(res->aux.possible_usages == 1 << ISL_AUX_USAGE_NONE); + assert(res->surf.samples == 1); + assert(view->levels == 1); + + struct isl_surf isl_surf; + uint32_t offset_B = 0, tile_x_sa = 0, tile_y_sa = 0; + + if (view->base_level > 0) { + /* We can't rely on the hardware's miplevel selection with such + * a substantial lie about the format, so we select a single image + * using the Tile X/Y Offset fields. In this case, we can't handle + * multiple array slices. + * + * On Broadwell, HALIGN and VALIGN are specified in pixels and are + * hard-coded to align to exactly the block size of the compressed + * texture. This means that, when reinterpreted as a non-compressed + * texture, the tile offsets may be anything and we can't rely on + * X/Y Offset. + * + * Return NULL to force the state tracker to take fallback paths. + */ + if (view->array_len > 1 || GEN_GEN == 8) + return NULL; + + const bool is_3d = res->surf.dim == ISL_SURF_DIM_3D; + isl_surf_get_image_surf(&screen->isl_dev, &res->surf, + view->base_level, + is_3d ? 0 : view->base_array_layer, + is_3d ? view->base_array_layer : 0, + &isl_surf, + &offset_B, &tile_x_sa, &tile_y_sa); + + /* We use address and tile offsets to access a single level/layer + * as a subimage, so reset level/layer so it doesn't offset again. + */ + view->base_array_layer = 0; + view->base_level = 0; + } else { + /* Level 0 doesn't require tile offsets, and the hardware can find + * array slices using QPitch even with the format override, so we + * can allow layers in this case. Copy the original ISL surface. + */ + memcpy(&isl_surf, &res->surf, sizeof(isl_surf)); } + /* Scale down the image dimensions by the block size. */ + const struct isl_format_layout *fmtl = + isl_format_get_layout(res->surf.format); + isl_surf.format = fmt.fmt; + isl_surf.logical_level0_px = isl_surf_get_logical_level0_el(&isl_surf); + isl_surf.phys_level0_sa = isl_surf_get_phys_level0_el(&isl_surf); + tile_x_sa /= fmtl->bw; + tile_y_sa /= fmtl->bh; + + psurf->width = isl_surf.logical_level0_px.width; + psurf->height = isl_surf.logical_level0_px.height; + + struct isl_surf_fill_state_info f = { + .surf = &isl_surf, + .view = view, + .mocs = mocs(res->bo), + .address = res->bo->gtt_offset + offset_B, + .x_offset_sa = tile_x_sa, + .y_offset_sa = tile_y_sa, + }; + + isl_surf_fill_state_s(&screen->isl_dev, map, &f); return psurf; } @@ -3863,7 +3940,7 @@ surf_state_offset_for_aux(struct iris_resource *res, enum isl_aux_usage aux_usage) { return SURFACE_STATE_ALIGNMENT * - util_bitcount(res->aux.possible_usages & ((1 << aux_usage) - 1)); + util_bitcount(aux_modes & ((1 << aux_usage) - 1)); } static void @@ -3893,11 +3970,12 @@ update_clear_value(struct iris_context *ice, struct iris_batch *batch, struct iris_resource *res, struct iris_state_ref *state, - unsigned aux_modes, + unsigned all_aux_modes, struct isl_view *view) { struct iris_screen *screen = batch->screen; const struct gen_device_info *devinfo = &screen->devinfo; + UNUSED unsigned aux_modes = all_aux_modes; /* We only need to update the clear color in the surface state for gen8 and * gen9. Newer gens can read it directly from the clear color state buffer. 
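surf_state_offset_for_aux() in the iris_state.c hunk above locates a SURFACE_STATE inside a packed list by counting how many enabled aux modes sort below the requested one. A small self-contained illustration of that bit-counting idea follows; the enum values, STATE_STRIDE constant, and helper names are made up for the example (the real driver uses enum isl_aux_usage, SURFACE_STATE_ALIGNMENT and util_bitcount), so treat it as a sketch rather than iris code.

#include <stdio.h>

#define STATE_STRIDE 64  /* stand-in for SURFACE_STATE_ALIGNMENT */

/* Hypothetical aux modes, mirroring the idea of enum isl_aux_usage. */
enum aux_mode { AUX_NONE, AUX_MCS, AUX_CCS_D, AUX_CCS_E };

/* Surface states are packed back to back, one per enabled mode, in
 * ascending enum order, so a mode's offset is the number of enabled
 * modes below it times the per-state stride.
 */
static unsigned
state_offset_for_aux(unsigned enabled_modes, enum aux_mode mode)
{
   return STATE_STRIDE *
          __builtin_popcount(enabled_modes & ((1u << mode) - 1));
}

int
main(void)
{
   unsigned modes = (1u << AUX_NONE) | (1u << AUX_CCS_E);

   /* AUX_CCS_E is the second enabled mode, so it sits one stride in. */
   printf("%u\n", state_offset_for_aux(modes, AUX_CCS_E)); /* prints 64 */
   return 0;
}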
@@ -3912,13 +3990,13 @@ update_clear_value(struct iris_context *ice, while (aux_modes) { enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes); - surf_state_update_clear_value(batch, res, state, aux_modes, + surf_state_update_clear_value(batch, res, state, all_aux_modes, aux_usage); } } else if (devinfo->gen == 8) { pipe_resource_reference(&state->res, NULL); void *map = alloc_surface_states(ice->state.surface_uploader, - state, res->aux.possible_usages); + state, all_aux_modes); while (aux_modes) { enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes); fill_surface_state(&screen->isl_dev, map, res, view, aux_usage); diff --git a/src/gallium/drivers/lima/lima_draw.c b/src/gallium/drivers/lima/lima_draw.c index e0a1e181937..22910fbe72b 100644 --- a/src/gallium/drivers/lima/lima_draw.c +++ b/src/gallium/drivers/lima/lima_draw.c @@ -1438,6 +1438,7 @@ lima_pack_wb_zsbuf_reg(struct lima_context *ctx, uint32_t *wb_reg, int wb_idx) { struct lima_context_framebuffer *fb = &ctx->framebuffer; struct lima_resource *res = lima_resource(fb->base.zsbuf->texture); + int level = fb->base.zsbuf->u.tex.level; uint32_t format; @@ -1455,14 +1456,14 @@ lima_pack_wb_zsbuf_reg(struct lima_context *ctx, uint32_t *wb_reg, int wb_idx) struct lima_pp_wb_reg *wb = (void *)wb_reg; wb[wb_idx].type = 0x01; /* 1 for depth, stencil */ - wb[wb_idx].address = res->bo->va; + wb[wb_idx].address = res->bo->va + res->levels[level].offset; wb[wb_idx].pixel_format = format; if (res->tiled) { wb[wb_idx].pixel_layout = 0x2; wb[wb_idx].pitch = fb->tiled_w; } else { wb[wb_idx].pixel_layout = 0x0; - wb[wb_idx].pitch = res->levels[0].stride / 8; + wb[wb_idx].pitch = res->levels[level].stride / 8; } wb[wb_idx].mrt_bits = 0; } @@ -1472,6 +1473,7 @@ lima_pack_wb_cbuf_reg(struct lima_context *ctx, uint32_t *wb_reg, int wb_idx) { struct lima_context_framebuffer *fb = &ctx->framebuffer; struct lima_resource *res = lima_resource(fb->base.cbufs[0]->texture); + int level = fb->base.cbufs[0]->u.tex.level; bool swap_channels = false; switch (fb->base.cbufs[0]->format) { @@ -1485,14 +1487,14 @@ lima_pack_wb_cbuf_reg(struct lima_context *ctx, uint32_t *wb_reg, int wb_idx) struct lima_pp_wb_reg *wb = (void *)wb_reg; wb[wb_idx].type = 0x02; /* 2 for color buffer */ - wb[wb_idx].address = res->bo->va; + wb[wb_idx].address = res->bo->va + res->levels[level].offset; wb[wb_idx].pixel_format = LIMA_PIXEL_FORMAT_B8G8R8A8; if (res->tiled) { wb[wb_idx].pixel_layout = 0x2; wb[wb_idx].pitch = fb->tiled_w; } else { wb[wb_idx].pixel_layout = 0x0; - wb[wb_idx].pitch = res->levels[0].stride / 8; + wb[wb_idx].pitch = res->levels[level].stride / 8; } wb[wb_idx].mrt_bits = swap_channels ? 0x4 : 0x0; } diff --git a/src/gallium/drivers/lima/lima_texture.c b/src/gallium/drivers/lima/lima_texture.c index 90af0dec74b..ead5e79a02e 100644 --- a/src/gallium/drivers/lima/lima_texture.c +++ b/src/gallium/drivers/lima/lima_texture.c @@ -119,19 +119,17 @@ lima_texture_desc_set_res(struct lima_context *ctx, uint32_t *desc, uint32_t base_va = lima_res->bo->va; - /* attach level 0 */ - desc[6] |= (base_va << 24) | (layout << 13); - desc[7] |= base_va >> 8; + /* attach first level */ + uint32_t first_va = base_va + lima_res->levels[first_level].offset; + desc[6] |= (first_va << 24) | (layout << 13); + desc[7] |= first_va >> 8; /* Attach remaining levels. * Each subsequent mipmap address is specified using the 26 msbs. 
* These addresses are then packed continuously in memory */ unsigned current_desc_index = 7; unsigned current_desc_bit_index = 24; - for (i = 1; i < LIMA_MAX_MIP_LEVELS; i++) { - if (first_level + i > last_level) - break; - + for (i = first_level + 1; i <= last_level; i++) { uint32_t address = base_va + lima_res->levels[i].offset; address = (address >> 6); desc[current_desc_index] |= (address << current_desc_bit_index); @@ -163,32 +161,21 @@ lima_update_tex_desc(struct lima_context *ctx, struct lima_sampler_state *sample /* 2D texture */ desc[1] |= 0x400; - desc[1] &= ~0xff000000; + first_level = texture->base.u.tex.first_level; + last_level = texture->base.u.tex.last_level; + if (last_level - first_level >= LIMA_MAX_MIP_LEVELS) + last_level = first_level + LIMA_MAX_MIP_LEVELS - 1; + switch (sampler->base.min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - first_level = texture->base.u.tex.first_level; - last_level = texture->base.u.tex.last_level; - if (last_level - first_level >= LIMA_MAX_MIP_LEVELS) - last_level = first_level + LIMA_MAX_MIP_LEVELS - 1; - mipmapping = true; - desc[1] |= ((last_level - first_level) << 24); - desc[2] &= ~0x0600; - break; case PIPE_TEX_MIPFILTER_LINEAR: - first_level = texture->base.u.tex.first_level; - last_level = texture->base.u.tex.last_level; - if (last_level - first_level >= LIMA_MAX_MIP_LEVELS) - last_level = first_level + LIMA_MAX_MIP_LEVELS - 1; + desc[2] |= 0x0600; + case PIPE_TEX_MIPFILTER_NEAREST: mipmapping = true; desc[1] |= ((last_level - first_level) << 24); - desc[2] |= 0x0600; break; case PIPE_TEX_MIPFILTER_NONE: default: - first_level = 0; - last_level = 0; mipmapping = false; - desc[2] &= ~0x0600; break; } diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 510346d2abf..125d2695707 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -271,6 +271,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_INT64: case PIPE_CAP_INT64_DIVMOD: case PIPE_CAP_QUERY_SO_OVERFLOW: + case PIPE_CAP_TGSI_DIV: return 1; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index aca3b0afb1e..1f702a987d8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -51,12 +51,12 @@ NVC0LegalizeSSA::handleDIV(Instruction *i) // Generate movs to the input regs for the call we want to generate for (int s = 0; i->srcExists(s); ++s) { Instruction *ld = i->getSrc(s)->getInsn(); - assert(ld->getSrc(0) != NULL); // check if we are moving an immediate, propagate it in that case if (!ld || ld->fixed || (ld->op != OP_LOAD && ld->op != OP_MOV) || !(ld->src(0).getFile() == FILE_IMMEDIATE)) bld.mkMovToReg(s, i->getSrc(s)); else { + assert(ld->getSrc(0) != NULL); bld.mkMovToReg(s, ld->getSrc(0)); // Clear the src, to make code elimination possible here before we // delete the instruction i later diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 0b3220903b9..bfdb923379b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -2080,14 +2080,15 @@ void AlgebraicOpt::handleCVT_CVT(Instruction *cvt) { Instruction *insn = cvt->getSrc(0)->getInsn(); - RoundMode rnd = insn->rnd; - if 
(insn->saturate || + if (!insn || + insn->saturate || insn->subOp || insn->dType != insn->sType || insn->dType != cvt->sType) return; + RoundMode rnd = insn->rnd; switch (insn->op) { case OP_CEIL: rnd = ROUND_PI; diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index b5dc033bd2d..58889d1be8d 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -247,6 +247,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS: case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: case PIPE_CAP_IMAGE_LOAD_FORMATTED: + case PIPE_CAP_TGSI_DIV: return 0; case PIPE_CAP_MAX_GS_INVOCATIONS: diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index 940fb9ce25c..a725aedcd8e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -346,6 +346,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, break; default: assert(!"unsupported IR!"); + free(info); return false; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 423b6af3b64..e28f90e4047 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -218,6 +218,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: + case PIPE_CAP_TGSI_DIV: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP: return 1; /* class_3d >= NVA0_3D_CLASS; */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index 55167a27c09..5af5e38b82e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -599,19 +599,20 @@ nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso) static inline void nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s, - unsigned nr, void **hwcso) + unsigned nr, void **hwcsos) { unsigned highest_found = 0; unsigned i; assert(nr <= PIPE_MAX_SAMPLERS); for (i = 0; i < nr; ++i) { + struct nv50_tsc_entry *hwcso = hwcsos ? nv50_tsc_entry(hwcsos[i]) : NULL; struct nv50_tsc_entry *old = nv50->samplers[s][i]; - if (hwcso[i]) + if (hwcso) highest_found = i; - nv50->samplers[s][i] = nv50_tsc_entry(hwcso[i]); + nv50->samplers[s][i] = hwcso; if (old) nv50_screen_tsc_unlock(nv50->screen, old); } @@ -685,12 +686,13 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, int s, assert(nr <= PIPE_MAX_SAMPLERS); for (i = 0; i < nr; ++i) { + struct pipe_sampler_view *view = views ? 
views[i] : NULL; struct nv50_tic_entry *old = nv50_tic_entry(nv50->textures[s][i]); if (old) nv50_screen_tic_unlock(nv50->screen, old); - if (views[i] && views[i]->texture) { - struct pipe_resource *res = views[i]->texture; + if (view && view->texture) { + struct pipe_resource *res = view->texture; if (res->target == PIPE_BUFFER && (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) nv50->textures_coherent[s] |= 1 << i; @@ -700,7 +702,7 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, int s, nv50->textures_coherent[s] &= ~(1 << i); } - pipe_sampler_view_reference(&nv50->textures[s][i], views[i]); + pipe_sampler_view_reference(&nv50->textures[s][i], view); } assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS); @@ -768,6 +770,7 @@ nv50_sp_state_create(struct pipe_context *pipe, break; default: assert(!"unsupported IR!"); + free(prog); return NULL; } @@ -864,6 +867,7 @@ nv50_cp_state_create(struct pipe_context *pipe, break; default: assert(!"unsupported IR!"); + free(prog); return NULL; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index c81d8952c98..1ff9f19f139 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -594,6 +594,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, break; default: assert(!"unsupported IR!"); + free(info); return false; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 79224ac99a7..1c2d3ee3d7c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -276,6 +276,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: case PIPE_CAP_QUERY_SO_OVERFLOW: case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: + case PIPE_CAP_TGSI_DIV: return 1; case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 12e21862ee0..3c31e162b58 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -463,22 +463,23 @@ nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso) static inline void nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, unsigned s, - unsigned nr, void **hwcso) + unsigned nr, void **hwcsos) { unsigned highest_found = 0; unsigned i; for (i = 0; i < nr; ++i) { + struct nv50_tsc_entry *hwcso = hwcsos ? nv50_tsc_entry(hwcsos[i]) : NULL; struct nv50_tsc_entry *old = nvc0->samplers[s][i]; - if (hwcso[i]) + if (hwcso) highest_found = i; - if (hwcso[i] == old) + if (hwcso == old) continue; nvc0->samplers_dirty[s] |= 1 << i; - nvc0->samplers[s][i] = nv50_tsc_entry(hwcso[i]); + nvc0->samplers[s][i] = hwcso; if (old) nvc0_screen_tsc_unlock(nvc0->screen, old); } @@ -523,14 +524,15 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, unsigned i; for (i = 0; i < nr; ++i) { + struct pipe_sampler_view *view = views ? 
views[i] : NULL; struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]); - if (views[i] == nvc0->textures[s][i]) + if (view == nvc0->textures[s][i]) continue; nvc0->textures_dirty[s] |= 1 << i; - if (views[i] && views[i]->texture) { - struct pipe_resource *res = views[i]->texture; + if (view && view->texture) { + struct pipe_resource *res = view->texture; if (res->target == PIPE_BUFFER && (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) nvc0->textures_coherent[s] |= 1 << i; @@ -548,7 +550,7 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, nvc0_screen_tic_unlock(nvc0->screen, old); } - pipe_sampler_view_reference(&nvc0->textures[s][i], views[i]); + pipe_sampler_view_reference(&nvc0->textures[s][i], view); } for (i = nr; i < nvc0->num_textures[s]; ++i) { @@ -607,6 +609,7 @@ nvc0_sp_state_create(struct pipe_context *pipe, break; default: assert(!"unsupported IR!"); + free(prog); return NULL; } @@ -739,6 +742,7 @@ nvc0_cp_state_create(struct pipe_context *pipe, break; default: assert(!"unsupported IR!"); + free(prog); return NULL; } diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index c5e4dec20bd..022aace73db 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -393,23 +393,24 @@ nve4_compute_validate_constbufs(struct nvc0_context *nvc0) uint64_t address = nvc0->screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s); - assert(i > 0); /* we really only want uniform buffer objects */ - - BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); - PUSH_DATAh(push, address + NVC0_CB_AUX_UBO_INFO(i - 1)); - PUSH_DATA (push, address + NVC0_CB_AUX_UBO_INFO(i - 1)); - BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); - PUSH_DATA (push, 4 * 4); - PUSH_DATA (push, 0x1); - BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4); - PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); - - PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset); - PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset); - PUSH_DATA (push, nvc0->constbuf[5][i].size); - PUSH_DATA (push, 0); - BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD); + /* constbufs above 0 will are fetched via ubo info in the shader */ + if (i > 0) { + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, address + NVC0_CB_AUX_UBO_INFO(i - 1)); + PUSH_DATA (push, address + NVC0_CB_AUX_UBO_INFO(i - 1)); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, 4 * 4); + PUSH_DATA (push, 0x1); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 4); + PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); + + PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset); + PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset); + PUSH_DATA (push, nvc0->constbuf[s][i].size); + PUSH_DATA (push, 0); + } + BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD); res->cb_bindings[s] |= 1 << i; } } @@ -554,9 +555,9 @@ nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size) static void nve4_compute_setup_buf_cb(struct nvc0_context *nvc0, bool gp100, void *desc) { - // only user constant buffers 1-6 can be put in the descriptor, the rest are + // only user constant buffers 0-6 can be put in the descriptor, the rest are // loaded through global memory - for (int i = 1; i <= 6; i++) { + for (int i = 0; i <= 6; i++) { if (nvc0->constbuf[5][i].user || !nvc0->constbuf[5][i].u.buf) continue; @@ -609,6 +610,10 @@ nve4_compute_setup_launch_desc(struct 
nvc0_context *nvc0, if (nvc0->constbuf[5][0].user || cp->parm_size) { nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo, NVC0_CB_USR_INFO(5), 1 << 16); + + // Later logic will attempt to bind a real buffer at position 0. That + // should not happen if we've bound a user buffer. + assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf); } nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo, NVC0_CB_AUX_INFO(5), 1 << 11); @@ -649,6 +654,10 @@ gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, if (nvc0->constbuf[5][0].user || cp->parm_size) { gp100_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo, NVC0_CB_USR_INFO(5), 1 << 16); + + // Later logic will attempt to bind a real buffer at position 0. That + // should not happen if we've bound a user buffer. + assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf); } gp100_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo, NVC0_CB_AUX_INFO(5), 1 << 11); diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c index 29f3ce7ff71..92517c7d1c6 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c +++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c @@ -995,7 +995,7 @@ emit_load_const(compiler_context *ctx, nir_load_const_instr *instr) { nir_ssa_def def = instr->def; - float *v = ralloc_array(NULL, float, 4); + float *v = rzalloc_array(NULL, float, 4); nir_const_load_to_arr(v, instr, f32); _mesa_hash_table_u64_insert(ctx->ssa_constants, def.index + 1, v); } diff --git a/src/gallium/drivers/panfrost/pan_swizzle.c b/src/gallium/drivers/panfrost/pan_swizzle.c index 52a907ddd55..60f6953443f 100644 --- a/src/gallium/drivers/panfrost/pan_swizzle.c +++ b/src/gallium/drivers/panfrost/pan_swizzle.c @@ -164,10 +164,10 @@ panfrost_texture_swizzle(unsigned off_x, /* Use fast path if available */ if (!(off_x || off_y) && (width == dest_width)) { - if (bytes_per_pixel == 4 /* && (ALIGN(width, 16) == width) */) { + if (bytes_per_pixel == 4 && (ALIGN(width, 16) == width)) { swizzle_bpp4_align16(width, height, source_stride >> 2, (block_pitch * 256 >> 4), (const uint32_t *) pixels, (uint32_t *) ldest); return; - } else if (bytes_per_pixel == 1 /* && (ALIGN(width, 16) == width) */) { + } else if (bytes_per_pixel == 1 && (ALIGN(width, 16) == width)) { swizzle_bpp1_align16(width, height, source_stride, (block_pitch * 256 >> 4), pixels, (uint8_t *) ldest); return; } diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 014055b221e..0ccc753147b 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -62,7 +62,8 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, q->buf = r300->rws->buffer_create(r300->rws, r300screen->info.gart_page_size, r300screen->info.gart_page_size, - RADEON_DOMAIN_GTT, 0); + RADEON_DOMAIN_GTT, + RADEON_FLAG_NO_INTERPROCESS_SHARING); if (!q->buf) { FREE(q); return NULL; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 211d35d0607..de98d09cb36 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -915,7 +915,8 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, r300->vbo = rws->buffer_create(rws, MAX2(R300_MAX_DRAW_VBO_SIZE, size), R300_BUFFER_ALIGNMENT, - RADEON_DOMAIN_GTT, 0); + RADEON_DOMAIN_GTT, + RADEON_FLAG_NO_INTERPROCESS_SHARING); if (!r300->vbo) { return FALSE; } diff --git 
a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 4af1c46856e..c946cfc8d03 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -103,7 +103,8 @@ r300_buffer_transfer_map( struct pipe_context *context, /* Create a new one in the same pipe_resource. */ new_buf = r300->rws->buffer_create(r300->rws, rbuf->b.b.width0, R300_BUFFER_ALIGNMENT, - rbuf->domain, 0); + rbuf->domain, + RADEON_FLAG_NO_INTERPROCESS_SHARING); if (new_buf) { /* Discard the old buffer. */ pb_reference(&rbuf->buf, NULL); @@ -183,7 +184,8 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, rbuf->b.b.width0, R300_BUFFER_ALIGNMENT, - rbuf->domain, 0); + rbuf->domain, + RADEON_FLAG_NO_INTERPROCESS_SHARING); if (!rbuf->buf) { FREE(rbuf); return NULL; diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 46d88b34638..21ade4022c5 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -1113,8 +1113,16 @@ r300_texture_create_object(struct r300_screen *rscreen, /* Create the backing buffer if needed. */ if (!tex->buf) { + /* Only use the first domain for allocation. Multiple domains are not allowed. */ + unsigned alloc_domain = + tex->domain & RADEON_DOMAIN_VRAM ? RADEON_DOMAIN_VRAM : + RADEON_DOMAIN_GTT; + tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048, - tex->domain, RADEON_FLAG_NO_SUBALLOC); + alloc_domain, + RADEON_FLAG_NO_SUBALLOC | + /* Use the reusable pool: */ + RADEON_FLAG_NO_INTERPROCESS_SHARING); if (!tex->buf) { goto fail; diff --git a/src/gallium/drivers/r600/radeon_uvd.c b/src/gallium/drivers/r600/radeon_uvd.c index 5568f2138e4..0f5bcc53212 100644 --- a/src/gallium/drivers/r600/radeon_uvd.c +++ b/src/gallium/drivers/r600/radeon_uvd.c @@ -247,7 +247,7 @@ static unsigned calc_ctx_size_h265_main(struct ruvd_decoder *dec) static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_h265_picture_desc *pic) { - unsigned block_size, log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; + unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); @@ -262,8 +262,8 @@ static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_ else max_references = MAX2(max_references, 17); - block_size = (1 << (pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3)); - log2_ctb_size = block_size + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 + + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index ca066e89823..95e8007cae2 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -298,7 +298,7 @@ static unsigned calc_ctx_size_h265_main(struct ruvd_decoder *dec) static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_h265_picture_desc *pic) { - unsigned block_size, log2_ctb_size, width_in_ctb, height_in_ctb, 
num_16x16_block_per_ctb; + unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); @@ -313,8 +313,8 @@ static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_ else max_references = MAX2(max_references, 17); - block_size = (1 << (pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3)); - log2_ctb_size = block_size + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 + + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c b/src/gallium/drivers/radeon/radeon_uvd_enc.c index 3164dbb2c20..639e5043543 100644 --- a/src/gallium/drivers/radeon/radeon_uvd_enc.c +++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c @@ -73,7 +73,8 @@ radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc, enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag; enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc; enc->enc_pic.general_level_idc = pic->seq.general_level_idc; - enc->enc_pic.max_poc = pic->seq.intra_period; + enc->enc_pic.max_poc = + MAX2(16, util_next_power_of_two(pic->seq.intra_period)); enc->enc_pic.log2_max_poc = 0; for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++) i = (i >> 1); diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c index 1f41b09472f..7e5be33ec54 100644 --- a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c +++ b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c @@ -573,7 +573,13 @@ radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc) enc->enc_pic.hevc_spec_misc. 
constrained_intra_pred_flag, 1); radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); - radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); + if (enc->enc_pic.rc_session_init.rate_control_method == + RENC_UVD_RATE_CONTROL_METHOD_NONE) + radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); + else { + radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); + radeon_uvd_enc_code_ue(enc, 0x0); + } radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset); radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset); radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1); @@ -768,8 +774,7 @@ radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc) if ((enc->enc_pic.nal_unit_type != 19) && (enc->enc_pic.nal_unit_type != 20)) { radeon_uvd_enc_code_fixed_bits(enc, - enc->enc_pic.frame_num % - enc->enc_pic.max_poc, + enc->enc_pic.pic_order_cnt, enc->enc_pic.log2_max_poc); if (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P) radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1); diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.c b/src/gallium/drivers/radeon/radeon_vcn_dec.c index 688cef90103..b1d6edba466 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_dec.c +++ b/src/gallium/drivers/radeon/radeon_vcn_dec.c @@ -552,7 +552,7 @@ static unsigned calc_ctx_size_h265_main(struct radeon_decoder *dec) static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec, struct pipe_h265_picture_desc *pic) { - unsigned block_size, log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; + unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb; unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size; unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4); @@ -568,8 +568,8 @@ static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec, struct pip else max_references = MAX2(max_references, 17); - block_size = (1 << (pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3)); - log2_ctb_size = block_size + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; + log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 + + pic->pps->sps->log2_diff_max_min_luma_coding_block_size; width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size; diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c b/src/gallium/drivers/radeon/radeon_vcn_enc.c index 7d64a28a405..c4fbf6eb63f 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_enc.c +++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c @@ -72,7 +72,8 @@ static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct pipe_pic enc->enc_pic.general_tier_flag = pic->seq.general_tier_flag; enc->enc_pic.general_profile_idc = pic->seq.general_profile_idc; enc->enc_pic.general_level_idc = pic->seq.general_level_idc; - enc->enc_pic.max_poc = pic->seq.intra_period; + enc->enc_pic.max_poc = + MAX2(16, util_next_power_of_two(pic->seq.intra_period)); enc->enc_pic.log2_max_poc = 0; for (int i = enc->enc_pic.max_poc; i != 0; enc->enc_pic.log2_max_poc++) i = (i >> 1); diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c index 7f5b1909344..fdf0e3ac06c 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c @@ -687,7 +687,13 @@ static void radeon_enc_nalu_pps_hevc(struct radeon_encoder *enc) radeon_enc_code_se(enc, 0x0); radeon_enc_code_fixed_bits(enc, 
enc->enc_pic.hevc_spec_misc.constrained_intra_pred_flag, 1); radeon_enc_code_fixed_bits(enc, 0x0, 1); - radeon_enc_code_fixed_bits(enc, 0x0, 1); + if (enc->enc_pic.rc_session_init.rate_control_method == + RENCODE_RATE_CONTROL_METHOD_NONE) + radeon_enc_code_fixed_bits(enc, 0x0, 1); + else { + radeon_enc_code_fixed_bits(enc, 0x1, 1); + radeon_enc_code_ue(enc, 0x0); + } radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset); radeon_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset); radeon_enc_code_fixed_bits(enc, 0x0, 1); @@ -988,7 +994,7 @@ static void radeon_enc_slice_header_hevc(struct radeon_encoder *enc) } if ((enc->enc_pic.nal_unit_type != 19) && (enc->enc_pic.nal_unit_type != 20)) { - radeon_enc_code_fixed_bits(enc, enc->enc_pic.frame_num % enc->enc_pic.max_poc, enc->enc_pic.log2_max_poc); + radeon_enc_code_fixed_bits(enc, enc->enc_pic.pic_order_cnt, enc->enc_pic.log2_max_poc); if (enc->enc_pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) radeon_enc_code_fixed_bits(enc, 0x1, 1); else { diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index da9b25a442d..1854e1226c3 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -502,7 +502,15 @@ static void cik_sdma_copy(struct pipe_context *ctx, return; } - if ((sctx->chip_class == CIK || sctx->chip_class == VI) && + /* SDMA causes corruption. See: + * https://bugs.freedesktop.org/show_bug.cgi?id=110575 + * https://bugs.freedesktop.org/show_bug.cgi?id=110635 + * + * Keep SDMA enabled on APUs. + */ + if ((sctx->screen->debug_flags & DBG(FORCE_DMA) || + !sctx->screen->info.has_dedicated_vram) && + (sctx->chip_class == CIK || sctx->chip_class == VI) && cik_sdma_copy_texture(sctx, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box)) return; diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c index 4936eb5a5b1..76705937b65 100644 --- a/src/gallium/drivers/radeonsi/si_buffer.c +++ b/src/gallium/drivers/radeonsi/si_buffer.c @@ -287,11 +287,9 @@ si_invalidate_buffer(struct si_context *sctx, /* Check if mapping this buffer would cause waiting for the GPU. */ if (si_rings_is_buffer_referenced(sctx, buf->buf, RADEON_USAGE_READWRITE) || !sctx->ws->buffer_wait(buf->buf, 0, RADEON_USAGE_READWRITE)) { - uint64_t old_va = buf->gpu_address; - /* Reallocate the buffer in the same pipe_resource. 
*/ si_alloc_resource(sctx->screen, buf); - si_rebind_buffer(sctx, &buf->b.b, old_va); + si_rebind_buffer(sctx, &buf->b.b); } else { util_range_set_empty(&buf->valid_buffer_range); } @@ -307,7 +305,6 @@ void si_replace_buffer_storage(struct pipe_context *ctx, struct si_context *sctx = (struct si_context*)ctx; struct si_resource *sdst = si_resource(dst); struct si_resource *ssrc = si_resource(src); - uint64_t old_gpu_address = sdst->gpu_address; pb_reference(&sdst->buf, ssrc->buf); sdst->gpu_address = ssrc->gpu_address; @@ -322,7 +319,7 @@ void si_replace_buffer_storage(struct pipe_context *ctx, assert(sdst->bo_alignment == ssrc->bo_alignment); assert(sdst->domains == ssrc->domains); - si_rebind_buffer(sctx, dst, old_gpu_address); + si_rebind_buffer(sctx, dst); } static void si_invalidate_resource(struct pipe_context *ctx, diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index f1a433b72df..756f5372fa2 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -500,9 +500,13 @@ static bool si_switch_compute_shader(struct si_context *sctx, COMPUTE_DBG(sctx->screen, "COMPUTE_PGM_RSRC1: 0x%08x " "COMPUTE_PGM_RSRC2: 0x%08x\n", config->rsrc1, config->rsrc2); + sctx->max_seen_compute_scratch_bytes_per_wave = + MAX2(sctx->max_seen_compute_scratch_bytes_per_wave, + config->scratch_bytes_per_wave); + radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE, S_00B860_WAVES(sctx->scratch_waves) - | S_00B860_WAVESIZE(config->scratch_bytes_per_wave >> 10)); + | S_00B860_WAVESIZE(sctx->max_seen_compute_scratch_bytes_per_wave >> 10)); sctx->cs_shader_state.emitted_program = program; sctx->cs_shader_state.offset = offset; diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index f795c33cf26..ca25d424fb5 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -999,6 +999,7 @@ static void si_init_buffer_resources(struct si_buffer_resources *buffers, buffers->priority = priority; buffers->priority_constbuf = priority_constbuf; buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*)); + buffers->offsets = CALLOC(num_buffers, sizeof(buffers->offsets[0])); si_init_descriptors(descs, shader_userdata_rel_index, 4, num_buffers); } @@ -1013,6 +1014,7 @@ static void si_release_buffer_resources(struct si_buffer_resources *buffers, } FREE(buffers->buffers); + FREE(buffers->offsets); } static void si_buffer_resources_begin_new_cs(struct si_context *sctx, @@ -1219,11 +1221,10 @@ static void si_set_constant_buffer(struct si_context *sctx, if (input && (input->buffer || input->user_buffer)) { struct pipe_resource *buffer = NULL; uint64_t va; + unsigned buffer_offset; /* Upload the user buffer if needed. */ if (input->user_buffer) { - unsigned buffer_offset; - si_upload_const_buffer(sctx, (struct si_resource**)&buffer, input->user_buffer, input->buffer_size, &buffer_offset); @@ -1232,12 +1233,13 @@ static void si_set_constant_buffer(struct si_context *sctx, si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, NULL); return; } - va = si_resource(buffer)->gpu_address + buffer_offset; } else { pipe_resource_reference(&buffer, input->buffer); - va = si_resource(buffer)->gpu_address + input->buffer_offset; + buffer_offset = input->buffer_offset; } + va = si_resource(buffer)->gpu_address + buffer_offset; + /* Set the descriptor. 
*/ uint32_t *desc = descs->list + slot*4; desc[0] = va; @@ -1252,6 +1254,7 @@ static void si_set_constant_buffer(struct si_context *sctx, S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); buffers->buffers[slot] = buffer; + buffers->offsets[slot] = buffer_offset; radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ, @@ -1336,6 +1339,7 @@ static void si_set_shader_buffer(struct si_context *sctx, S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); pipe_resource_reference(&buffers->buffers[slot], &buf->b.b); + buffers->offsets[slot] = sbuffer->buffer_offset; radeon_add_to_gfx_buffer_list_check_mem(sctx, buf, writable ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, @@ -1505,20 +1509,6 @@ void si_set_ring_buffer(struct si_context *sctx, uint slot, sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; } -static void si_desc_reset_buffer_offset(uint32_t *desc, uint64_t old_buf_va, - struct pipe_resource *new_buf) -{ - /* Retrieve the buffer offset from the descriptor. */ - uint64_t old_desc_va = si_desc_extract_buffer_address(desc); - - assert(old_buf_va <= old_desc_va); - uint64_t offset_within_buffer = old_desc_va - old_buf_va; - - /* Update the descriptor. */ - si_set_buf_desc_address(si_resource(new_buf), offset_within_buffer, - desc); -} - /* INTERNAL CONST BUFFERS */ static void si_set_polygon_stipple(struct pipe_context *ctx, @@ -1597,13 +1587,14 @@ void si_update_needs_color_decompress_masks(struct si_context *sctx) /* BUFFER DISCARD/INVALIDATION */ -/** Reset descriptors of buffer resources after \p buf has been invalidated. */ +/* Reset descriptors of buffer resources after \p buf has been invalidated. + * If buf == NULL, reset all descriptors. + */ static void si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers, unsigned descriptors_idx, unsigned slot_mask, struct pipe_resource *buf, - uint64_t old_va, enum radeon_bo_priority priority) { struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; @@ -1611,13 +1602,15 @@ static void si_reset_buffer_resources(struct si_context *sctx, while (mask) { unsigned i = u_bit_scan(&mask); - if (buffers->buffers[i] == buf) { - si_desc_reset_buffer_offset(descs->list + i*4, - old_va, buf); + struct pipe_resource *buffer = buffers->buffers[i]; + + if (buffer && (!buf || buffer == buf)) { + si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i], + descs->list + i*4); sctx->descriptors_dirty |= 1u << descriptors_idx; radeon_add_to_gfx_buffer_list_check_mem(sctx, - si_resource(buf), + si_resource(buffer), buffers->writable_mask & (1u << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, @@ -1626,11 +1619,13 @@ static void si_reset_buffer_resources(struct si_context *sctx, } } -/* Update all resource bindings where the buffer is bound, including +/* Update all buffer bindings where the buffer is bound, including * all resource descriptors. This is invalidate_buffer without - * the invalidation. */ -void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, - uint64_t old_va) + * the invalidation. + * + * If buf == NULL, update all buffer bindings. + */ +void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) { struct si_resource *buffer = si_resource(buf); unsigned i, shader; @@ -1644,7 +1639,10 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, */ /* Vertex buffers. 
*/ - if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) { + if (!buffer) { + if (num_elems) + sctx->vertex_buffers_dirty = true; + } else if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) { for (i = 0; i < num_elems; i++) { int vb = sctx->vertex_elements->vertex_buffer_index[i]; @@ -1661,21 +1659,23 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, } /* Streamout buffers. (other internal buffers can't be invalidated) */ - if (buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) { + if (!buffer || buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) { for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) { struct si_buffer_resources *buffers = &sctx->rw_buffers; struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS]; + struct pipe_resource *buffer = buffers->buffers[i]; - if (buffers->buffers[i] != buf) + if (!buffer || (buf && buffer != buf)) continue; - si_desc_reset_buffer_offset(descs->list + i*4, - old_va, buf); + si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i], + descs->list + i*4); sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; radeon_add_to_gfx_buffer_list_check_mem(sctx, - buffer, RADEON_USAGE_WRITE, + si_resource(buffer), + RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER, true); @@ -1689,25 +1689,25 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, } /* Constant and shader buffers. */ - if (buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) { + if (!buffer || buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) { for (shader = 0; shader < SI_NUM_SHADERS; shader++) si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors_idx(shader), u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS), - buf, old_va, + buf, sctx->const_and_shader_buffers[shader].priority_constbuf); } - if (buffer->bind_history & PIPE_BIND_SHADER_BUFFER) { + if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_BUFFER) { for (shader = 0; shader < SI_NUM_SHADERS; shader++) si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors_idx(shader), u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS), - buf, old_va, + buf, sctx->const_and_shader_buffers[shader].priority); } - if (buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) { + if (!buffer || buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) { /* Texture buffers - update bindings. 
*/ for (shader = 0; shader < SI_NUM_SHADERS; shader++) { struct si_samplers *samplers = &sctx->samplers[shader]; @@ -1717,26 +1717,29 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, while (mask) { unsigned i = u_bit_scan(&mask); - if (samplers->views[i]->texture == buf) { + struct pipe_resource *buffer = samplers->views[i]->texture; + + if (buffer && buffer->target == PIPE_BUFFER && + (!buf || buffer == buf)) { unsigned desc_slot = si_get_sampler_slot(i); - si_desc_reset_buffer_offset(descs->list + - desc_slot * 16 + 4, - old_va, buf); + si_set_buf_desc_address(si_resource(buffer), + samplers->views[i]->u.buf.offset, + descs->list + desc_slot * 16 + 4); sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); - radeon_add_to_gfx_buffer_list_check_mem(sctx, - buffer, RADEON_USAGE_READ, - RADEON_PRIO_SAMPLER_BUFFER, - true); + radeon_add_to_gfx_buffer_list_check_mem( + sctx, si_resource(buffer), + RADEON_USAGE_READ, + RADEON_PRIO_SAMPLER_BUFFER, true); } } } } /* Shader images */ - if (buffer->bind_history & PIPE_BIND_SHADER_IMAGE) { + if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_IMAGE) { for (shader = 0; shader < SI_NUM_SHADERS; ++shader) { struct si_images *images = &sctx->images[shader]; struct si_descriptors *descs = @@ -1745,21 +1748,23 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, while (mask) { unsigned i = u_bit_scan(&mask); + struct pipe_resource *buffer = images->views[i].resource; - if (images->views[i].resource == buf) { + if (buffer && buffer->target == PIPE_BUFFER && + (!buf || buffer == buf)) { unsigned desc_slot = si_get_image_slot(i); if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE) si_mark_image_range_valid(&images->views[i]); - si_desc_reset_buffer_offset( - descs->list + desc_slot * 8 + 4, - old_va, buf); + si_set_buf_desc_address(si_resource(buffer), + images->views[i].u.buf.offset, + descs->list + desc_slot * 8 + 4); sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); radeon_add_to_gfx_buffer_list_check_mem( - sctx, buffer, + sctx, si_resource(buffer), RADEON_USAGE_READWRITE, RADEON_PRIO_SAMPLER_BUFFER, true); } @@ -1768,16 +1773,18 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, } /* Bindless texture handles */ - if (buffer->texture_handle_allocated) { + if (!buffer || buffer->texture_handle_allocated) { struct si_descriptors *descs = &sctx->bindless_descriptors; util_dynarray_foreach(&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) { struct pipe_sampler_view *view = (*tex_handle)->view; unsigned desc_slot = (*tex_handle)->desc_slot; + struct pipe_resource *buffer = view->texture; - if (view->texture == buf) { - si_set_buf_desc_address(buffer, + if (buffer && buffer->target == PIPE_BUFFER && + (!buf || buffer == buf)) { + si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset, descs->list + desc_slot * 16 + 4); @@ -1786,7 +1793,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, sctx->bindless_descriptors_dirty = true; radeon_add_to_gfx_buffer_list_check_mem( - sctx, buffer, + sctx, si_resource(buffer), RADEON_USAGE_READ, RADEON_PRIO_SAMPLER_BUFFER, true); } @@ -1794,19 +1801,21 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, } /* Bindless image handles */ - if (buffer->image_handle_allocated) { + if (!buffer || buffer->image_handle_allocated) { struct si_descriptors *descs = &sctx->bindless_descriptors; 
util_dynarray_foreach(&sctx->resident_img_handles, struct si_image_handle *, img_handle) { struct pipe_image_view *view = &(*img_handle)->view; unsigned desc_slot = (*img_handle)->desc_slot; + struct pipe_resource *buffer = view->resource; - if (view->resource == buf) { + if (buffer && buffer->target == PIPE_BUFFER && + (!buf || buffer == buf)) { if (view->access & PIPE_IMAGE_ACCESS_WRITE) si_mark_image_range_valid(view); - si_set_buf_desc_address(buffer, + si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset, descs->list + desc_slot * 16 + 4); @@ -1815,12 +1824,25 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, sctx->bindless_descriptors_dirty = true; radeon_add_to_gfx_buffer_list_check_mem( - sctx, buffer, + sctx, si_resource(buffer), RADEON_USAGE_READWRITE, RADEON_PRIO_SAMPLER_BUFFER, true); } } } + + if (buffer) { + /* Do the same for other contexts. They will invoke this function + * with buffer == NULL. + */ + unsigned new_counter = p_atomic_inc_return(&sctx->screen->dirty_buf_counter); + + /* Skip the update for the current context, because we have already updated + * the buffer bindings. + */ + if (new_counter == sctx->last_dirty_buf_counter + 1) + sctx->last_dirty_buf_counter = new_counter; + } } static void si_upload_bindless_descriptor(struct si_context *sctx, diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 4e23d283ab7..e526f3009bf 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -155,7 +155,8 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_FBFETCH: case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK: case PIPE_CAP_IMAGE_LOAD_FORMATTED: - case PIPE_CAP_PREFER_COMPUTE_BLIT_FOR_MULTIMEDIA: + case PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA: + case PIPE_CAP_TGSI_DIV: return 1; case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index b0e0ca7af05..2f484f7052a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -464,9 +464,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, 0, PIPE_USAGE_DEFAULT, SI_RESOURCE_FLAG_32BIT | (use_sdma_upload ? - SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA : - (sscreen->cpdma_prefetch_writes_memory ? - 0 : SI_RESOURCE_FLAG_READ_ONLY))); + SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA : 0)); if (!sctx->b.const_uploader) goto fail; @@ -514,9 +512,6 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, si_init_fence_functions(sctx); si_init_state_compute_functions(sctx); - if (sscreen->debug_flags & DBG(FORCE_DMA)) - sctx->b.resource_copy_region = sctx->dma_copy; - /* Initialize graphics-only context functions. */ if (sctx->has_graphics) { si_init_context_texture_functions(sctx); @@ -541,6 +536,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, else si_init_dma_functions(sctx); + if (sscreen->debug_flags & DBG(FORCE_DMA)) + sctx->b.resource_copy_region = sctx->dma_copy; + sctx->sample_mask = 0xffff; /* Initialize multimedia functions. */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index d3ddb912245..35e548cdec5 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -526,6 +526,7 @@ struct si_screen { * the counter before drawing and re-emit the states accordingly. 
*/ unsigned dirty_tex_counter; + unsigned dirty_buf_counter; /* Atomically increment this counter when an existing texture's * metadata is enabled or disabled in a way that requires changing @@ -852,6 +853,7 @@ struct si_context { unsigned initial_gfx_cs_size; unsigned gpu_reset_counter; unsigned last_dirty_tex_counter; + unsigned last_dirty_buf_counter; unsigned last_compressed_colortex_counter; unsigned last_num_draw_calls; unsigned flags; /* flush flags */ @@ -982,6 +984,8 @@ struct si_context { struct si_resource *scratch_buffer; unsigned scratch_waves; unsigned spi_tmpring_size; + unsigned max_seen_scratch_bytes_per_wave; + unsigned max_seen_compute_scratch_bytes_per_wave; struct si_resource *compute_scratch_buffer; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 6df24f9648a..6d74d774b6d 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -409,6 +409,7 @@ struct si_descriptors { struct si_buffer_resources { struct pipe_resource **buffers; /* this has num_buffers elements */ + unsigned *offsets; /* this has num_buffers elements */ enum radeon_bo_priority priority:6; enum radeon_bo_priority priority_constbuf:6; @@ -487,8 +488,7 @@ struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_index); void si_bindless_descriptor_slab_free(void *priv, struct pb_slab *pslab); -void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf, - uint64_t old_va); +void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf); /* si_state.c */ void si_init_state_compute_functions(struct si_context *sctx); void si_init_state_functions(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 8e01e1b35e1..d9dfef0a381 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -1254,7 +1254,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i struct si_context *sctx = (struct si_context *)ctx; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; struct pipe_resource *indexbuf = info->index.resource; - unsigned dirty_tex_counter; + unsigned dirty_tex_counter, dirty_buf_counter; enum pipe_prim_type rast_prim; unsigned index_size = info->index_size; unsigned index_offset = info->indirect ? info->start * index_size : 0; @@ -1292,6 +1292,13 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i si_update_all_texture_descriptors(sctx); } + dirty_buf_counter = p_atomic_read(&sctx->screen->dirty_buf_counter); + if (unlikely(dirty_buf_counter != sctx->last_dirty_buf_counter)) { + sctx->last_dirty_buf_counter = dirty_buf_counter; + /* Rebind all buffers unconditionally. */ + si_rebind_buffer(sctx, NULL); + } + si_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS)); /* Set the rasterization primitive type. diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index db4c77da2ff..ef8943d9011 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -3104,11 +3104,6 @@ static int si_update_scratch_buffer(struct si_context *sctx, return 1; } -static unsigned si_get_current_scratch_buffer_size(struct si_context *sctx) -{ - return sctx->scratch_buffer ? 
sctx->scratch_buffer->b.b.width0 : 0; -} - static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader *shader) { return shader ? shader->config.scratch_bytes_per_wave : 0; @@ -3123,23 +3118,6 @@ static struct si_shader *si_get_tcs_current(struct si_context *sctx) sctx->fixed_func_tcs_shader.current; } -static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx) -{ - unsigned bytes = 0; - - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader.current)); - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader.current)); - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader.current)); - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader.current)); - - if (sctx->tes_shader.cso) { - struct si_shader *tcs = si_get_tcs_current(sctx); - - bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(tcs)); - } - return bytes; -} - static bool si_update_scratch_relocs(struct si_context *sctx) { struct si_shader *tcs = si_get_tcs_current(sctx); @@ -3197,16 +3175,40 @@ static bool si_update_scratch_relocs(struct si_context *sctx) static bool si_update_spi_tmpring_size(struct si_context *sctx) { - unsigned current_scratch_buffer_size = - si_get_current_scratch_buffer_size(sctx); - unsigned scratch_bytes_per_wave = - si_get_max_scratch_bytes_per_wave(sctx); - unsigned scratch_needed_size = scratch_bytes_per_wave * - sctx->scratch_waves; + /* SPI_TMPRING_SIZE.WAVESIZE must be constant for each scratch buffer. + * There are 2 cases to handle: + * + * - If the current needed size is less than the maximum seen size, + * use the maximum seen size, so that WAVESIZE remains the same. + * + * - If the current needed size is greater than the maximum seen size, + * the scratch buffer is reallocated, so we can increase WAVESIZE. + * + * Shaders that set SCRATCH_EN=0 don't allocate scratch space. + * Otherwise, the number of waves that can use scratch is + * SPI_TMPRING_SIZE.WAVES. 
+ */ + unsigned bytes = 0; + + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader.current)); + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader.current)); + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader.current)); + + if (sctx->tes_shader.cso) { + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader.current)); + bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(si_get_tcs_current(sctx))); + } + + sctx->max_seen_scratch_bytes_per_wave = + MAX2(sctx->max_seen_scratch_bytes_per_wave, bytes); + + unsigned scratch_needed_size = + sctx->max_seen_scratch_bytes_per_wave * sctx->scratch_waves; unsigned spi_tmpring_size; if (scratch_needed_size > 0) { - if (scratch_needed_size > current_scratch_buffer_size) { + if (!sctx->scratch_buffer || + scratch_needed_size > sctx->scratch_buffer->b.b.width0) { /* Create a bigger scratch buffer */ si_resource_reference(&sctx->scratch_buffer, NULL); @@ -3232,7 +3234,7 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx) "scratch size should already be aligned correctly."); spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) | - S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10); + S_0286E8_WAVESIZE(sctx->max_seen_scratch_bytes_per_wave >> 10); if (spi_tmpring_size != sctx->spi_tmpring_size) { sctx->spi_tmpring_size = spi_tmpring_size; si_mark_atom_dirty(sctx, &sctx->atoms.s.scratch_state); diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index 59d50376438..91230c5f0da 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -837,8 +837,7 @@ static boolean si_texture_get_handle(struct pipe_screen* screen, if (sscreen->ws->buffer_is_suballocated(res->buf) || tex->surface.tile_swizzle || (tex->buffer.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING && - sscreen->info.has_local_buffers && - whandle->type != WINSYS_HANDLE_TYPE_KMS)) { + sscreen->info.has_local_buffers)) { assert(!res->b.is_shared); si_reallocate_texture_inplace(sctx, tex, PIPE_BIND_SHARED, false); diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 5d44824e202..f79f20ee6ab 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -174,6 +174,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_DOUBLES: case PIPE_CAP_INT64: case PIPE_CAP_INT64_DIVMOD: + case PIPE_CAP_TGSI_DIV: return 1; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 16; diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index a4470e6cb07..45d4eda5377 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -659,15 +659,6 @@ compute_lambda_vert(const struct sp_sampler_view *sview, } -static float -compute_lambda_vert_explicite_gradients(UNUSED const struct sp_sampler_view *sview, - UNUSED const float derivs[3][2][TGSI_QUAD_SIZE], - UNUSED int quad) -{ - return 0.0f; -} - - compute_lambda_from_grad_func softpipe_get_lambda_from_grad_func(const struct pipe_sampler_view *view, enum pipe_shader_type shader) diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 9e13ee8ce6d..b6c21a866fe 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -568,11 +568,11 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl) 
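/* Illustrative sketch of the scratch sizing scheme introduced above in
 * si_update_spi_tmpring_size (hypothetical toy_* names, simplified fields).
 * The per-wave size only ever grows, so SPI_TMPRING_SIZE.WAVESIZE stays
 * consistent with the buffer the in-flight waves are using, and the buffer
 * is reallocated exactly when the grown requirement no longer fits.
 */
struct toy_scratch_state {
   unsigned max_seen_bytes_per_wave;
   unsigned scratch_waves;
   unsigned scratch_buffer_size;   /* 0 = no scratch buffer allocated yet */
};

static unsigned toy_update_scratch(struct toy_scratch_state *s,
                                   unsigned bytes_per_wave_now)
{
   if (bytes_per_wave_now > s->max_seen_bytes_per_wave)
      s->max_seen_bytes_per_wave = bytes_per_wave_now;

   unsigned needed = s->max_seen_bytes_per_wave * s->scratch_waves;

   if (needed > s->scratch_buffer_size) {
      /* stands in for freeing the old buffer and allocating a bigger one */
      s->scratch_buffer_size = needed;
   }

   /* WAVESIZE is programmed in 1 KiB units, hence the >> 10 */
   return s->max_seen_bytes_per_wave >> 10;
}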
vbuffer_attrs[i].sid = 0; } - /* If we haven't yet emitted a drawing command or if any - * vertex buffer state is changing, issue that state now. + /* If any of the vertex buffer state has changed, issue + * the SetVertexBuffers command. Otherwise, we will just + * need to rebind the resources. */ - if (((hwtnl->cmd.swc->hints & SVGA_HINT_FLAG_CAN_PRE_FLUSH) == 0) || - vbuf_count != svga->state.hw_draw.num_vbuffers || + if (vbuf_count != svga->state.hw_draw.num_vbuffers || !vertex_buffers_equal(vbuf_count, vbuffer_attrs, vbuffers, diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index f747ff78bcf..631778a7437 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -478,6 +478,8 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_PACKED_UNIFORMS: case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: return 0; + case PIPE_CAP_TGSI_DIV: + return 1; case PIPE_CAP_MAX_GS_INVOCATIONS: return 32; case PIPE_CAP_MAX_SHADER_BUFFER_SIZE: diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp index bd48fb2aae7..153e2af7eae 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp @@ -565,68 +565,3 @@ using Vec4 = typename SIMD_T::Vec4; template using Mask = typename SIMD_T::Mask; -template -struct SIMDVecEqual -{ - INLINE bool operator()(Integer a, Integer b) const - { - Integer c = SIMD_T::xor_si(a, b); - return SIMD_T::testz_si(c, c); - } - - INLINE bool operator()(Float a, Float b) const - { - return this->operator()(SIMD_T::castps_si(a), SIMD_T::castps_si(b)); - } - - INLINE bool operator()(Double a, Double b) const - { - return this->operator()(SIMD_T::castpd_si(a), SIMD_T::castpd_si(b)); - } -}; - -template -struct SIMDVecHash -{ - INLINE uint32_t operator()(Integer val) const - { -#if defined(_WIN64) || !defined(_WIN32) // assume non-Windows is always 64-bit - static_assert(sizeof(void*) == 8, "This path only meant for 64-bit code"); - - uint64_t crc32 = 0; - const uint64_t* pData = reinterpret_cast(&val); - static const uint32_t loopIterations = sizeof(val) / sizeof(void*); - static_assert(loopIterations * sizeof(void*) == sizeof(val), "bad vector size"); - - for (uint32_t i = 0; i < loopIterations; ++i) - { - crc32 = _mm_crc32_u64(crc32, pData[i]); - } - - return static_cast(crc32); -#else - static_assert(sizeof(void*) == 4, "This path only meant for 32-bit code"); - - uint32_t crc32 = 0; - const uint32_t* pData = reinterpret_cast(&val); - static const uint32_t loopIterations = sizeof(val) / sizeof(void*); - static_assert(loopIterations * sizeof(void*) == sizeof(val), "bad vector size"); - - for (uint32_t i = 0; i < loopIterations; ++i) - { - crc32 = _mm_crc32_u32(crc32, pData[i]); - } - - return crc32; -#endif - }; - - INLINE uint32_t operator()(Float val) const - { - return operator()(SIMD_T::castps_si(val)); - }; - INLINE uint32_t operator()(Double val) const - { - return operator()(SIMD_T::castpd_si(val)); - } -}; diff --git a/src/gallium/drivers/virgl/virgl_resource.c b/src/gallium/drivers/virgl/virgl_resource.c index ef81f213f40..6d4c9f5fd00 100644 --- a/src/gallium/drivers/virgl/virgl_resource.c +++ b/src/gallium/drivers/virgl/virgl_resource.c @@ -112,6 +112,7 @@ static struct pipe_resource *virgl_resource_from_handle(struct pipe_screen *scre res->u.b = *templ; res->u.b.screen = &vs->base; pipe_reference_init(&res->u.b.reference, 1); + 
virgl_resource_layout(&res->u.b, &res->metadata); res->hw_res = vs->vws->resource_create_from_handle(vs->vws, whandle); if (!res->hw_res) { diff --git a/src/gallium/drivers/virgl/virgl_screen.c b/src/gallium/drivers/virgl/virgl_screen.c index 65106dbb616..a021a6490f6 100644 --- a/src/gallium/drivers/virgl/virgl_screen.c +++ b/src/gallium/drivers/virgl/virgl_screen.c @@ -356,7 +356,8 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_NATIVE_FENCE_FD: return vscreen->vws->supports_fences; case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: - return vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_SRGB_WRITE_CONTROL; + return (vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_SRGB_WRITE_CONTROL) || + (vscreen->caps.caps.v2.host_feature_check_version < 1); case PIPE_CAP_TGSI_SKIP_SHRINK_IO_ARRAYS: return vscreen->caps.caps.v2.capability_bits & VIRGL_CAP_INDIRECT_INPUT_ADDR; default: diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 672a2ea7378..8b9a2cd07f9 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -882,7 +882,8 @@ enum pipe_cap PIPE_CAP_IMAGE_LOAD_FORMATTED, PIPE_CAP_MAX_FRAMES_IN_FLIGHT, PIPE_CAP_DMABUF, - PIPE_CAP_PREFER_COMPUTE_BLIT_FOR_MULTIMEDIA, + PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA, + PIPE_CAP_TGSI_DIV, }; /** diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c index f7672bcae06..8e78da7a06a 100644 --- a/src/gallium/state_trackers/dri/dri2.c +++ b/src/gallium/state_trackers/dri/dri2.c @@ -72,6 +72,10 @@ struct dri2_format_mapping { }; static const struct dri2_format_mapping dri2_format_table[] = { + { __DRI_IMAGE_FOURCC_ABGR16161616F, __DRI_IMAGE_FORMAT_ABGR16161616F, + __DRI_IMAGE_COMPONENTS_RGBA, PIPE_FORMAT_R16G16B16A16_FLOAT }, + { __DRI_IMAGE_FOURCC_XBGR16161616F, __DRI_IMAGE_FORMAT_XBGR16161616F, + __DRI_IMAGE_COMPONENTS_RGB, PIPE_FORMAT_R16G16B16X16_FLOAT }, { __DRI_IMAGE_FOURCC_ARGB2101010, __DRI_IMAGE_FORMAT_ARGB2101010, __DRI_IMAGE_COMPONENTS_RGBA, PIPE_FORMAT_B10G10R10A2_UNORM }, { __DRI_IMAGE_FOURCC_XRGB2101010, __DRI_IMAGE_FORMAT_XRGB2101010, @@ -222,6 +226,12 @@ dri2_drawable_get_buffers(struct dri_drawable *drawable, * may occur as the stvis->color_format. 
*/ switch(format) { + case PIPE_FORMAT_R16G16B16A16_FLOAT: + depth = 64; + break; + case PIPE_FORMAT_R16G16B16X16_FLOAT: + depth = 48; + break; case PIPE_FORMAT_B10G10R10A2_UNORM: case PIPE_FORMAT_R10G10B10A2_UNORM: case PIPE_FORMAT_BGRA8888_UNORM: @@ -300,6 +310,12 @@ dri_image_drawable_get_buffers(struct dri_drawable *drawable, } switch (pf) { + case PIPE_FORMAT_R16G16B16A16_FLOAT: + image_format = __DRI_IMAGE_FORMAT_ABGR16161616F; + break; + case PIPE_FORMAT_R16G16B16X16_FLOAT: + image_format = __DRI_IMAGE_FORMAT_XBGR16161616F; + break; case PIPE_FORMAT_B5G5R5A1_UNORM: image_format = __DRI_IMAGE_FORMAT_ARGB1555; break; @@ -373,6 +389,12 @@ dri2_allocate_buffer(__DRIscreen *sPriv, bind |= PIPE_BIND_SHARED; switch (format) { + case 64: + pf = PIPE_FORMAT_R16G16B16A16_FLOAT; + break; + case 48: + pf = PIPE_FORMAT_R16G16B16X16_FLOAT; + break; case 32: pf = PIPE_FORMAT_BGRA8888_UNORM; break; diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c index c1de3bed9dd..df375b67f3f 100644 --- a/src/gallium/state_trackers/dri/dri_drawable.c +++ b/src/gallium/state_trackers/dri/dri_drawable.c @@ -260,6 +260,9 @@ dri_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target, if (format == __DRI_TEXTURE_FORMAT_RGB) { /* only need to cover the formats recognized by dri_fill_st_visual */ switch (internal_format) { + case PIPE_FORMAT_R16G16B16A16_FLOAT: + internal_format = PIPE_FORMAT_R16G16B16X16_FLOAT; + break; case PIPE_FORMAT_B10G10R10A2_UNORM: internal_format = PIPE_FORMAT_B10G10R10X2_UNORM; break; diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c index 82a0988a634..b7a6734e98c 100644 --- a/src/gallium/state_trackers/dri/dri_screen.c +++ b/src/gallium/state_trackers/dri/dri_screen.c @@ -121,6 +121,8 @@ dri_fill_in_modes(struct dri_screen *screen) MESA_FORMAT_B8G8R8A8_SRGB, MESA_FORMAT_B8G8R8X8_SRGB, MESA_FORMAT_B5G6R5_UNORM, + MESA_FORMAT_RGBA_FLOAT16, + MESA_FORMAT_RGBX_FLOAT16, /* The 32-bit RGBA format must not precede the 32-bit BGRA format. * Likewise for RGBX and BGRX. Otherwise, the GLX client and the GLX @@ -153,6 +155,8 @@ dri_fill_in_modes(struct dri_screen *screen) PIPE_FORMAT_BGRA8888_SRGB, PIPE_FORMAT_BGRX8888_SRGB, PIPE_FORMAT_B5G6R5_UNORM, + PIPE_FORMAT_R16G16B16A16_FLOAT, + PIPE_FORMAT_R16G16B16X16_FLOAT, PIPE_FORMAT_RGBA8888_UNORM, PIPE_FORMAT_RGBX8888_UNORM, }; @@ -166,7 +170,9 @@ dri_fill_in_modes(struct dri_screen *screen) struct pipe_screen *p_screen = screen->base.screen; boolean pf_z16, pf_x8z24, pf_z24x8, pf_s8z24, pf_z24s8, pf_z32; boolean mixed_color_depth; + boolean allow_rgba_ordering; boolean allow_rgb10; + boolean allow_fp16; static const GLenum back_buffer_modes[] = { __DRI_ATTRIB_SWAP_NONE, __DRI_ATTRIB_SWAP_UNDEFINED, @@ -183,7 +189,10 @@ dri_fill_in_modes(struct dri_screen *screen) depth_buffer_factor = 1; } + allow_rgba_ordering = dri_loader_get_cap(screen, DRI_LOADER_CAP_RGBA_ORDERING); allow_rgb10 = driQueryOptionb(&screen->dev->option_cache, "allow_rgb10_configs"); + allow_fp16 = driQueryOptionb(&screen->dev->option_cache, "allow_fp16_configs"); + allow_fp16 &= dri_loader_get_cap(screen, DRI_LOADER_CAP_FP16); msaa_samples_max = (screen->st_api->feature_mask & ST_API_FEATURE_MS_VISUALS_MASK) ? MSAA_VISUAL_MAX_SAMPLES : 1; @@ -231,19 +240,18 @@ dri_fill_in_modes(struct dri_screen *screen) assert(ARRAY_SIZE(mesa_formats) == ARRAY_SIZE(pipe_formats)); - /* Expose only BGRA ordering if the loader doesn't support RGBA ordering. 
*/ - unsigned num_formats; - if (dri_loader_get_cap(screen, DRI_LOADER_CAP_RGBA_ORDERING)) - num_formats = ARRAY_SIZE(mesa_formats); - else - num_formats = ARRAY_SIZE(mesa_formats) - 2; /* all - RGBA_ORDERING formats */ - /* Add configs. */ - for (format = 0; format < num_formats; format++) { + for (format = 0; format < ARRAY_SIZE(mesa_formats); format++) { __DRIconfig **new_configs = NULL; unsigned num_msaa_modes = 0; /* includes a single-sample mode */ uint8_t msaa_modes[MSAA_VISUAL_MAX_SAMPLES]; + /* Expose only BGRA ordering if the loader doesn't support RGBA ordering. */ + if (!allow_rgba_ordering && + (mesa_formats[format] == MESA_FORMAT_R8G8B8A8_UNORM || + mesa_formats[format] == MESA_FORMAT_R8G8B8X8_UNORM)) + continue; + if (!allow_rgb10 && (mesa_formats[format] == MESA_FORMAT_B10G10R10A2_UNORM || mesa_formats[format] == MESA_FORMAT_B10G10R10X2_UNORM || @@ -251,6 +259,11 @@ dri_fill_in_modes(struct dri_screen *screen) mesa_formats[format] == MESA_FORMAT_R10G10B10X2_UNORM)) continue; + if (!allow_fp16 && + (mesa_formats[format] == MESA_FORMAT_RGBA_FLOAT16 || + mesa_formats[format] == MESA_FORMAT_RGBX_FLOAT16)) + continue; + if (!p_screen->is_format_supported(p_screen, pipe_formats[format], PIPE_TEXTURE_2D, 0, 0, PIPE_BIND_RENDER_TARGET | @@ -315,6 +328,17 @@ dri_fill_st_visual(struct st_visual *stvis, /* Deduce the color format. */ switch (mode->redMask) { + case 0: + /* Formats > 32 bpp */ + assert(mode->floatMode); + if (mode->alphaShift > -1) { + assert(mode->alphaShift == 48); + stvis->color_format = PIPE_FORMAT_R16G16B16A16_FLOAT; + } else { + stvis->color_format = PIPE_FORMAT_R16G16B16X16_FLOAT; + } + break; + case 0x3FF00000: if (mode->alphaMask) { assert(mode->alphaMask == 0xC0000000); diff --git a/src/gallium/state_trackers/va/postproc.c b/src/gallium/state_trackers/va/postproc.c index fbc55b7714b..3431b1b48c7 100644 --- a/src/gallium/state_trackers/va/postproc.c +++ b/src/gallium/state_trackers/va/postproc.c @@ -222,7 +222,7 @@ static VAStatus vlVaPostProcBlit(vlVaDriver *drv, vlVaContext *context, blit.filter = PIPE_TEX_MIPFILTER_LINEAR; if (drv->pipe->screen->get_param(drv->pipe->screen, - PIPE_CAP_PREFER_COMPUTE_BLIT_FOR_MULTIMEDIA)) + PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA)) util_compute_blit(drv->pipe, &blit, &context->blit_cs); else drv->pipe->blit(drv->pipe, &blit); diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c index b0b9bb12f2c..657c619ac42 100644 --- a/src/gallium/targets/d3dadapter9/drm.c +++ b/src/gallium/targets/d3dadapter9/drm.c @@ -243,8 +243,10 @@ drm_create_adapter( int fd, return D3DERR_DRIVERINTERNALERROR; } - ctx->base.throttling_value = - ctx->base.hal->get_param(ctx->base.hal, PIPE_CAP_MAX_FRAMES_IN_FLIGHT); + /* Previously was set to PIPE_CAP_MAX_FRAMES_IN_FLIGHT, + * but the change of value of this cap to 1 seems to cause + * regressions. 
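/* Sketch of the new ">32 bpp" deduction in dri_fill_st_visual above: the
 * half-float configs carry no channel masks (redMask is 0), so alpha
 * presence is read from alphaShift instead (the hunk asserts 48 when alpha
 * exists).  toy_float_color_format is a hypothetical helper; the
 * PIPE_FORMAT_* values are the ones used in the patch.
 */
#include <stdbool.h>
#include "pipe/p_format.h"

static enum pipe_format
toy_float_color_format(unsigned red_mask, bool float_mode, int alpha_shift)
{
   if (red_mask == 0 && float_mode) {
      return alpha_shift > -1 ? PIPE_FORMAT_R16G16B16A16_FLOAT
                              : PIPE_FORMAT_R16G16B16X16_FLOAT;
   }
   return PIPE_FORMAT_NONE;   /* defer to the existing mask-based cases */
}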
*/ + ctx->base.throttling_value = 2; ctx->base.throttling = ctx->base.throttling_value > 0; driParseOptionInfo(&defaultInitOptions, __driConfigOptionsNine); diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk index 6134251b5ca..10b27a80ef3 100644 --- a/src/gallium/targets/dri/Android.mk +++ b/src/gallium/targets/dri/Android.mk @@ -53,6 +53,10 @@ LOCAL_SHARED_LIBRARIES += \ libexpat endif +LOCAL_STATIC_LIBRARIES += \ + libfreedreno_drm \ + libfreedreno_ir3 + ifeq ($(USE_LIBBACKTRACE),true) LOCAL_SHARED_LIBRARIES += libbacktrace endif diff --git a/src/gallium/targets/dri/meson.build b/src/gallium/targets/dri/meson.build index dd40969a166..45daf647960 100644 --- a/src/gallium/targets/dri/meson.build +++ b/src/gallium/targets/dri/meson.build @@ -78,8 +78,8 @@ foreach d : [[with_gallium_kmsro, [ 'pl111_dri.so', 'repaper_dri.so', 'rockchip_dri.so', - 'st7586.so', - 'st7735r.so', + 'st7586_dri.so', + 'st7735r_dri.so', 'sun4i-drm_dri.so', ]], [with_gallium_radeonsi, 'radeonsi_dri.so'], diff --git a/src/gallium/targets/osmesa/meson.build b/src/gallium/targets/osmesa/meson.build index b4ae8f4b6ec..e873e311aa0 100644 --- a/src/gallium/targets/osmesa/meson.build +++ b/src/gallium/targets/osmesa/meson.build @@ -43,9 +43,9 @@ libosmesa = shared_library( inc_gallium_drivers, ], link_depends : osmesa_link_deps, - link_whole : [libosmesa_st], + link_whole : [libosmesa_st, libglapi_static], link_with : [ - libmesa_gallium, libgallium, libglapi_static, libws_null, osmesa_link_with, + libmesa_gallium, libgallium, libws_null, osmesa_link_with, ], dependencies : [ dep_selinux, dep_thread, dep_clock, dep_unwind, diff --git a/src/gallium/tests/trivial/meson.build b/src/gallium/tests/trivial/meson.build index bbb25519e12..1f912d5aa46 100644 --- a/src/gallium/tests/trivial/meson.build +++ b/src/gallium/tests/trivial/meson.build @@ -24,6 +24,7 @@ foreach t : ['compute', 'tri', 'quad-tex'] '@0@.c'.format(t), include_directories : inc_common, link_with : [libmesa_util, libgallium, libpipe_loader_dynamic], + dependencies : dep_thread, install : false, ) endforeach diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 4a2377f7e09..972030eaaa8 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -378,7 +378,8 @@ static bool amdgpu_cs_has_user_fence(struct amdgpu_cs_context *cs) cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCE && cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD_ENC && cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_DEC && - cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_ENC; + cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_ENC && + cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_JPEG; } static bool amdgpu_cs_has_chaining(struct amdgpu_cs *cs) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 490c246d6e0..2e0e79a3969 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -752,7 +752,9 @@ radeon_cs_create_fence(struct radeon_cmdbuf *rcs) /* Create a fence, which is a dummy BO. 
*/ fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, - RADEON_DOMAIN_GTT, RADEON_FLAG_NO_SUBALLOC); + RADEON_DOMAIN_GTT, + RADEON_FLAG_NO_SUBALLOC + | RADEON_FLAG_NO_INTERPROCESS_SHARING); if (!fence) return NULL; diff --git a/src/gallium/winsys/svga/drm/vmw_msg.c b/src/gallium/winsys/svga/drm/vmw_msg.c index 8cce2241f36..3e8ed2a0fb5 100644 --- a/src/gallium/winsys/svga/drm/vmw_msg.c +++ b/src/gallium/winsys/svga/drm/vmw_msg.c @@ -177,17 +177,23 @@ typedef uint64_t VMW_REG; typedef uint32_t VMW_REG; -/* In the 32-bit version of this macro, we use "m" because there is no - * more register left for bp +/* In the 32-bit version of this macro, we store bp in a memory location + * because we've ran out of registers. + * Now we can't reference that memory location while we've modified + * %esp or %ebp, so we first push it on the stack, just before we push + * %ebp, and then when we need it we read it from the stack where we + * just pushed it. */ #define VMW_PORT_HB_OUT(cmd, in_cx, in_si, in_di, \ port_num, magic, bp, \ ax, bx, cx, dx, si, di) \ ({ \ - __asm__ volatile ("push %%ebp;" \ - "mov %12, %%ebp;" \ + __asm__ volatile ("push %12;" \ + "push %%ebp;" \ + "mov 0x04(%%esp), %%ebp;" \ "rep outsb;" \ - "pop %%ebp;" : \ + "pop %%ebp;" \ + "add $0x04, %%esp;" : \ "=a"(ax), \ "=b"(bx), \ "=c"(cx), \ @@ -209,10 +215,12 @@ typedef uint32_t VMW_REG; port_num, magic, bp, \ ax, bx, cx, dx, si, di) \ ({ \ - __asm__ volatile ("push %%ebp;" \ - "mov %12, %%ebp;" \ + __asm__ volatile ("push %12;" \ + "push %%ebp;" \ + "mov 0x04(%%esp), %%ebp;" \ "rep insb;" \ - "pop %%ebp" : \ + "pop %%ebp;" \ + "add $0x04, %%esp;" : \ "=a"(ax), \ "=b"(bx), \ "=c"(cx), \ @@ -418,6 +426,7 @@ vmw_svga_winsys_host_log(struct svga_winsys_screen *sws, const char *log) struct rpc_channel channel; char *msg; int msg_len; + int ret; #ifdef MSG_NOT_IMPLEMENTED return; @@ -435,12 +444,14 @@ vmw_svga_winsys_host_log(struct svga_winsys_screen *sws, const char *log) util_sprintf(msg, "log %s", log); - if (vmw_open_channel(&channel, RPCI_PROTOCOL_NUM) || - vmw_send_msg(&channel, msg) || - vmw_close_channel(&channel)) { - debug_printf("Failed to send log\n"); + if (!(ret = vmw_open_channel(&channel, RPCI_PROTOCOL_NUM))) { + ret = vmw_send_msg(&channel, msg); + vmw_close_channel(&channel); } + if (ret) + debug_printf("Failed to send log\n"); + FREE(msg); return; diff --git a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c index 3b14f1d3513..e27d51013fb 100644 --- a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c +++ b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c @@ -210,6 +210,10 @@ vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws, SVGA3dMSQualityLevel qualityLevel, struct vmw_region **p_region) { + union { + union drm_vmw_gb_surface_create_ext_arg ext_arg; + union drm_vmw_gb_surface_create_arg arg; + } s_arg; struct drm_vmw_gb_surface_create_rep *rep; struct vmw_region *region = NULL; int ret; @@ -222,12 +226,11 @@ vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws, return SVGA3D_INVALID_ID; } - if (vws->ioctl.have_drm_2_15) { - union drm_vmw_gb_surface_create_ext_arg s_arg; - struct drm_vmw_gb_surface_create_ext_req *req = &s_arg.req; - rep = &s_arg.rep; + memset(&s_arg, 0, sizeof(s_arg)); - memset(&s_arg, 0, sizeof(s_arg)); + if (vws->ioctl.have_drm_2_15) { + struct drm_vmw_gb_surface_create_ext_req *req = &s_arg.ext_arg.req; + rep = &s_arg.ext_arg.rep; req->version = drm_vmw_gb_surface_v1; req->multisample_pattern = multisamplePattern; @@ -264,17 +267,15 @@ 
vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws, buffer_handle : SVGA3D_INVALID_ID; ret = drmCommandWriteRead(vws->ioctl.drm_fd, - DRM_VMW_GB_SURFACE_CREATE_EXT, &s_arg, - sizeof(s_arg)); + DRM_VMW_GB_SURFACE_CREATE_EXT, &s_arg.ext_arg, + sizeof(s_arg.ext_arg)); if (ret) goto out_fail_create; } else { - union drm_vmw_gb_surface_create_arg s_arg; - struct drm_vmw_gb_surface_create_req *req = &s_arg.req; - rep = &s_arg.rep; + struct drm_vmw_gb_surface_create_req *req = &s_arg.arg.req; + rep = &s_arg.arg.rep; - memset(&s_arg, 0, sizeof(s_arg)); req->svga3d_flags = (uint32_t) flags; req->format = (uint32_t) format; @@ -305,7 +306,7 @@ vmw_ioctl_gb_surface_create(struct vmw_winsys_screen *vws, buffer_handle : SVGA3D_INVALID_ID; ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GB_SURFACE_CREATE, - &s_arg, sizeof(s_arg)); + &s_arg.arg, sizeof(s_arg.arg)); if (ret) goto out_fail_create; diff --git a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c index 6401f55bbd3..d9b417dc4da 100644 --- a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c +++ b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c @@ -183,7 +183,7 @@ kms_sw_displaytarget_create(struct sw_winsys *ws, kms_sw_dt->format = format; memset(&create_req, 0, sizeof(create_req)); - create_req.bpp = 32; + create_req.bpp = util_format_get_blocksizebits(format); create_req.width = width; create_req.height = height; ret = drmIoctl(kms_sw->fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_req); diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c index 58005df5595..42a58a6dcc1 100644 --- a/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c +++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_socket.c @@ -125,7 +125,7 @@ static int virgl_vtest_send_init(struct virgl_vtest_winsys *vws) ret = os_get_process_name(cmdline, 63); if (ret == FALSE) strcpy(cmdline, nstr); -#if defined(__GLIBC__) || defined(__CYGWIN__) +#if defined(HAVE_PROGRAM_INVOCATION_NAME) if (!strcmp(cmdline, "shader_runner")) { const char *name; /* hack to get better testname */ diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c index 99396f658f2..a5586b107ad 100644 --- a/src/gbm/backends/dri/gbm_dri.c +++ b/src/gbm/backends/dri/gbm_dri.c @@ -110,6 +110,18 @@ dri_get_buffers_with_format(__DRIdrawable * driDrawable, count, out_count, surf->dri_private); } +static unsigned +dri_get_capability(void *loaderPrivate, enum dri_loader_cap cap) +{ + /* Note: loaderPrivate is _EGLDisplay* */ + switch (cap) { + case DRI_LOADER_CAP_FP16: + return 1; + default: + return 0; + } +} + static int image_get_buffers(__DRIdrawable *driDrawable, unsigned int format, @@ -207,11 +219,12 @@ static const __DRIimageLookupExtension image_lookup_extension = { }; static const __DRIdri2LoaderExtension dri2_loader_extension = { - .base = { __DRI_DRI2_LOADER, 3 }, + .base = { __DRI_DRI2_LOADER, 4 }, .getBuffers = dri_get_buffers, .flushFrontBuffer = dri_flush_front_buffer, .getBuffersWithFormat = dri_get_buffers_with_format, + .getCapability = dri_get_capability, }; static const __DRIimageLoaderExtension image_loader_extension = { @@ -478,51 +491,75 @@ dri_screen_create_sw(struct gbm_dri_device *dri) static const struct gbm_dri_visual gbm_dri_visuals_table[] = { { GBM_FORMAT_R8, __DRI_IMAGE_FORMAT_R8, - { 0x000000ff, 0x00000000, 0x00000000, 0x00000000 }, + { 0, -1, -1, -1 }, + { 8, 0, 0, 0 }, }, { GBM_FORMAT_GR88, __DRI_IMAGE_FORMAT_GR88, - { 0x000000ff, 0x0000ff00, 
0x00000000, 0x00000000 }, + { 0, 8, -1, -1 }, + { 8, 8, 0, 0 }, }, { GBM_FORMAT_ARGB1555, __DRI_IMAGE_FORMAT_ARGB1555, - { 0x00007c00, 0x000003e0, 0x0000001f, 0x00008000 }, + { 10, 5, 0, 11 }, + { 5, 5, 5, 1 }, }, { GBM_FORMAT_RGB565, __DRI_IMAGE_FORMAT_RGB565, - { 0x0000f800, 0x000007e0, 0x0000001f, 0x00000000 }, + { 11, 5, 0, -1 }, + { 5, 6, 5, 0 }, }, { GBM_FORMAT_XRGB8888, __DRI_IMAGE_FORMAT_XRGB8888, - { 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000 }, + { 16, 8, 0, -1 }, + { 8, 8, 8, 0 }, }, { GBM_FORMAT_ARGB8888, __DRI_IMAGE_FORMAT_ARGB8888, - { 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 }, + { 16, 8, 0, 24 }, + { 8, 8, 8, 8 }, }, { GBM_FORMAT_XBGR8888, __DRI_IMAGE_FORMAT_XBGR8888, - { 0x000000ff, 0x0000ff00, 0x00ff0000, 0x00000000 }, + { 0, 8, 16, -1 }, + { 8, 8, 8, 0 }, }, { GBM_FORMAT_ABGR8888, __DRI_IMAGE_FORMAT_ABGR8888, - { 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 }, + { 0, 8, 16, 24 }, + { 8, 8, 8, 8 }, }, { GBM_FORMAT_XRGB2101010, __DRI_IMAGE_FORMAT_XRGB2101010, - { 0x3ff00000, 0x000ffc00, 0x000003ff, 0x00000000 }, + { 20, 10, 0, -1 }, + { 10, 10, 10, 0 }, }, { GBM_FORMAT_ARGB2101010, __DRI_IMAGE_FORMAT_ARGB2101010, - { 0x3ff00000, 0x000ffc00, 0x000003ff, 0xc0000000 }, + { 20, 10, 0, 30 }, + { 10, 10, 10, 2 }, }, { GBM_FORMAT_XBGR2101010, __DRI_IMAGE_FORMAT_XBGR2101010, - { 0x000003ff, 0x000ffc00, 0x3ff00000, 0x00000000 }, + { 0, 10, 20, -1 }, + { 10, 10, 10, 0 }, }, { GBM_FORMAT_ABGR2101010, __DRI_IMAGE_FORMAT_ABGR2101010, - { 0x000003ff, 0x000ffc00, 0x3ff00000, 0xc0000000 }, + { 0, 10, 20, 30 }, + { 10, 10, 10, 2 }, + }, + { + GBM_FORMAT_XBGR16161616F, __DRI_IMAGE_FORMAT_XBGR16161616F, + { 0, 16, 32, -1 }, + { 16, 16, 16, 0 }, + true, + }, + { + GBM_FORMAT_ABGR16161616F, __DRI_IMAGE_FORMAT_ABGR16161616F, + { 0, 16, 32, 48 }, + { 16, 16, 16, 16 }, + true, }, }; diff --git a/src/gbm/backends/dri/gbm_driint.h b/src/gbm/backends/dri/gbm_driint.h index 8497be3e8f6..a8bfa39e522 100644 --- a/src/gbm/backends/dri/gbm_driint.h +++ b/src/gbm/backends/dri/gbm_driint.h @@ -44,11 +44,18 @@ struct gbm_dri_visual { uint32_t gbm_format; int dri_image_format; struct { - uint32_t red; - uint32_t green; - uint32_t blue; - uint32_t alpha; - } rgba_masks; + int red; + int green; + int blue; + int alpha; + } rgba_shifts; + struct { + unsigned int red; + unsigned int green; + unsigned int blue; + unsigned int alpha; + } rgba_sizes; + bool is_float; }; struct gbm_dri_device { diff --git a/src/gbm/main/gbm.c b/src/gbm/main/gbm.c index 38480ca966c..ceeed5082e9 100644 --- a/src/gbm/main/gbm.c +++ b/src/gbm/main/gbm.c @@ -271,6 +271,9 @@ gbm_bo_get_bpp(struct gbm_bo *bo) case GBM_FORMAT_RGBA1010102: case GBM_FORMAT_BGRA1010102: return 32; + case GBM_FORMAT_XBGR16161616F: + case GBM_FORMAT_ABGR16161616F: + return 64; } } diff --git a/src/gbm/main/gbm.h b/src/gbm/main/gbm.h index 9b5288710a5..4c6ab377699 100644 --- a/src/gbm/main/gbm.h +++ b/src/gbm/main/gbm.h @@ -150,6 +150,15 @@ enum gbm_bo_format { #define GBM_FORMAT_RGBA1010102 __gbm_fourcc_code('R', 'A', '3', '0') /* [31:0] R:G:B:A 10:10:10:2 little endian */ #define GBM_FORMAT_BGRA1010102 __gbm_fourcc_code('B', 'A', '3', '0') /* [31:0] B:G:R:A 10:10:10:2 little endian */ +/* + * Floating point 64bpp RGB + * IEEE 754-2008 binary16 half-precision float + * [15:0] sign:exponent:mantissa 1:5:10 + */ +#define GBM_FORMAT_XBGR16161616F __gbm_fourcc_code('X', 'B', '4', 'H') /* [63:0] x:B:G:R 16:16:16:16 little endian */ + +#define GBM_FORMAT_ABGR16161616F __gbm_fourcc_code('A', 'B', '4', 'H') /* [63:0] A:B:G:R 16:16:16:16 little endian */ + /* 
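/* Illustrative helper (hypothetical, not part of the patch's API) relating
 * the removed rgba_masks entries to the new rgba_shifts/rgba_sizes pairs: a
 * negative shift marks an absent channel, and the two 64 bpp half-float
 * formats are exactly the entries a 32-bit mask could not describe, which
 * is why the table switched representation.
 */
#include <stdint.h>

static uint64_t toy_channel_mask(int shift, unsigned size)
{
   if (shift < 0 || size == 0)
      return 0;                                  /* channel not present */
   return ((UINT64_C(1) << size) - 1) << shift;
}
/* toy_channel_mask(16, 8)  == 0x00ff0000  (old ARGB8888 red mask)
 * toy_channel_mask(20, 10) == 0x3ff00000  (old ARGB2101010 red mask)
 */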
packed YCbCr */ #define GBM_FORMAT_YUYV __gbm_fourcc_code('Y', 'U', 'Y', 'V') /* [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */ #define GBM_FORMAT_YVYU __gbm_fourcc_code('Y', 'V', 'Y', 'U') /* [31:0] Cb0:Y1:Cr0:Y0 8:8:8:8 little endian */ diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c index 2db29fd6abb..fc34d85d54a 100644 --- a/src/glx/dri_common.c +++ b/src/glx/dri_common.c @@ -166,6 +166,10 @@ static const struct __ATTRIB(__DRI_ATTRIB_GREEN_MASK, greenMask), __ATTRIB(__DRI_ATTRIB_BLUE_MASK, blueMask), __ATTRIB(__DRI_ATTRIB_ALPHA_MASK, alphaMask), + __ATTRIB(__DRI_ATTRIB_RED_SHIFT, redShift), + __ATTRIB(__DRI_ATTRIB_GREEN_SHIFT, greenShift), + __ATTRIB(__DRI_ATTRIB_BLUE_SHIFT, blueShift), + __ATTRIB(__DRI_ATTRIB_ALPHA_SHIFT, alphaShift), #endif __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_WIDTH, maxPbufferWidth), __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_HEIGHT, maxPbufferHeight), diff --git a/src/glx/g_glxglvnddispatchfuncs.c b/src/glx/g_glxglvnddispatchfuncs.c index 5b65afc8602..cec52c554e6 100644 --- a/src/glx/g_glxglvnddispatchfuncs.c +++ b/src/glx/g_glxglvnddispatchfuncs.c @@ -128,7 +128,7 @@ static void dispatch_BindTexImageEXT(Display *dpy, GLXDrawable drawable, static GLXFBConfigSGIX *dispatch_ChooseFBConfigSGIX(Display *dpy, int screen, - const int *attrib_list, + int *attrib_list, int *nelements) { PFNGLXCHOOSEFBCONFIGSGIXPROC pChooseFBConfigSGIX; @@ -220,7 +220,7 @@ static GLXPbuffer dispatch_CreateGLXPbufferSGIX(Display *dpy, GLXFBConfig config, unsigned int width, unsigned int height, - const int *attrib_list) + int *attrib_list) { PFNGLXCREATEGLXPBUFFERSGIXPROC pCreateGLXPbufferSGIX; __GLXvendorInfo *dd; diff --git a/src/glx/glx_error.c b/src/glx/glx_error.c index 712ecf8213d..653cbeb2d2a 100644 --- a/src/glx/glx_error.c +++ b/src/glx/glx_error.c @@ -54,7 +54,7 @@ __glXSendError(Display * dpy, int_fast8_t errorCode, uint_fast32_t resourceID, error.errorCode = glx_dpy->codes->first_error + errorCode; } - error.sequenceNumber = dpy->last_request_read; + error.sequenceNumber = dpy->request; error.resourceID = resourceID; error.minorCode = minorCode; error.majorCode = glx_dpy->majorOpcode; @@ -73,7 +73,7 @@ __glXSendErrorForXcb(Display * dpy, const xcb_generic_error_t *err) error.type = X_Error; error.errorCode = err->error_code; - error.sequenceNumber = dpy->last_request_read; + error.sequenceNumber = err->sequence; error.resourceID = err->resource_id; error.minorCode = err->minor_code; error.majorCode = err->major_code; diff --git a/src/glx/glxconfig.h b/src/glx/glxconfig.h index 2f1074ca5fa..13f5a4e292c 100644 --- a/src/glx/glxconfig.h +++ b/src/glx/glxconfig.h @@ -41,6 +41,7 @@ struct glx_config { GLint redBits, greenBits, blueBits, alphaBits; /* bits per comp */ GLuint redMask, greenMask, blueMask, alphaMask; + GLuint redShift, greenShift, blueShift, alphaShift; GLint rgbBits; /* total bits for rgb */ GLint indexBits; /* total bits for colorindex */ diff --git a/src/glx/glxglvnd.c b/src/glx/glxglvnd.c index b6b415114c9..bf5c2a06b0c 100644 --- a/src/glx/glxglvnd.c +++ b/src/glx/glxglvnd.c @@ -41,7 +41,7 @@ static void *__glXGLVNDGetDispatchAddress(const GLubyte *procName) { unsigned internalIndex = FindGLXFunction(procName); - return __glXDispatchFunctions[internalIndex]; + return (void*)__glXDispatchFunctions[internalIndex]; } static void __glXGLVNDSetDispatchIndex(const GLubyte *procName, int index) diff --git a/src/glx/meson.build b/src/glx/meson.build index 0e3245a254f..d7b4d086642 100644 --- a/src/glx/meson.build +++ b/src/glx/meson.build @@ -1,4 +1,4 @@ -# Copyright © 2017 Intel 
Corporation +# Copyright © 2017-2019 Intel Corporation # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -70,7 +70,7 @@ extra_libs_libglx = [] extra_deps_libgl = [] extra_ld_args_libgl = [] -if with_dri +if with_glx == 'dri' files_libglx += files( 'dri_common.c', 'dri_common.h', @@ -83,7 +83,7 @@ if with_dri endif # dri2 -if with_dri and with_dri_platform == 'drm' and dep_libdrm.found() +if with_glx == 'dri' and with_dri_platform == 'drm' and dep_libdrm.found() files_libglx += files( 'dri2.c', 'dri2_glx.c', diff --git a/src/intel/Android.compiler.mk b/src/intel/Android.compiler.mk index c2b01221dfc..41af7b20b9c 100644 --- a/src/intel/Android.compiler.mk +++ b/src/intel/Android.compiler.mk @@ -28,7 +28,7 @@ # --------------------------------------- include $(CLEAR_VARS) - +LOCAL_CFLAGS += -Wno-error LOCAL_MODULE := libmesa_intel_compiler LOCAL_MODULE_CLASS := STATIC_LIBRARIES diff --git a/src/intel/Android.dev.mk b/src/intel/Android.dev.mk index 4f14b0362d7..e4a5058bbf7 100644 --- a/src/intel/Android.dev.mk +++ b/src/intel/Android.dev.mk @@ -33,5 +33,8 @@ LOCAL_C_INCLUDES := $(MESA_TOP)/include LOCAL_SRC_FILES := $(DEV_FILES) +LOCAL_CFLAGS := \ + -Wno-gnu-variable-sized-type-not-at-end + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) diff --git a/src/intel/Android.vulkan.mk b/src/intel/Android.vulkan.mk index 05922afdbe0..53e3d18e962 100644 --- a/src/intel/Android.vulkan.mk +++ b/src/intel/Android.vulkan.mk @@ -71,6 +71,7 @@ LOCAL_C_INCLUDES := \ LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_entrypoints.h LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/dummy.c +LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_extensions.h $(intermediates)/vulkan/dummy.c: @mkdir -p $(dir $@) @@ -85,6 +86,14 @@ $(intermediates)/vulkan/anv_entrypoints.h: $(intermediates)/vulkan/dummy.c \ --outdir $(dir $@) \ --xml $(VULKAN_API_XML) +$(intermediates)/vulkan/anv_extensions.h: $(ANV_ENTRYPOINTS_GEN_SCRIPT) \ + $(ANV_EXTENSIONS_SCRIPT) \ + $(VULKAN_API_XML) + @mkdir -p $(dir $@) + $(MESA_PYTHON2) $(ANV_EXTENSIONS_GEN_SCRIPT) \ + --xml $(VULKAN_API_XML) \ + --out-h $@ + LOCAL_EXPORT_C_INCLUDE_DIRS := \ $(intermediates) @@ -239,7 +248,7 @@ include $(BUILD_STATIC_LIBRARY) include $(CLEAR_VARS) LOCAL_MODULE := libmesa_vulkan_common LOCAL_MODULE_CLASS := STATIC_LIBRARIES - +LOCAL_CFLAGS += -Wno-error intermediates := $(call local-generated-sources-dir) LOCAL_SRC_FILES := $(VULKAN_FILES) @@ -261,7 +270,6 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \ LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_entrypoints.c LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_extensions.c -LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_extensions.h $(intermediates)/vulkan/anv_entrypoints.c: $(ANV_ENTRYPOINTS_GEN_SCRIPT) \ $(ANV_EXTENSIONS_SCRIPT) \ @@ -279,14 +287,6 @@ $(intermediates)/vulkan/anv_extensions.c: $(ANV_EXTENSIONS_GEN_SCRIPT) \ --xml $(VULKAN_API_XML) \ --out-c $@ -$(intermediates)/vulkan/anv_extensions.h: $(ANV_EXTENSIONS_GEN_SCRIPT) \ - $(ANV_EXTENSIONS_SCRIPT) \ - $(VULKAN_API_XML) - @mkdir -p $(dir $@) - $(MESA_PYTHON2) $(ANV_EXTENSIONS_GEN_SCRIPT) \ - --xml $(VULKAN_API_XML) \ - --out-h $@ - LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES) LOCAL_HEADER_LIBRARIES += $(VULKAN_COMMON_HEADER_LIBRARIES) diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index 9e964d02f36..222a7bcf4b2 100644 --- a/src/intel/blorp/blorp_blit.c +++ 
b/src/intel/blorp/blorp_blit.c @@ -2506,15 +2506,8 @@ blorp_surf_convert_to_uncompressed(const struct isl_device *isl_dev, *y /= fmtl->bh; } - info->surf.logical_level0_px.width = - DIV_ROUND_UP(info->surf.logical_level0_px.width, fmtl->bw); - info->surf.logical_level0_px.height = - DIV_ROUND_UP(info->surf.logical_level0_px.height, fmtl->bh); - - assert(info->surf.phys_level0_sa.width % fmtl->bw == 0); - assert(info->surf.phys_level0_sa.height % fmtl->bh == 0); - info->surf.phys_level0_sa.width /= fmtl->bw; - info->surf.phys_level0_sa.height /= fmtl->bh; + info->surf.logical_level0_px = isl_surf_get_logical_level0_el(&info->surf); + info->surf.phys_level0_sa = isl_surf_get_phys_level0_el(&info->surf); assert(info->tile_x_sa % fmtl->bw == 0); assert(info->tile_y_sa % fmtl->bh == 0); diff --git a/src/intel/compiler/brw_cfg.cpp b/src/intel/compiler/brw_cfg.cpp index 600b428a492..6c40889088d 100644 --- a/src/intel/compiler/brw_cfg.cpp +++ b/src/intel/compiler/brw_cfg.cpp @@ -128,9 +128,6 @@ void bblock_t::combine_with(bblock_t *that) { assert(this->can_combine_with(that)); - foreach_list_typed (bblock_link, link, link, &this->children) { - assert(link->block == that); - } foreach_list_typed (bblock_link, link, link, &that->parents) { assert(link->block == this); } diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index 44296083711..0c18efa67f8 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -141,7 +141,8 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo) nir_lower_ineg64 | nir_lower_logic64 | nir_lower_minmax64 | - nir_lower_shift64; + nir_lower_shift64 | + nir_lower_extract64; fp64_options |= nir_lower_fp64_full_software; } @@ -187,6 +188,8 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo) /* Prior to Gen6, there are no three source operations. 
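/* Sketch of the pixel-to-element conversion that the open-coded math
 * removed above in blorp_surf_convert_to_uncompressed performed, and that
 * the isl_surf_get_*_level0_el helpers now take care of (bw/bh are the
 * compression block dimensions; the names here are hypothetical).
 */
#include <stdint.h>

static void toy_px_to_el(uint32_t w_px, uint32_t h_px,
                         uint32_t bw, uint32_t bh,
                         uint32_t *w_el, uint32_t *h_el)
{
   *w_el = (w_px + bw - 1) / bw;   /* DIV_ROUND_UP */
   *h_el = (h_px + bh - 1) / bh;
}
/* e.g. a 130x130 surface with 4x4 compression blocks becomes 33x33 elements */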
*/ nir_options->lower_ffma = devinfo->gen < 6; + nir_options->lower_bitfield_reverse = devinfo->gen < 7; + nir_options->lower_int64_options = int64_options; nir_options->lower_doubles_options = fp64_options; compiler->glsl_compiler_options[i].NirOptions = nir_options; diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index d8532a0ff6e..29965e60a7f 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -1113,7 +1113,9 @@ brw_untyped_surface_write(struct brw_codegen *p, void brw_memory_fence(struct brw_codegen *p, struct brw_reg dst, - enum opcode send_op); + struct brw_reg src, + enum opcode send_op, + bool stall); void brw_pixel_interpolator_query(struct brw_codegen *p, diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index a271621393d..ad209a5a535 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -707,9 +707,9 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest, gen7_convert_mrf_to_grf(p, &dest); assert(dest.nr < 128); - assert(src0.file != BRW_IMMEDIATE_VALUE || src0.nr < 128); - assert(src1.file != BRW_IMMEDIATE_VALUE || src1.nr < 128); - assert(src2.file != BRW_IMMEDIATE_VALUE || src2.nr < 128); + assert(src0.file == BRW_IMMEDIATE_VALUE || src0.nr < 128); + assert(src1.file != BRW_IMMEDIATE_VALUE && src1.nr < 128); + assert(src2.file == BRW_IMMEDIATE_VALUE || src2.nr < 128); assert(dest.address_mode == BRW_ADDRESS_DIRECT); assert(src0.address_mode == BRW_ADDRESS_DIRECT); assert(src1.address_mode == BRW_ADDRESS_DIRECT); @@ -2523,8 +2523,8 @@ brw_send_indirect_message(struct brw_codegen *p, if (desc.file == BRW_IMMEDIATE_VALUE) { send = next_insn(p, BRW_OPCODE_SEND); + brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); brw_set_desc(p, send, desc.ud | desc_imm); - } else { struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD); @@ -2543,11 +2543,11 @@ brw_send_indirect_message(struct brw_codegen *p, brw_pop_insn_state(p); send = next_insn(p, BRW_OPCODE_SEND); + brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); brw_set_src1(p, send, addr); } brw_set_dest(p, send, dst); - brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); brw_inst_set_sfid(devinfo, send, sfid); brw_inst_set_eot(devinfo, send, eot); } @@ -3037,10 +3037,12 @@ brw_set_memory_fence_message(struct brw_codegen *p, void brw_memory_fence(struct brw_codegen *p, struct brw_reg dst, - enum opcode send_op) + struct brw_reg src, + enum opcode send_op, + bool stall) { const struct gen_device_info *devinfo = p->devinfo; - const bool commit_enable = + const bool commit_enable = stall || devinfo->gen >= 10 || /* HSD ES # 1404612949 */ (devinfo->gen == 7 && !devinfo->is_haswell); struct brw_inst *insn; @@ -3048,15 +3050,15 @@ brw_memory_fence(struct brw_codegen *p, brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_exec_size(p, BRW_EXECUTE_1); - dst = vec1(dst); + dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW); + src = retype(vec1(src), BRW_REGISTER_TYPE_UD); /* Set dst as destination for dependency tracking, the MEMORY_FENCE * message doesn't write anything back. 
*/ insn = next_insn(p, send_op); - dst = retype(dst, BRW_REGISTER_TYPE_UW); brw_set_dest(p, insn, dst); - brw_set_src0(p, insn, dst); + brw_set_src0(p, insn, src); brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, commit_enable); @@ -3067,7 +3069,7 @@ brw_memory_fence(struct brw_codegen *p, */ insn = next_insn(p, send_op); brw_set_dest(p, insn, offset(dst, 1)); - brw_set_src0(p, insn, offset(dst, 1)); + brw_set_src0(p, insn, src); brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE, commit_enable); @@ -3079,6 +3081,9 @@ brw_memory_fence(struct brw_codegen *p, brw_MOV(p, dst, offset(dst, 1)); } + if (stall) + brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), dst); + brw_pop_insn_state(p); } diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c index 943f724e60f..203280570aa 100644 --- a/src/intel/compiler/brw_eu_validate.c +++ b/src/intel/compiler/brw_eu_validate.c @@ -289,6 +289,18 @@ sources_not_null(const struct gen_device_info *devinfo, return error_msg; } +static struct string +alignment_supported(const struct gen_device_info *devinfo, + const brw_inst *inst) +{ + struct string error_msg = { .str = NULL, .len = 0 }; + + ERROR_IF(devinfo->gen >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16, + "Align16 not supported"); + + return error_msg; +} + static bool inst_uses_src_acc(const struct gen_device_info *devinfo, const brw_inst *inst) { @@ -600,17 +612,31 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); struct string error_msg = { .str = NULL, .len = 0 }; + if (devinfo->gen >= 11) { + if (num_sources == 3) { + ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 || + brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1, + "Byte data type is not supported for src1/2 register regioning. This includes " + "byte broadcast as well."); + } + if (num_sources == 2) { + ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1, + "Byte data type is not supported for src1 register regioning. 
This includes " + "byte broadcast as well."); + } + } + if (num_sources == 3) - return (struct string){}; + return error_msg; if (inst_is_send(devinfo, inst)) - return (struct string){}; + return error_msg; if (exec_size == 1) - return (struct string){}; + return error_msg; if (desc->ndst == 0) - return (struct string){}; + return error_msg; /* The PRMs say: * @@ -635,12 +661,9 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf if (dst_type_is_byte) { if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) { - if (!inst_is_raw_move(devinfo, inst)) { + if (!inst_is_raw_move(devinfo, inst)) ERROR("Only raw MOV supports a packed-byte destination"); - return error_msg; - } else { - return (struct string){}; - } + return error_msg; } } @@ -1823,6 +1846,7 @@ brw_validate_instructions(const struct gen_device_info *devinfo, } else { CHECK(sources_not_null); CHECK(send_restrictions); + CHECK(alignment_supported); CHECK(general_restrictions_based_on_operand_types); CHECK(general_restrictions_on_region_parameters); CHECK(special_restrictions_for_mixed_float_mode); diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 335eaa0e934..f7e37d57b22 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1890,8 +1890,8 @@ fs_visitor::split_virtual_grfs() * destination), we mark the used slots as inseparable. Then we go * through and split the registers into the smallest pieces we can. */ - bool split_points[reg_count]; - memset(split_points, 0, sizeof(split_points)); + bool *split_points = new bool[reg_count]; + memset(split_points, 0, reg_count * sizeof(*split_points)); /* Mark all used registers as fully splittable */ foreach_block_and_inst(block, fs_inst, inst, cfg) { @@ -1925,8 +1925,8 @@ fs_visitor::split_virtual_grfs() } } - int new_virtual_grf[reg_count]; - int new_reg_offset[reg_count]; + int *new_virtual_grf = new int[reg_count]; + int *new_reg_offset = new int[reg_count]; int reg = 0; for (int i = 0; i < num_vars; i++) { @@ -1982,6 +1982,10 @@ fs_visitor::split_virtual_grfs() } } invalidate_live_intervals(); + + delete[] split_points; + delete[] new_virtual_grf; + delete[] new_reg_offset; } /** @@ -1997,8 +2001,8 @@ bool fs_visitor::compact_virtual_grfs() { bool progress = false; - int remap_table[this->alloc.count]; - memset(remap_table, -1, sizeof(remap_table)); + int *remap_table = new int[this->alloc.count]; + memset(remap_table, -1, this->alloc.count * sizeof(int)); /* Mark which virtual GRFs are used. */ foreach_block_and_inst(block, const fs_inst, inst, cfg) { @@ -2054,6 +2058,8 @@ fs_visitor::compact_virtual_grfs() } } + delete[] remap_table; + return progress; } @@ -6124,9 +6130,6 @@ get_lowered_simd_width(const struct gen_device_info *devinfo, case FS_OPCODE_LINTERP: case SHADER_OPCODE_GET_BUFFER_SIZE: - case FS_OPCODE_DDX_COARSE: - case FS_OPCODE_DDX_FINE: - case FS_OPCODE_DDY_COARSE: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: case FS_OPCODE_PACK_HALF_2x16_SPLIT: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: @@ -6143,6 +6146,9 @@ get_lowered_simd_width(const struct gen_device_info *devinfo, */ return (devinfo->gen == 4 ? 
16 : MIN2(16, inst->exec_size)); + case FS_OPCODE_DDX_COARSE: + case FS_OPCODE_DDX_FINE: + case FS_OPCODE_DDY_COARSE: case FS_OPCODE_DDY_FINE: /* The implementation of this virtual opcode may require emitting * compressed Align16 instructions, which are severely limited on some @@ -8303,7 +8309,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, if (!v32->run_cs(min_dispatch_width)) { compiler->shader_perf_log(log_data, "SIMD32 shader failed to compile: %s", - v16->fail_msg); + v32->fail_msg); if (!cfg) { fail_msg = "Couldn't generate SIMD32 program and not " diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index a69e3c6ae80..b7e8de85405 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -322,10 +322,11 @@ namespace brw { case SHADER_OPCODE_INT_REMAINDER: return emit(instruction(opcode, dispatch_width(), dst, fix_math_operand(src0), - fix_math_operand(src1))); + fix_math_operand(fix_byte_src(src1)))); default: - return emit(instruction(opcode, dispatch_width(), dst, src0, src1)); + return emit(instruction(opcode, dispatch_width(), dst, + src0, fix_byte_src(src1))); } } @@ -344,12 +345,12 @@ namespace brw { case BRW_OPCODE_LRP: return emit(instruction(opcode, dispatch_width(), dst, fix_3src_operand(src0), - fix_3src_operand(src1), - fix_3src_operand(src2))); + fix_3src_operand(fix_byte_src(src1)), + fix_3src_operand(fix_byte_src(src2)))); default: return emit(instruction(opcode, dispatch_width(), dst, - src0, src1, src2)); + src0, fix_byte_src(src1), fix_byte_src(src2))); } } @@ -399,8 +400,11 @@ namespace brw { { assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L); - return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), - fix_unsigned_negate(src1))); + /* In some cases we can't have bytes as operand for src1, so use the + * same type for both operand. + */ + return set_condmod(mod, SEL(dst, fix_unsigned_negate(fix_byte_src(src0)), + fix_unsigned_negate(fix_byte_src(src1)))); } /** @@ -657,8 +661,8 @@ namespace brw { emit(BRW_OPCODE_CSEL, retype(dst, BRW_REGISTER_TYPE_F), retype(src0, BRW_REGISTER_TYPE_F), - retype(src1, BRW_REGISTER_TYPE_F), - src2)); + retype(fix_byte_src(src1), BRW_REGISTER_TYPE_F), + fix_byte_src(src2))); } /** @@ -708,6 +712,22 @@ namespace brw { backend_shader *shader; + /** + * Byte sized operands are not supported for src1 on Gen11+. + */ + src_reg + fix_byte_src(const src_reg &src) const + { + if ((shader->devinfo->gen < 11 && !shader->devinfo->is_geminilake) || + type_sz(src.type) != 1) + return src; + + dst_reg temp = vgrf(src.type == BRW_REGISTER_TYPE_UB ? + BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_D); + MOV(temp, src); + return src_reg(temp); + } + private: /** * Workaround for negation of UD registers. 
See comment in diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp b/src/intel/compiler/brw_fs_combine_constants.cpp index 8d16be4c4bb..c12e0d62293 100644 --- a/src/intel/compiler/brw_fs_combine_constants.cpp +++ b/src/intel/compiler/brw_fs_combine_constants.cpp @@ -289,7 +289,7 @@ get_alignment_for_imm(const struct imm *imm) } static bool -needs_negate(const struct fs_reg *reg, const struct imm *imm) +needs_negate(const fs_reg *reg, const struct imm *imm) { switch (reg->type) { case BRW_REGISTER_TYPE_DF: diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index af8350aed6c..67740c783f1 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1206,27 +1206,50 @@ fs_generator::generate_ddx(const fs_inst *inst, { unsigned vstride, width; - if (inst->opcode == FS_OPCODE_DDX_FINE) { - /* produce accurate derivatives */ - vstride = BRW_VERTICAL_STRIDE_2; - width = BRW_WIDTH_2; - } else { - /* replicate the derivative at the top-left pixel to other pixels */ - vstride = BRW_VERTICAL_STRIDE_4; - width = BRW_WIDTH_4; - } + if (devinfo->gen >= 8) { + if (inst->opcode == FS_OPCODE_DDX_FINE) { + /* produce accurate derivatives */ + vstride = BRW_VERTICAL_STRIDE_2; + width = BRW_WIDTH_2; + } else { + /* replicate the derivative at the top-left pixel to other pixels */ + vstride = BRW_VERTICAL_STRIDE_4; + width = BRW_WIDTH_4; + } + + struct brw_reg src0 = byte_offset(src, type_sz(src.type));; + struct brw_reg src1 = src; - struct brw_reg src0 = byte_offset(src, type_sz(src.type));; - struct brw_reg src1 = src; + src0.vstride = vstride; + src0.width = width; + src0.hstride = BRW_HORIZONTAL_STRIDE_0; + src1.vstride = vstride; + src1.width = width; + src1.hstride = BRW_HORIZONTAL_STRIDE_0; - src0.vstride = vstride; - src0.width = width; - src0.hstride = BRW_HORIZONTAL_STRIDE_0; - src1.vstride = vstride; - src1.width = width; - src1.hstride = BRW_HORIZONTAL_STRIDE_0; + brw_ADD(p, dst, src0, negate(src1)); + } else { + /* On Haswell and earlier, the region used above appears to not work + * correctly for compressed instructions. At least on Haswell and + * Iron Lake, compressed ALIGN16 instructions do work. Since we + * would have to split to SIMD8 no matter which method we choose, we + * may as well use ALIGN16 on all platforms gen7 and earlier. 
+ */ + struct brw_reg src0 = stride(src, 4, 4, 1); + struct brw_reg src1 = stride(src, 4, 4, 1); + if (inst->opcode == FS_OPCODE_DDX_FINE) { + src0.swizzle = BRW_SWIZZLE_XXZZ; + src1.swizzle = BRW_SWIZZLE_YYWW; + } else { + src0.swizzle = BRW_SWIZZLE_XXXX; + src1.swizzle = BRW_SWIZZLE_YYYY; + } - brw_ADD(p, dst, src0, negate(src1)); + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, dst, negate(src0), src1); + brw_pop_insn_state(p); + } } /* The negate_value boolean is used to negate the derivative computation for @@ -1256,31 +1279,15 @@ fs_generator::generate_ddy(const fs_inst *inst, if (devinfo->gen >= 11 || (devinfo->is_broadwell && src.type == BRW_REGISTER_TYPE_HF)) { src = stride(src, 0, 2, 1); - struct brw_reg src_0 = byte_offset(src, 0 * type_size); - struct brw_reg src_2 = byte_offset(src, 2 * type_size); - struct brw_reg src_4 = byte_offset(src, 4 * type_size); - struct brw_reg src_6 = byte_offset(src, 6 * type_size); - struct brw_reg src_8 = byte_offset(src, 8 * type_size); - struct brw_reg src_10 = byte_offset(src, 10 * type_size); - struct brw_reg src_12 = byte_offset(src, 12 * type_size); - struct brw_reg src_14 = byte_offset(src, 14 * type_size); - - struct brw_reg dst_0 = byte_offset(dst, 0 * type_size); - struct brw_reg dst_4 = byte_offset(dst, 4 * type_size); - struct brw_reg dst_8 = byte_offset(dst, 8 * type_size); - struct brw_reg dst_12 = byte_offset(dst, 12 * type_size); brw_push_insn_state(p); brw_set_default_exec_size(p, BRW_EXECUTE_4); - - brw_ADD(p, dst_0, negate(src_0), src_2); - brw_ADD(p, dst_4, negate(src_4), src_6); - - if (inst->exec_size == 16) { - brw_ADD(p, dst_8, negate(src_8), src_10); - brw_ADD(p, dst_12, negate(src_12), src_14); + for (uint32_t g = 0; g < inst->exec_size; g += 4) { + brw_set_default_group(p, inst->group + g); + brw_ADD(p, byte_offset(dst, g * type_size), + negate(byte_offset(src, g * type_size)), + byte_offset(src, (g + 2) * type_size)); } - brw_pop_insn_state(p); } else { struct brw_reg src0 = stride(src, 4, 4, 1); @@ -1295,10 +1302,28 @@ fs_generator::generate_ddy(const fs_inst *inst, } } else { /* replicate the derivative at the top-left pixel to other pixels */ - struct brw_reg src0 = byte_offset(stride(src, 4, 4, 0), 0 * type_size); - struct brw_reg src1 = byte_offset(stride(src, 4, 4, 0), 2 * type_size); + if (devinfo->gen >= 8) { + struct brw_reg src0 = byte_offset(stride(src, 4, 4, 0), 0 * type_size); + struct brw_reg src1 = byte_offset(stride(src, 4, 4, 0), 2 * type_size); - brw_ADD(p, dst, negate(src0), src1); + brw_ADD(p, dst, negate(src0), src1); + } else { + /* On Haswell and earlier, the region used above appears to not work + * correctly for compressed instructions. At least on Haswell and + * Iron Lake, compressed ALIGN16 instructions do work. Since we + * would have to split to SIMD8 no matter which method we choose, we + * may as well use ALIGN16 on all platforms gen7 and earlier. 
+ */ + struct brw_reg src0 = stride(src, 4, 4, 1); + struct brw_reg src1 = stride(src, 4, 4, 1); + src0.swizzle = BRW_SWIZZLE_XXXX; + src1.swizzle = BRW_SWIZZLE_ZZZZ; + + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, dst, negate(src0), src1); + brw_pop_insn_state(p); + } } } @@ -2070,13 +2095,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case SHADER_OPCODE_MEMORY_FENCE: - brw_memory_fence(p, dst, BRW_OPCODE_SEND); + assert(src[1].file == BRW_IMMEDIATE_VALUE); + brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, src[1].ud); break; case SHADER_OPCODE_INTERLOCK: assert(devinfo->gen >= 9); /* The interlock is basically a memory fence issued via sendc */ - brw_memory_fence(p, dst, BRW_OPCODE_SENDC); + brw_memory_fence(p, dst, src[0], BRW_OPCODE_SENDC, false); break; case SHADER_OPCODE_FIND_LIVE_CHANNEL: { diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index cdd3f7bccaa..2505c6e885c 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1318,9 +1318,16 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_ine32: { fs_reg dest = result; + /* On Gen11 we have an additional issue being that src1 cannot be a byte + * type. So we convert both operands for the comparison. + */ + fs_reg temp_op[2]; + temp_op[0] = bld.fix_byte_src(op[0]); + temp_op[1] = bld.fix_byte_src(op[1]); + const uint32_t bit_size = nir_src_bit_size(instr->src[0].src); if (bit_size != 32) - dest = bld.vgrf(op[0].type, 1); + dest = bld.vgrf(temp_op[0].type, 1); brw_conditional_mod cond; switch (instr->op) { @@ -1341,7 +1348,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) default: unreachable("bad opcode"); } - bld.CMP(dest, op[0], op[1], cond); + bld.CMP(dest, temp_op[0], temp_op[1], cond); if (bit_size > 32) { bld.MOV(result, subscript(dest, BRW_REGISTER_TYPE_UD, 0)); @@ -4169,7 +4176,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_memory_barrier: { const fs_builder ubld = bld.group(8, 0); const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); - ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp) + ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp, + brw_vec8_grf(0, 0), brw_imm_ud(0)) ->size_written = 2 * REG_SIZE; break; } @@ -4821,16 +4829,29 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_quad_swap_horizontal: { const fs_reg value = get_nir_src(instr->src[0]); const fs_reg tmp = bld.vgrf(value.type); - const fs_builder ubld = bld.exec_all().group(dispatch_width / 2, 0); + if (devinfo->gen <= 7) { + /* The hardware doesn't seem to support these crazy regions with + * compressed instructions on gen7 and earlier so we fall back to + * using quad swizzles. Fortunately, we don't support 64-bit + * anything in Vulkan on gen7. 
+ */ + assert(nir_src_bit_size(instr->src[0]) == 32); + const fs_builder ubld = bld.exec_all(); + ubld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp, value, + brw_imm_ud(BRW_SWIZZLE4(1,0,3,2))); + bld.MOV(retype(dest, value.type), tmp); + } else { + const fs_builder ubld = bld.exec_all().group(dispatch_width / 2, 0); - const fs_reg src_left = horiz_stride(value, 2); - const fs_reg src_right = horiz_stride(horiz_offset(value, 1), 2); - const fs_reg tmp_left = horiz_stride(tmp, 2); - const fs_reg tmp_right = horiz_stride(horiz_offset(tmp, 1), 2); + const fs_reg src_left = horiz_stride(value, 2); + const fs_reg src_right = horiz_stride(horiz_offset(value, 1), 2); + const fs_reg tmp_left = horiz_stride(tmp, 2); + const fs_reg tmp_right = horiz_stride(horiz_offset(tmp, 1), 2); - ubld.MOV(tmp_left, src_right); - ubld.MOV(tmp_right, src_left); + ubld.MOV(tmp_left, src_right); + ubld.MOV(tmp_right, src_left); + } bld.MOV(retype(dest, value.type), tmp); break; } @@ -4970,14 +4991,26 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr const fs_builder ubld = bld.group(8, 0); const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); - ubld.emit(SHADER_OPCODE_INTERLOCK, tmp)->size_written = 2 * - REG_SIZE; - + ubld.emit(SHADER_OPCODE_INTERLOCK, tmp, brw_vec8_grf(0, 0)) + ->size_written = 2 * REG_SIZE; break; } case nir_intrinsic_end_invocation_interlock: { - /* We don't need to do anything here */ + /* For endInvocationInterlock(), we need to insert a memory fence which + * stalls in the shader until the memory transactions prior to that + * fence are complete. This ensures that the shader does not end before + * any writes from its critical section have landed. Otherwise, you can + * end up with a case where the next invocation on that pixel properly + * stalls for previous FS invocation on its pixel to complete but + * doesn't actually wait for the dataport memory transactions from that + * thread to land before submitting its own. 
+ */ + const fs_builder ubld = bld.group(8, 0); + const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); + ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp, + brw_vec8_grf(0, 0), brw_imm_ud(1)) + ->size_written = 2 * REG_SIZE; break; } diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp index 17a9dc8e9c4..35903c4030e 100644 --- a/src/intel/compiler/brw_fs_reg_allocate.cpp +++ b/src/intel/compiler/brw_fs_reg_allocate.cpp @@ -591,7 +591,7 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all) */ foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->dst.file == VGRF && inst->has_source_and_destination_hazard()) { - for (unsigned i = 0; i < 3; i++) { + for (unsigned i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) { ra_add_node_interference(g, inst->dst.nr, inst->src[i].nr); } @@ -710,14 +710,9 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all) if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 && inst->src[2].file == VGRF && inst->src[3].file == VGRF && - inst->src[2].nr != inst->src[3].nr) { - for (unsigned i = 0; i < inst->mlen; i++) { - for (unsigned j = 0; j < inst->ex_mlen; j++) { - ra_add_node_interference(g, inst->src[2].nr + i, - inst->src[3].nr + j); - } - } - } + inst->src[2].nr != inst->src[3].nr) + ra_add_node_interference(g, inst->src[2].nr, + inst->src[3].nr); } } diff --git a/src/intel/compiler/brw_predicated_break.cpp b/src/intel/compiler/brw_predicated_break.cpp index 607715dace4..e60052f3608 100644 --- a/src/intel/compiler/brw_predicated_break.cpp +++ b/src/intel/compiler/brw_predicated_break.cpp @@ -128,14 +128,8 @@ opt_predicated_break(backend_shader *s) while_inst->predicate = jump_inst->predicate; while_inst->predicate_inverse = !jump_inst->predicate_inverse; - earlier_block->children.make_empty(); - earlier_block->add_successor(s->cfg->mem_ctx, while_block); - assert(earlier_block->can_combine_with(while_block)); earlier_block->combine_with(while_block); - - earlier_block->next()->parents.make_empty(); - earlier_block->add_successor(s->cfg->mem_ctx, earlier_block->next()); } progress = true; diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index 7d60665b621..6308b280ee7 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -1204,9 +1204,31 @@ vec4_instruction::reswizzle(int dst_writemask, int swizzle) opcode != BRW_OPCODE_DP3 && opcode != BRW_OPCODE_DP2 && opcode != VEC4_OPCODE_PACK_BYTES) { for (int i = 0; i < 3; i++) { - if (src[i].file == BAD_FILE || src[i].file == IMM) + if (src[i].file == BAD_FILE) continue; + if (src[i].file == IMM) { + assert(src[i].type != BRW_REGISTER_TYPE_V && + src[i].type != BRW_REGISTER_TYPE_UV); + + /* Vector immediate types need to be reswizzled. 
*/ + if (src[i].type == BRW_REGISTER_TYPE_VF) { + const unsigned imm[] = { + (src[i].ud >> 0) & 0x0ff, + (src[i].ud >> 8) & 0x0ff, + (src[i].ud >> 16) & 0x0ff, + (src[i].ud >> 24) & 0x0ff, + }; + + src[i] = brw_imm_vf4(imm[BRW_GET_SWZ(swizzle, 0)], + imm[BRW_GET_SWZ(swizzle, 1)], + imm[BRW_GET_SWZ(swizzle, 2)], + imm[BRW_GET_SWZ(swizzle, 3)]); + } + + continue; + } + src[i].swizzle = brw_compose_swizzle(swizzle, src[i].swizzle); } } diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp index 05e6f50ebb6..013b7fb0874 100644 --- a/src/intel/compiler/brw_vec4_generator.cpp +++ b/src/intel/compiler/brw_vec4_generator.cpp @@ -1883,7 +1883,7 @@ generate_code(struct brw_codegen *p, break; case SHADER_OPCODE_MEMORY_FENCE: - brw_memory_fence(p, dst, BRW_OPCODE_SEND); + brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, false); break; case SHADER_OPCODE_FIND_LIVE_CHANNEL: { diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp index 7a8ae8158a3..4909aa32a4b 100644 --- a/src/intel/compiler/brw_vec4_nir.cpp +++ b/src/intel/compiler/brw_vec4_nir.cpp @@ -760,7 +760,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) const vec4_builder bld = vec4_builder(this).at_end().annotate(current_annotation, base_ir); const dst_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); - bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp) + bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp, brw_vec8_grf(0, 0)) ->size_written = 2 * REG_SIZE; break; } diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp index 65326416064..efdae4fd79b 100644 --- a/src/intel/compiler/test_eu_validate.cpp +++ b/src/intel/compiler/test_eu_validate.cpp @@ -2372,3 +2372,124 @@ TEST_P(validation_test, qword_low_power_no_depctrl) clear_instructions(p); } } + +TEST_P(validation_test, gen11_no_byte_src_1_2) +{ + static const struct { + enum opcode opcode; + unsigned access_mode; + + enum brw_reg_type dst_type; + struct { + enum brw_reg_type type; + unsigned vstride; + unsigned width; + unsigned hstride; + } srcs[3]; + + int gen; + bool expected_result; + } inst[] = { +#define INST(opcode, access_mode, dst_type, \ + src0_type, src0_vstride, src0_width, src0_hstride, \ + src1_type, src1_vstride, src1_width, src1_hstride, \ + src2_type, \ + gen, expected_result) \ + { \ + BRW_OPCODE_##opcode, \ + BRW_ALIGN_##access_mode, \ + BRW_REGISTER_TYPE_##dst_type, \ + { \ + { \ + BRW_REGISTER_TYPE_##src0_type, \ + BRW_VERTICAL_STRIDE_##src0_vstride, \ + BRW_WIDTH_##src0_width, \ + BRW_HORIZONTAL_STRIDE_##src0_hstride, \ + }, \ + { \ + BRW_REGISTER_TYPE_##src1_type, \ + BRW_VERTICAL_STRIDE_##src1_vstride, \ + BRW_WIDTH_##src1_width, \ + BRW_HORIZONTAL_STRIDE_##src1_hstride, \ + }, \ + { \ + BRW_REGISTER_TYPE_##src2_type, \ + }, \ + }, \ + gen, \ + expected_result, \ + } + + /* Passes on < 11 */ + INST(MOV, 16, F, B, 2, 4, 0, UD, 0, 4, 0, D, 8, true ), + INST(ADD, 16, UD, F, 0, 4, 0, UB, 0, 1, 0, D, 7, true ), + INST(MAD, 16, D, B, 0, 4, 0, UB, 0, 1, 0, B, 10, true ), + + /* Fails on 11+ */ + INST(MAD, 1, UB, W, 1, 1, 0, D, 0, 4, 0, B, 11, false ), + INST(MAD, 1, UB, W, 1, 1, 1, UB, 1, 1, 0, W, 11, false ), + INST(ADD, 1, W, W, 1, 4, 1, B, 1, 1, 0, D, 11, false ), + + /* Passes on 11+ */ + INST(MOV, 1, W, B, 8, 8, 1, D, 8, 8, 1, D, 11, true ), + INST(ADD, 1, UD, B, 8, 8, 1, W, 8, 8, 1, D, 11, true ), + INST(MAD, 1, B, B, 0, 1, 0, D, 0, 4, 0, W, 11, true ), + +#undef INST + }; + + + for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { + /* Skip instruction not 
meant for this gen. */ + if (devinfo.gen != inst[i].gen) + continue; + + brw_push_insn_state(p); + + brw_set_default_exec_size(p, BRW_EXECUTE_8); + brw_set_default_access_mode(p, inst[i].access_mode); + + switch (inst[i].opcode) { + case BRW_OPCODE_MOV: + brw_MOV(p, retype(g0, inst[i].dst_type), + retype(g0, inst[i].srcs[0].type)); + brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); + brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); + break; + case BRW_OPCODE_ADD: + brw_ADD(p, retype(g0, inst[i].dst_type), + retype(g0, inst[i].srcs[0].type), + retype(g0, inst[i].srcs[1].type)); + brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); + brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width); + brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); + brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].srcs[1].vstride); + brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width); + brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].srcs[1].hstride); + break; + case BRW_OPCODE_MAD: + brw_MAD(p, retype(g0, inst[i].dst_type), + retype(g0, inst[i].srcs[0].type), + retype(g0, inst[i].srcs[1].type), + retype(g0, inst[i].srcs[2].type)); + brw_inst_set_3src_a1_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); + brw_inst_set_3src_a1_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); + brw_inst_set_3src_a1_src1_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); + brw_inst_set_3src_a1_src1_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); + break; + default: + unreachable("invalid opcode"); + } + + brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); + + brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width); + brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width); + + brw_pop_insn_state(p); + + EXPECT_EQ(inst[i].expected_result, validate(p)); + + clear_instructions(p); + } +} diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c index acfed5119ba..c1b5178ce8e 100644 --- a/src/intel/isl/isl.c +++ b/src/intel/isl/isl.c @@ -717,7 +717,7 @@ isl_surf_choose_dim_layout(const struct isl_device *dev, /** * Calculate the physical extent of the surface's first level, in units of - * surface samples. The result is aligned to the format's compression block. + * surface samples. 
*/ static void isl_calc_phys_level0_extent_sa(const struct isl_device *dev, @@ -746,8 +746,8 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, case ISL_DIM_LAYOUT_GEN4_2D: case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ: *phys_level0_sa = (struct isl_extent4d) { - .w = isl_align_npot(info->width, fmtl->bw), - .h = fmtl->bh, + .w = info->width, + .h = 1, .d = 1, .a = info->array_len, }; @@ -771,8 +771,8 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, assert(info->samples == 1); *phys_level0_sa = (struct isl_extent4d) { - .w = isl_align_npot(info->width, fmtl->bw), - .h = isl_align_npot(info->height, fmtl->bh), + .w = info->width, + .h = info->height, .d = 1, .a = info->array_len, }; @@ -807,9 +807,6 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, isl_msaa_interleaved_scale_px_to_sa(info->samples, &phys_level0_sa->w, &phys_level0_sa->h); - - phys_level0_sa->w = isl_align(phys_level0_sa->w, fmtl->bw); - phys_level0_sa->h = isl_align(phys_level0_sa->h, fmtl->bh); break; } break; @@ -832,8 +829,8 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, assert(ISL_DEV_GEN(dev) >= 9); *phys_level0_sa = (struct isl_extent4d) { - .w = isl_align_npot(info->width, fmtl->bw), - .h = isl_align_npot(info->height, fmtl->bh), + .w = info->width, + .h = info->height, .d = 1, .a = info->depth, }; @@ -842,8 +839,8 @@ isl_calc_phys_level0_extent_sa(const struct isl_device *dev, case ISL_DIM_LAYOUT_GEN4_3D: assert(ISL_DEV_GEN(dev) < 9); *phys_level0_sa = (struct isl_extent4d) { - .w = isl_align(info->width, fmtl->bw), - .h = isl_align(info->height, fmtl->bh), + .w = info->width, + .h = info->height, .d = info->depth, .a = 1, }; @@ -968,13 +965,10 @@ isl_calc_phys_slice0_extent_sa_gen4_2d( const struct isl_extent4d *phys_level0_sa, struct isl_extent2d *phys_slice0_sa) { - const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - assert(phys_level0_sa->depth == 1); if (info->levels == 1) { - /* Do not pad the surface to the image alignment. Instead, pad it only - * to the pixel format's block alignment. + /* Do not pad the surface to the image alignment. * * For tiled surfaces, using a reduced alignment here avoids wasting CPU * cycles on the below mipmap layout caluclations. Reducing the @@ -989,8 +983,8 @@ isl_calc_phys_slice0_extent_sa_gen4_2d( * VkBufferImageCopy::bufferRowLength. 
*/ *phys_slice0_sa = (struct isl_extent2d) { - .w = isl_align_npot(phys_level0_sa->w, fmtl->bw), - .h = isl_align_npot(phys_level0_sa->h, fmtl->bh), + .w = phys_level0_sa->w, + .h = phys_level0_sa->h, }; return; } @@ -1055,9 +1049,9 @@ isl_calc_phys_total_extent_el_gen4_2d( array_pitch_span, &phys_slice0_sa); *total_extent_el = (struct isl_extent2d) { - .w = isl_assert_div(phys_slice0_sa.w, fmtl->bw), + .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw), .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) + - isl_assert_div(phys_slice0_sa.h, fmtl->bh), + isl_align_div_npot(phys_slice0_sa.h, fmtl->bh), }; } @@ -1201,7 +1195,7 @@ isl_calc_phys_total_extent_el_gen9_1d( { MAYBE_UNUSED const struct isl_format_layout *fmtl = isl_format_get_layout(info->format); - assert(phys_level0_sa->height / fmtl->bh == 1); + assert(phys_level0_sa->height == 1); assert(phys_level0_sa->depth == 1); assert(info->samples == 1); assert(image_align_sa->w >= fmtl->bw); @@ -1478,8 +1472,6 @@ isl_surf_init_s(const struct isl_device *dev, struct isl_extent4d phys_level0_sa; isl_calc_phys_level0_extent_sa(dev, info, dim_layout, tiling, msaa_layout, &phys_level0_sa); - assert(phys_level0_sa.w % fmtl->bw == 0); - assert(phys_level0_sa.h % fmtl->bh == 0); enum isl_array_pitch_span array_pitch_span = isl_choose_array_pitch_span(dev, info, dim_layout, &phys_level0_sa); diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index 6790ba002ad..ae21fef3b35 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -1176,7 +1176,7 @@ struct isl_surf { /** * Physical extent of the surface's base level, in units of physical - * surface samples and aligned to the format's compression block. + * surface samples. * * Consider isl_dim_layout as an operator that transforms a logical surface * layout to a physical surface layout. Then @@ -1885,6 +1885,34 @@ isl_surf_get_image_alignment_sa(const struct isl_surf *surf) fmtl->bd * surf->image_alignment_el.d); } +/** + * Logical extent of level 0 in units of surface elements. + */ +static inline struct isl_extent4d +isl_surf_get_logical_level0_el(const struct isl_surf *surf) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + return isl_extent4d(DIV_ROUND_UP(surf->logical_level0_px.w, fmtl->bw), + DIV_ROUND_UP(surf->logical_level0_px.h, fmtl->bh), + DIV_ROUND_UP(surf->logical_level0_px.d, fmtl->bd), + surf->logical_level0_px.a); +} + +/** + * Physical extent of level 0 in units of surface elements. + */ +static inline struct isl_extent4d +isl_surf_get_phys_level0_el(const struct isl_surf *surf) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + return isl_extent4d(DIV_ROUND_UP(surf->phys_level0_sa.w, fmtl->bw), + DIV_ROUND_UP(surf->phys_level0_sa.h, fmtl->bh), + DIV_ROUND_UP(surf->phys_level0_sa.d, fmtl->bd), + surf->phys_level0_sa.a); +} + /** * Pitch between vertically adjacent surface elements, in bytes. 
*/ diff --git a/src/intel/isl/isl_format.c b/src/intel/isl/isl_format.c index b429e7779a4..9847f231923 100644 --- a/src/intel/isl/isl_format.c +++ b/src/intel/isl/isl_format.c @@ -294,7 +294,11 @@ static const struct surface_format_info format_info[] = { SF( 70, 70, x, x, x, x, x, x, x, x, x, x, BC7_UNORM_SRGB) SF( 70, 70, x, x, x, x, x, x, x, x, x, x, BC6H_UF16) SF( x, x, x, x, x, x, x, x, x, x, x, x, PLANAR_420_8) - SF( 75, 75, x, x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB) + /* The format enum for R8G8B8_UNORM_SRGB first shows up in the HSW PRM but + * empirical testing indicates that it doesn't actually sRGB decode and + * acts identical to R8G8B8_UNORM. It does work on gen8+. + */ + SF( 80, 80, x, x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB) SF( 80, 80, x, x, x, x, x, x, x, x, x, x, ETC1_RGB8) SF( 80, 80, x, x, x, x, x, x, x, x, x, x, ETC2_RGB8) SF( 80, 80, x, x, x, x, x, x, x, x, x, x, EAC_R11) diff --git a/src/intel/perf/gen_perf.c b/src/intel/perf/gen_perf.c index 66497e457d9..648ec460afc 100644 --- a/src/intel/perf/gen_perf.c +++ b/src/intel/perf/gen_perf.c @@ -212,22 +212,10 @@ enumerate_sysfs_metrics(struct gen_perf *perf) static bool kernel_has_dynamic_config_support(struct gen_perf *perf, int fd) { - hash_table_foreach(perf->oa_metrics_table, entry) { - struct gen_perf_query_info *query = entry->data; - char config_path[280]; - uint64_t config_id; + uint64_t invalid_config_id = UINT64_MAX; - snprintf(config_path, sizeof(config_path), "%s/metrics/%s/id", - perf->sysfs_dev_dir, query->guid); - - /* Look for the test config, which we know we can't replace. */ - if (read_file_uint64(config_path, &config_id) && config_id == 1) { - return perf->ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, - &config_id) < 0 && errno == ENOENT; - } - } - - return false; + return perf->ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, + &invalid_config_id) < 0 && errno == ENOENT; } bool @@ -298,8 +286,7 @@ compute_topology_builtins(struct gen_perf *perf, for (int i = 0; i < sizeof(devinfo->eu_masks); i++) perf->sys_vars.n_eus += __builtin_popcount(devinfo->eu_masks[i]); - perf->sys_vars.eu_threads_count = - perf->sys_vars.n_eus * devinfo->num_thread_per_eu; + perf->sys_vars.eu_threads_count = devinfo->num_thread_per_eu; /* The subslice mask builtin contains bits for all slices. Prior to Gen11 * it had groups of 3bits for each slice, on Gen11 it's 8bits for each diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h index 64db89ed6aa..8a09c94e760 100644 --- a/src/intel/perf/gen_perf.h +++ b/src/intel/perf/gen_perf.h @@ -28,7 +28,11 @@ #include #include +#if defined(MAJOR_IN_SYSMACROS) #include +#elif defined(MAJOR_IN_MKDEV) +#include +#endif #include "util/hash_table.h" #include "util/ralloc.h" @@ -128,6 +132,7 @@ struct gen_perf_query_info { GEN_PERF_QUERY_TYPE_OA, GEN_PERF_QUERY_TYPE_RAW, GEN_PERF_QUERY_TYPE_PIPELINE, + GEN_PERF_QUERY_TYPE_NULL, } kind; const char *name; const char *guid; diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c index 23112cadc36..74a892c358d 100644 --- a/src/intel/tools/intel_dump_gpu.c +++ b/src/intel/tools/intel_dump_gpu.c @@ -567,7 +567,9 @@ ioctl_init_helper(int fd, unsigned long request, ...) 
static void __attribute__ ((destructor)) fini(void) { - free(output_filename); - aub_file_finish(&aub_file); - free(bos); + if (devinfo.gen != 0) { + free(output_filename); + aub_file_finish(&aub_file); + free(bos); + } } diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 109f5f71bc4..48d41891cfb 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -165,7 +165,7 @@ anv_state_table_init(struct anv_state_table *table, goto fail_fd; } - if (!u_vector_init(&table->mmap_cleanups, + if (!u_vector_init(&table->cleanups, round_to_power_of_two(sizeof(struct anv_state_table_cleanup)), 128)) { result = vk_error(VK_ERROR_INITIALIZATION_FAILED); @@ -179,12 +179,12 @@ anv_state_table_init(struct anv_state_table *table, uint32_t initial_size = initial_entries * ANV_STATE_ENTRY_SIZE; result = anv_state_table_expand_range(table, initial_size); if (result != VK_SUCCESS) - goto fail_mmap_cleanups; + goto fail_cleanups; return VK_SUCCESS; - fail_mmap_cleanups: - u_vector_finish(&table->mmap_cleanups); + fail_cleanups: + u_vector_finish(&table->cleanups); fail_fd: close(table->fd); @@ -195,7 +195,7 @@ static VkResult anv_state_table_expand_range(struct anv_state_table *table, uint32_t size) { void *map; - struct anv_mmap_cleanup *cleanup; + struct anv_state_table_cleanup *cleanup; /* Assert that we only ever grow the pool */ assert(size >= table->state.end); @@ -204,11 +204,11 @@ anv_state_table_expand_range(struct anv_state_table *table, uint32_t size) if (size > BLOCK_POOL_MEMFD_SIZE) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - cleanup = u_vector_add(&table->mmap_cleanups); + cleanup = u_vector_add(&table->cleanups); if (!cleanup) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - *cleanup = ANV_MMAP_CLEANUP_INIT; + *cleanup = ANV_STATE_TABLE_CLEANUP_INIT; /* Just leak the old map until we destroy the pool. We can't munmap it * without races or imposing locking on the block allocate fast path. On @@ -272,12 +272,12 @@ anv_state_table_finish(struct anv_state_table *table) { struct anv_state_table_cleanup *cleanup; - u_vector_foreach(cleanup, &table->mmap_cleanups) { + u_vector_foreach(cleanup, &table->cleanups) { if (cleanup->map) munmap(cleanup->map, cleanup->size); } - u_vector_finish(&table->mmap_cleanups); + u_vector_finish(&table->cleanups); close(table->fd); } @@ -478,6 +478,11 @@ anv_block_pool_init(struct anv_block_pool *pool, if (result != VK_SUCCESS) goto fail_mmap_cleanups; + /* Make the entire pool available in the front of the pool. If back + * allocation needs to use this space, the "ends" will be re-arranged. + */ + pool->state.end = pool->size; + return VK_SUCCESS; fail_mmap_cleanups: diff --git a/src/intel/vulkan/anv_android.c b/src/intel/vulkan/anv_android.c index aee11ccacd2..8c785323d36 100644 --- a/src/intel/vulkan/anv_android.c +++ b/src/intel/vulkan/anv_android.c @@ -126,7 +126,7 @@ get_ahw_buffer_format_properties( /* Fill properties fields based on description. 
*/ VkAndroidHardwareBufferFormatPropertiesANDROID *p = pProperties; - p->format = vk_format_from_android(desc.format); + p->format = vk_format_from_android(desc.format, desc.usage); const struct anv_format *anv_format = anv_get_format(p->format); p->externalFormat = (uint64_t) (uintptr_t) anv_format; diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 0d3d3f948e6..96ee66f0655 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -1075,11 +1075,11 @@ clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer, { static const union isl_color_value color_value = { .u32 = { 0, } }; const struct anv_subpass *subpass = cmd_buffer->state.subpass; - const uint32_t att_idx = subpass->depth_stencil_attachment->attachment; - - if (att_idx == VK_ATTACHMENT_UNUSED) + if (!subpass->depth_stencil_attachment) return; + const uint32_t att_idx = subpass->depth_stencil_attachment->attachment; + assert(att_idx != VK_ATTACHMENT_UNUSED); struct anv_render_pass_attachment *pass_att = &cmd_buffer->state.pass->attachments[att_idx]; diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 348764c61f5..b0ce00f6daf 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -146,9 +146,6 @@ anv_cmd_state_finish(struct anv_cmd_buffer *cmd_buffer) anv_cmd_pipeline_state_finish(cmd_buffer, &state->gfx.base); anv_cmd_pipeline_state_finish(cmd_buffer, &state->compute.base); - for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) - vk_free(&cmd_buffer->pool->alloc, state->push_constants[i]); - vk_free(&cmd_buffer->pool->alloc, state->attachments); } @@ -159,47 +156,6 @@ anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer) anv_cmd_state_init(cmd_buffer); } -/** - * This function updates the size of the push constant buffer we need to emit. - * This is called in various parts of the driver to ensure that different - * pieces of push constant data get emitted as needed. However, it is important - * that we never shrink the size of the buffer. For example, a compute shader - * dispatch will always call this for the base group id, which has an - * offset in the push constant buffer that is smaller than the offset for - * storage image data. If the compute shader has storage images, we will call - * this again with a larger size during binding table emission. However, - * if we dispatch the compute shader again without dirtying our descriptors, - * we would still call this function with a smaller size for the base group - * id, and not for the images, which would incorrectly shrink the size of the - * push constant data we emit with that dispatch, making us drop the image data. 
- */ -VkResult -anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, uint32_t size) -{ - struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage]; - - if (*ptr == NULL) { - *ptr = vk_alloc(&cmd_buffer->pool->alloc, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (*ptr == NULL) { - anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - (*ptr)->size = size; - } else if ((*ptr)->size < size) { - *ptr = vk_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (*ptr == NULL) { - anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - } - (*ptr)->size = size; - } - - return VK_SUCCESS; -} - static VkResult anv_create_cmd_buffer( struct anv_device * device, struct anv_cmd_pool * pool, @@ -766,7 +722,7 @@ anv_push_constant_value(const struct anv_cmd_pipeline_state *state, } else if (ANV_PARAM_IS_PUSH(param)) { uint32_t offset = ANV_PARAM_PUSH_OFFSET(param); assert(offset % sizeof(uint32_t) == 0); - if (offset < data->size) + if (offset < sizeof(data->client_data)) return *(uint32_t *)((uint8_t *)data + offset); else return 0; @@ -792,12 +748,12 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer, return (struct anv_state) { .offset = 0 }; struct anv_push_constants *data = - cmd_buffer->state.push_constants[stage]; + &cmd_buffer->state.push_constants[stage]; const struct brw_stage_prog_data *prog_data = pipeline->shaders[stage]->prog_data; /* If we don't actually have any push constants, bail. */ - if (prog_data == NULL || prog_data->nr_params == 0 || data == NULL) + if (prog_data == NULL || prog_data->nr_params == 0) return (struct anv_state) { .offset = 0 }; struct anv_state state = @@ -820,7 +776,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) { struct anv_cmd_pipeline_state *pipeline_state = &cmd_buffer->state.compute.base; struct anv_push_constants *data = - cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; + &cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline; const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; @@ -882,13 +838,7 @@ void anv_CmdPushConstants( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); anv_foreach_stage(stage, stageFlags) { - VkResult result = - anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, - stage, client_data); - if (result != VK_SUCCESS) - return; - - memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset, + memcpy(cmd_buffer->state.push_constants[stage].client_data + offset, pValues, size); } diff --git a/src/intel/vulkan/anv_descriptor_set.c b/src/intel/vulkan/anv_descriptor_set.c index dc33cc6d9a8..f4e00667334 100644 --- a/src/intel/vulkan/anv_descriptor_set.c +++ b/src/intel/vulkan/anv_descriptor_set.c @@ -103,6 +103,16 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device, type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) data |= ANV_DESCRIPTOR_ADDRESS_RANGE; + /* On Ivy Bridge and Bay Trail, we need swizzles textures in the shader + * Do not handle VK_DESCRIPTOR_TYPE_STORAGE_IMAGE and + * VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT because they already must + * have identity swizzle. 
+ */ + if (device->info.gen == 7 && !device->info.is_haswell && + (type == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE || + type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)) + data |= ANV_DESCRIPTOR_TEXTURE_SWIZZLE; + return data; } @@ -123,6 +133,9 @@ anv_descriptor_data_size(enum anv_descriptor_data data) if (data & ANV_DESCRIPTOR_ADDRESS_RANGE) size += sizeof(struct anv_address_range_descriptor); + if (data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) + size += sizeof(struct anv_texture_swizzle_descriptor); + return size; } @@ -1184,6 +1197,26 @@ anv_descriptor_set_write_image_view(struct anv_device *device, anv_descriptor_set_write_image_param(desc_map, image_param); } + + if (image_view && (bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE)) { + assert(!(bind_layout->data & ANV_DESCRIPTOR_SAMPLED_IMAGE)); + assert(image_view); + struct anv_texture_swizzle_descriptor desc_data[3]; + memset(desc_data, 0, sizeof(desc_data)); + + for (unsigned p = 0; p < image_view->n_planes; p++) { + desc_data[p] = (struct anv_texture_swizzle_descriptor) { + .swizzle = { + (uint8_t)image_view->planes[p].isl.swizzle.r, + (uint8_t)image_view->planes[p].isl.swizzle.g, + (uint8_t)image_view->planes[p].isl.swizzle.b, + (uint8_t)image_view->planes[p].isl.swizzle.a, + }, + }; + } + memcpy(desc_map, desc_data, + MAX2(1, bind_layout->max_plane_count) * sizeof(desc_data[0])); + } } void diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index d74116bd9c9..ab8dee46a86 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1170,6 +1170,11 @@ void anv_GetPhysicalDeviceFeatures2( } } +#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS 64 + +#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64 +#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS 256 + void anv_GetPhysicalDeviceProperties( VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties* pProperties) @@ -1215,20 +1220,20 @@ void anv_GetPhysicalDeviceProperties( .sparseAddressSpaceSize = 0, .maxBoundDescriptorSets = MAX_SETS, .maxPerStageDescriptorSamplers = max_samplers, - .maxPerStageDescriptorUniformBuffers = 64, + .maxPerStageDescriptorUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, .maxPerStageDescriptorStorageBuffers = max_ssbos, .maxPerStageDescriptorSampledImages = max_textures, .maxPerStageDescriptorStorageImages = max_images, - .maxPerStageDescriptorInputAttachments = 64, + .maxPerStageDescriptorInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS, .maxPerStageResources = max_per_stage, .maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */ - .maxDescriptorSetUniformBuffers = 6 * 64, /* number of stages * maxPerStageDescriptorUniformBuffers */ + .maxDescriptorSetUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, /* number of stages * maxPerStageDescriptorUniformBuffers */ .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2, .maxDescriptorSetStorageBuffers = 6 * max_ssbos, /* number of stages * maxPerStageDescriptorStorageBuffers */ .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2, .maxDescriptorSetSampledImages = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */ .maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */ - .maxDescriptorSetInputAttachments = 256, + .maxDescriptorSetInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS, .maxVertexInputAttributes = MAX_VBS, .maxVertexInputBindings = MAX_VBS, .maxVertexInputAttributeOffset = 
2047, @@ -1296,7 +1301,7 @@ void anv_GetPhysicalDeviceProperties( .sampledImageStencilSampleCounts = sample_counts, .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, .maxSampleMaskWords = 1, - .timestampComputeAndGraphics = false, + .timestampComputeAndGraphics = true, .timestampPeriod = 1000000000.0 / devinfo->timestamp_frequency, .maxClipDistances = 8, .maxCullDistances = 8, @@ -1393,20 +1398,20 @@ void anv_GetPhysicalDeviceProperties2( props->robustBufferAccessUpdateAfterBind = true; props->quadDivergentImplicitLod = false; props->maxPerStageDescriptorUpdateAfterBindSamplers = max_bindless_views; - props->maxPerStageDescriptorUpdateAfterBindUniformBuffers = 0; + props->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS; props->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX; props->maxPerStageDescriptorUpdateAfterBindSampledImages = max_bindless_views; props->maxPerStageDescriptorUpdateAfterBindStorageImages = max_bindless_views; - props->maxPerStageDescriptorUpdateAfterBindInputAttachments = 0; + props->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS; props->maxPerStageUpdateAfterBindResources = UINT32_MAX; props->maxDescriptorSetUpdateAfterBindSamplers = max_bindless_views; - props->maxDescriptorSetUpdateAfterBindUniformBuffers = 0; - props->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = 0; + props->maxDescriptorSetUpdateAfterBindUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS; + props->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2; props->maxDescriptorSetUpdateAfterBindStorageBuffers = UINT32_MAX; props->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2; props->maxDescriptorSetUpdateAfterBindSampledImages = max_bindless_views; props->maxDescriptorSetUpdateAfterBindStorageImages = max_bindless_views; - props->maxDescriptorSetUpdateAfterBindInputAttachments = 0; + props->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS; break; } @@ -2146,16 +2151,18 @@ VkResult anv_CreateDevice( if (!device) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - const unsigned decode_flags = - GEN_BATCH_DECODE_FULL | - ((INTEL_DEBUG & DEBUG_COLOR) ? GEN_BATCH_DECODE_IN_COLOR : 0) | - GEN_BATCH_DECODE_OFFSETS | - GEN_BATCH_DECODE_FLOATS; + if (INTEL_DEBUG & DEBUG_BATCH) { + const unsigned decode_flags = + GEN_BATCH_DECODE_FULL | + ((INTEL_DEBUG & DEBUG_COLOR) ? 
GEN_BATCH_DECODE_IN_COLOR : 0) | + GEN_BATCH_DECODE_OFFSETS | + GEN_BATCH_DECODE_FLOATS; - gen_batch_decode_ctx_init(&device->decoder_ctx, - &physical_device->info, - stderr, decode_flags, NULL, - decode_get_bo, NULL, device); + gen_batch_decode_ctx_init(&device->decoder_ctx, + &physical_device->info, + stderr, decode_flags, NULL, + decode_get_bo, NULL, device); + } device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; device->instance = physical_device->instance; @@ -2438,7 +2445,8 @@ void anv_DestroyDevice( anv_gem_destroy_context(device, device->context_id); - gen_batch_decode_ctx_finish(&device->decoder_ctx); + if (INTEL_DEBUG & DEBUG_BATCH) + gen_batch_decode_ctx_finish(&device->decoder_ctx); close(device->fd); @@ -2995,6 +3003,9 @@ void anv_FreeMemory( if (mem->map) anv_UnmapMemory(_device, _mem); + p_atomic_add(&pdevice->memory.heaps[mem->type->heapIndex].used, + -mem->bo->size); + anv_bo_cache_release(device, &device->bo_cache, mem->bo); #if defined(ANDROID) && ANDROID_API_LEVEL >= 26 @@ -3002,9 +3013,6 @@ void anv_FreeMemory( AHardwareBuffer_release(mem->ahw); #endif - p_atomic_add(&pdevice->memory.heaps[mem->type->heapIndex].used, - -mem->bo->size); - vk_free2(&device->alloc, pAllocator, mem); } diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py index 962ebdbc58d..1f103e86364 100644 --- a/src/intel/vulkan/anv_extensions.py +++ b/src/intel/vulkan/anv_extensions.py @@ -67,18 +67,18 @@ def __init__(self, version, enable): # the those extension strings, then tests dEQP-VK.api.info.instance.extensions # and dEQP-VK.api.info.device fail due to the duplicated strings. EXTENSIONS = [ - Extension('VK_KHR_8bit_storage', 1, 'device->info.gen >= 8'), + Extension('VK_KHR_8bit_storage', 1, 'device->info.gen >= 8 && !ANDROID'), Extension('VK_KHR_16bit_storage', 1, 'device->info.gen >= 8'), Extension('VK_KHR_bind_memory2', 1, True), - Extension('VK_KHR_create_renderpass2', 1, True), + Extension('VK_KHR_create_renderpass2', 1, '!ANDROID'), Extension('VK_KHR_dedicated_allocation', 1, True), - Extension('VK_KHR_depth_stencil_resolve', 1, True), + Extension('VK_KHR_depth_stencil_resolve', 1, '!ANDROID'), Extension('VK_KHR_descriptor_update_template', 1, True), Extension('VK_KHR_device_group', 1, True), Extension('VK_KHR_device_group_creation', 1, True), Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'), Extension('VK_KHR_draw_indirect_count', 1, True), - Extension('VK_KHR_driver_properties', 1, True), + Extension('VK_KHR_driver_properties', 1, '!ANDROID'), Extension('VK_KHR_external_fence', 1, 'device->has_syncobj_wait'), Extension('VK_KHR_external_fence_capabilities', 1, True), @@ -105,9 +105,9 @@ def __init__(self, version, enable): Extension('VK_KHR_sampler_mirror_clamp_to_edge', 1, True), Extension('VK_KHR_sampler_ycbcr_conversion', 1, True), Extension('VK_KHR_shader_atomic_int64', 1, - 'device->info.gen >= 9 && device->use_softpin'), + 'device->info.gen >= 9 && device->use_softpin && !ANDROID'), Extension('VK_KHR_shader_draw_parameters', 1, True), - Extension('VK_KHR_shader_float16_int8', 1, 'device->info.gen >= 8'), + Extension('VK_KHR_shader_float16_int8', 1, 'device->info.gen >= 8 && !ANDROID'), Extension('VK_KHR_storage_buffer_storage_class', 1, True), Extension('VK_KHR_surface', 25, 'ANV_HAS_SURFACE'), Extension('VK_KHR_surface_protected_capabilities', 1, 'ANV_HAS_SURFACE'), @@ -142,13 +142,13 @@ def __init__(self, version, enable): Extension('VK_EXT_scalar_block_layout', 1, True), Extension('VK_EXT_shader_stencil_export', 1, 'device->info.gen 
>= 9'), Extension('VK_EXT_shader_viewport_index_layer', 1, True), - Extension('VK_EXT_transform_feedback', 1, True), + Extension('VK_EXT_transform_feedback', 1, 'device->info.gen >= 8'), Extension('VK_EXT_vertex_attribute_divisor', 3, True), Extension('VK_EXT_ycbcr_image_arrays', 1, True), Extension('VK_ANDROID_external_memory_android_hardware_buffer', 3, 'ANDROID'), Extension('VK_ANDROID_native_buffer', 5, 'ANDROID'), - Extension('VK_GOOGLE_decorate_string', 1, True), - Extension('VK_GOOGLE_hlsl_functionality1', 1, True), + Extension('VK_GOOGLE_decorate_string', 1, '!ANDROID'), + Extension('VK_GOOGLE_hlsl_functionality1', 1, '!ANDROID'), Extension('VK_NV_compute_shader_derivatives', 1, True), ] diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index 4e15e626eed..5be02b3e86e 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -69,6 +69,7 @@ .aspect = VK_IMAGE_ASPECT_DEPTH_BIT, \ }, \ }, \ + .vk_format = __vk_fmt, \ .n_planes = 1, \ } @@ -80,6 +81,7 @@ .aspect = VK_IMAGE_ASPECT_STENCIL_BIT, \ }, \ }, \ + .vk_format = __vk_fmt, \ .n_planes = 1, \ } @@ -465,6 +467,14 @@ anv_get_format_plane(const struct gen_device_info *devinfo, VkFormat vk_format, const struct isl_format_layout *isl_layout = isl_format_get_layout(plane_format.isl_format); + /* On Ivy Bridge we don't even have enough 24 and 48-bit formats that we + * can reliably do texture upload with BLORP so just don't claim support + * for any of them. + */ + if (devinfo->gen == 7 && !devinfo->is_haswell && + (isl_layout->bpb == 24 || isl_layout->bpb == 48)) + return unsupported; + if (tiling == VK_IMAGE_TILING_OPTIMAL && !util_is_power_of_two_or_zero(isl_layout->bpb)) { /* Tiled formats *must* be power-of-two because we need up upload @@ -798,6 +808,7 @@ anv_get_image_format_properties( if (format == NULL) goto unsupported; + assert(format->vk_format == info->format); format_feature_flags = anv_get_image_format_features(devinfo, info->format, format, info->tiling); @@ -977,6 +988,13 @@ static const VkExternalMemoryProperties prime_fd_props = { VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, }; +static const VkExternalMemoryProperties userptr_props = { + .externalMemoryFeatures = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT, + .exportFromImportedHandleTypes = 0, + .compatibleHandleTypes = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, +}; + static const VkExternalMemoryProperties android_buffer_props = { .externalMemoryFeatures = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT, @@ -1069,6 +1087,10 @@ VkResult anv_GetPhysicalDeviceImageFormatProperties2( if (external_props) external_props->externalMemoryProperties = prime_fd_props; break; + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: + if (external_props) + external_props->externalMemoryProperties = userptr_props; + break; case VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID: if (ahw_supported && external_props) { external_props->externalMemoryProperties = android_image_props; @@ -1159,6 +1181,9 @@ void anv_GetPhysicalDeviceExternalBufferProperties( case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: pExternalBufferProperties->externalMemoryProperties = prime_fd_props; return; + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: + pExternalBufferProperties->externalMemoryProperties = userptr_props; + return; case VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID: if 
(physical_device->supported_extensions.ANDROID_external_memory_android_hardware_buffer) { pExternalBufferProperties->externalMemoryProperties = android_buffer_props; @@ -1170,8 +1195,14 @@ void anv_GetPhysicalDeviceExternalBufferProperties( } unsupported: + /* From the Vulkan 1.1.113 spec: + * + * compatibleHandleTypes must include at least handleType. + */ pExternalBufferProperties->externalMemoryProperties = - (VkExternalMemoryProperties) {0}; + (VkExternalMemoryProperties) { + .compatibleHandleTypes = pExternalBufferInfo->handleType, + }; } VkResult anv_CreateSamplerYcbcrConversion( diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 3841234df14..10885d8451d 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -816,12 +816,12 @@ resolve_ahw_image(struct anv_device *device, vk_tiling == VK_IMAGE_TILING_OPTIMAL); /* Check format. */ - VkFormat vk_format = vk_format_from_android(desc.format); + VkFormat vk_format = vk_format_from_android(desc.format, desc.usage); enum isl_format isl_fmt = anv_get_isl_format(&device->info, vk_format, VK_IMAGE_ASPECT_COLOR_BIT, vk_tiling); - assert(format != ISL_FORMAT_UNSUPPORTED); + assert(isl_fmt != ISL_FORMAT_UNSUPPORTED); /* Handle RGB(X)->RGBA fallback. */ switch (desc.format) { @@ -1278,6 +1278,10 @@ anv_image_fill_surface_state(struct anv_device *device, if (view_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT) view.swizzle = anv_swizzle_for_render(view.swizzle); + /* On Ivy Bridge and Bay Trail we do the swizzle in the shader */ + if (device->info.gen == 7 && !device->info.is_haswell) + view.swizzle = ISL_SWIZZLE_IDENTITY; + /* If this is a HiZ buffer we can sample from with a programmable clear * value (SKL+), define the clear value to the optimal constant. */ @@ -1355,13 +1359,10 @@ anv_image_fill_surface_state(struct anv_device *device, */ const struct isl_format_layout *fmtl = isl_format_get_layout(surface->isl.format); + tmp_surf.logical_level0_px = + isl_surf_get_logical_level0_el(&tmp_surf); + tmp_surf.phys_level0_sa = isl_surf_get_phys_level0_el(&tmp_surf); tmp_surf.format = view.format; - tmp_surf.logical_level0_px.width = - DIV_ROUND_UP(tmp_surf.logical_level0_px.width, fmtl->bw); - tmp_surf.logical_level0_px.height = - DIV_ROUND_UP(tmp_surf.logical_level0_px.height, fmtl->bh); - tmp_surf.phys_level0_sa.width /= fmtl->bw; - tmp_surf.phys_level0_sa.height /= fmtl->bh; tile_x_sa /= fmtl->bw; tile_y_sa /= fmtl->bh; diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c index 08bff9585bc..f6b9584b410 100644 --- a/src/intel/vulkan/anv_intel.c +++ b/src/intel/vulkan/anv_intel.c @@ -64,7 +64,8 @@ VkResult anv_CreateDmaBufImageINTEL( .samples = 1, /* FIXME: Need a way to use X tiling to allow scanout */ .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT, .flags = 0, }}, pAllocator, &image_h); diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c index 3d9ba5c3ecd..94ec56252ba 100644 --- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c +++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c @@ -725,6 +725,10 @@ lower_image_intrinsic(nir_intrinsic_instr *intrin, nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); nir_variable *var = nir_deref_instr_get_variable(deref); + unsigned set = var->data.descriptor_set; + unsigned binding = var->data.binding; + unsigned binding_offset = 
state->set[set].surface_offsets[binding]; + nir_builder *b = &state->builder; b->cursor = nir_before_instr(&intrin->instr); @@ -742,7 +746,7 @@ lower_image_intrinsic(nir_intrinsic_instr *intrin, intrin->dest.ssa.bit_size, state); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc)); - } else if (use_bindless) { + } else if (binding_offset > MAX_BINDING_TABLE_SIZE) { const bool write_only = (var->data.image.access & ACCESS_NON_READABLE) != 0; nir_ssa_def *desc = @@ -750,9 +754,6 @@ lower_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *handle = nir_channel(b, desc, write_only ? 1 : 0); nir_rewrite_image_intrinsic(intrin, handle, true); } else { - unsigned set = var->data.descriptor_set; - unsigned binding = var->data.binding; - unsigned binding_offset = state->set[set].surface_offsets[binding]; unsigned array_size = state->layout->set[set].layout->binding[binding].array_size; @@ -856,8 +857,21 @@ lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type, assert(deref->deref_type == nir_deref_type_array); if (nir_src_is_const(deref->arr.index)) { - unsigned arr_index = nir_src_as_uint(deref->arr.index); - *base_index += MIN2(arr_index, array_size - 1); + unsigned arr_index = MIN2(nir_src_as_uint(deref->arr.index), array_size - 1); + struct anv_sampler **immutable_samplers = + state->layout->set[set].layout->binding[binding].immutable_samplers; + if (immutable_samplers) { + /* Array of YCbCr samplers are tightly packed in the binding + * tables, compute the offset of an element in the array by + * adding the number of planes of all preceding elements. + */ + unsigned desc_arr_index = 0; + for (int i = 0; i < arr_index; i++) + desc_arr_index += immutable_samplers[i]->n_planes; + *base_index += desc_arr_index; + } else { + *base_index += arr_index; + } } else { /* From VK_KHR_sampler_ycbcr_conversion: * @@ -899,13 +913,100 @@ tex_instr_get_and_remove_plane_src(nir_tex_instr *tex) return plane; } +static nir_ssa_def * +build_def_array_select(nir_builder *b, nir_ssa_def **srcs, nir_ssa_def *idx, + unsigned start, unsigned end) +{ + if (start == end - 1) { + return srcs[start]; + } else { + unsigned mid = start + (end - start) / 2; + return nir_bcsel(b, nir_ilt(b, idx, nir_imm_int(b, mid)), + build_def_array_select(b, srcs, idx, start, mid), + build_def_array_select(b, srcs, idx, mid, end)); + } +} + static void -lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) +lower_gen7_tex_swizzle(nir_tex_instr *tex, unsigned plane, + struct apply_pipeline_layout_state *state) { - state->builder.cursor = nir_before_instr(&tex->instr); + assert(state->pdevice->info.gen == 7 && !state->pdevice->info.is_haswell); + if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF || + nir_tex_instr_is_query(tex) || + tex->op == nir_texop_tg4 || /* We can't swizzle TG4 */ + (tex->is_shadow && tex->is_new_style_shadow)) + return; + + int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); + assert(deref_src_idx >= 0); + + nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src); + nir_variable *var = nir_deref_instr_get_variable(deref); + + unsigned set = var->data.descriptor_set; + unsigned binding = var->data.binding; + const struct anv_descriptor_set_binding_layout *bind_layout = + &state->layout->set[set].layout->binding[binding]; + + if ((bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) == 0) + return; + + nir_builder *b = &state->builder; + b->cursor = nir_before_instr(&tex->instr); + + const unsigned plane_offset = + plane * 
sizeof(struct anv_texture_swizzle_descriptor); + nir_ssa_def *swiz = + build_descriptor_load(deref, plane_offset, 1, 32, state); + + b->cursor = nir_after_instr(&tex->instr); + + assert(tex->dest.ssa.bit_size == 32); + assert(tex->dest.ssa.num_components == 4); + + /* Initializing to undef is ok; nir_opt_undef will clean it up. */ + nir_ssa_def *undef = nir_ssa_undef(b, 1, 32); + nir_ssa_def *comps[8]; + for (unsigned i = 0; i < ARRAY_SIZE(comps); i++) + comps[i] = undef; + + comps[ISL_CHANNEL_SELECT_ZERO] = nir_imm_int(b, 0); + if (nir_alu_type_get_base_type(tex->dest_type) == nir_type_float) + comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_float(b, 1); + else + comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_int(b, 1); + comps[ISL_CHANNEL_SELECT_RED] = nir_channel(b, &tex->dest.ssa, 0); + comps[ISL_CHANNEL_SELECT_GREEN] = nir_channel(b, &tex->dest.ssa, 1); + comps[ISL_CHANNEL_SELECT_BLUE] = nir_channel(b, &tex->dest.ssa, 2); + comps[ISL_CHANNEL_SELECT_ALPHA] = nir_channel(b, &tex->dest.ssa, 3); + + nir_ssa_def *swiz_comps[4]; + for (unsigned i = 0; i < 4; i++) { + nir_ssa_def *comp_swiz = nir_extract_u8(b, swiz, nir_imm_int(b, i)); + swiz_comps[i] = build_def_array_select(b, comps, comp_swiz, 0, 8); + } + nir_ssa_def *swiz_tex_res = nir_vec(b, swiz_comps, 4); + /* Rewrite uses before we insert so we don't rewrite this use */ + nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, + nir_src_for_ssa(swiz_tex_res), + swiz_tex_res->parent_instr); +} + +static void +lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state) +{ unsigned plane = tex_instr_get_and_remove_plane_src(tex); + /* On Ivy Bridge and Bay Trail, we have to swizzle in the shader. Do this + * before we lower the derefs away so we can still find the descriptor. + */ + if (state->pdevice->info.gen == 7 && !state->pdevice->info.is_haswell) + lower_gen7_tex_swizzle(tex, plane, state); + + state->builder.cursor = nir_before_instr(&tex->instr); + lower_tex_deref(tex, nir_tex_src_texture_deref, &tex->texture_index, plane, state); diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index b9c9bfd7598..4012a6d5a71 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -400,12 +400,12 @@ populate_wm_prog_key(const struct gen_device_info *devinfo, * harmless to compute it and then let dead-code take care of it. */ if (ms_info->rasterizationSamples > 1) { - key->persample_interp = + key->persample_interp = ms_info->sampleShadingEnable && (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1; key->multisample_fbo = true; } - key->frag_coord_adds_sample_pos = ms_info->sampleShadingEnable; + key->frag_coord_adds_sample_pos = key->persample_interp; } } @@ -825,14 +825,24 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler, continue; const unsigned rt = var->data.location - FRAG_RESULT_DATA0; - /* Unused or out-of-bounds */ - if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid & (1 << rt))) + /* Out-of-bounds */ + if (rt >= MAX_RTS) continue; const unsigned array_len = glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1; assert(rt + array_len <= max_rt); + /* Unused */ + if (!(stage->key.wm.color_outputs_valid & BITFIELD_RANGE(rt, array_len))) { + /* If this is the RT at location 0 and we have alpha to coverage + * enabled we will have to create a null RT for it, so mark it as + * used. 
+ */ + if (rt > 0 || !stage->key.wm.alpha_to_coverage) + continue; + } + for (unsigned i = 0; i < array_len; i++) rt_used[rt + i] = true; } @@ -843,11 +853,22 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler, continue; rt_to_bindings[i] = num_rts; - rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) { - .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, - .binding = 0, - .index = i, - }; + + if (stage->key.wm.color_outputs_valid & (1 << i)) { + rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) { + .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, + .binding = 0, + .index = i, + }; + } else { + /* Setup a null render target */ + rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) { + .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, + .binding = 0, + .index = UINT32_MAX, + }; + } + num_rts++; } @@ -857,9 +878,11 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler, continue; const unsigned rt = var->data.location - FRAG_RESULT_DATA0; - if (rt >= MAX_RTS || - !(stage->key.wm.color_outputs_valid & (1 << rt))) { - /* Unused or out-of-bounds, throw it away */ + + if (rt >= MAX_RTS || !rt_used[rt]) { + /* Unused or out-of-bounds, throw it away, unless it is the first + * RT and we have alpha to coverage enabled. + */ deleted_output = true; var->data.mode = nir_var_function_temp; exec_node_remove(&var->node); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 8727d56d90f..e86561adf97 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -744,7 +744,7 @@ struct anv_state_table { struct anv_free_entry *map; uint32_t size; struct anv_block_state state; - struct u_vector mmap_cleanups; + struct u_vector cleanups; }; struct anv_state_pool { @@ -1548,6 +1548,17 @@ struct anv_sampled_image_descriptor { uint32_t sampler; }; +struct anv_texture_swizzle_descriptor { + /** Texture swizzle + * + * See also nir_intrinsic_channel_select_intel + */ + uint8_t swizzle[4]; + + /** Unused padding to ensure the struct is a multiple of 64 bits */ + uint32_t _pad; +}; + /** Struct representing a storage image descriptor */ struct anv_storage_image_descriptor { /** Bindless image handles @@ -1589,6 +1600,8 @@ enum anv_descriptor_data { ANV_DESCRIPTOR_SAMPLED_IMAGE = (1 << 6), /** Storage image handles */ ANV_DESCRIPTOR_STORAGE_IMAGE = (1 << 7), + /** Storage image handles */ + ANV_DESCRIPTOR_TEXTURE_SWIZZLE = (1 << 8), }; struct anv_descriptor_set_binding_layout { @@ -2157,12 +2170,6 @@ struct anv_xfb_binding { #define ANV_PARAM_DYN_OFFSET_IDX(param) ((param) & 0xffff) struct anv_push_constants { - /* Current allocated size of this push constants data structure. - * Because a decent chunk of it may not be used (images on SKL, for - * instance), we won't actually allocate the entire structure up-front. 
- */ - uint32_t size; - /* Push constant data provided by the client through vkPushConstants */ uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE]; @@ -2345,7 +2352,7 @@ struct anv_cmd_state { bool xfb_enabled; struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS]; VkShaderStageFlags push_constant_stages; - struct anv_push_constants * push_constants[MESA_SHADER_STAGES]; + struct anv_push_constants push_constants[MESA_SHADER_STAGES]; struct anv_state binding_tables[MESA_SHADER_STAGES]; struct anv_state samplers[MESA_SHADER_STAGES]; @@ -2465,14 +2472,6 @@ VkResult anv_cmd_buffer_execbuf(struct anv_device *device, VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer); -VkResult -anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, uint32_t size); -#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \ - anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \ - (offsetof(struct anv_push_constants, field) + \ - sizeof(cmd_buffer->state.push_constants[0]->field))) - struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer, const void *data, uint32_t size, uint32_t alignment); struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, @@ -3201,7 +3200,13 @@ anv_can_sample_with_hiz(const struct gen_device_info * const devinfo, if (!(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) return false; - if (devinfo->gen < 8) + /* Allow this feature on BDW even though it is disabled in the BDW devinfo + * struct. There's documentation which suggests that this feature actually + * reduces performance on BDW, but it has only been observed to help so + * far. Sampling fast-cleared blocks on BDW must also be handled with care + * (see depth_stencil_attachment_compute_aux_usage() for more info). 
+ */ + if (devinfo->gen != 8 && !devinfo->has_sample_with_hiz) return false; return image->samples == 1; diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c index dcefed9e4dc..92b7c6f3ff8 100644 --- a/src/intel/vulkan/anv_queue.c +++ b/src/intel/vulkan/anv_queue.c @@ -27,7 +27,6 @@ #include #include -#include #include "anv_private.h" #include "vk_util.h" diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 380283bdd56..115d12b3536 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -286,41 +286,3 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, { /* The NP PMA fix doesn't exist on gen7 */ } - -void genX(CmdSetEvent)( - VkCommandBuffer commandBuffer, - VkEvent event, - VkPipelineStageFlags stageMask) -{ - anv_finishme("Implement events on gen7"); -} - -void genX(CmdResetEvent)( - VkCommandBuffer commandBuffer, - VkEvent event, - VkPipelineStageFlags stageMask) -{ - anv_finishme("Implement events on gen7"); -} - -void genX(CmdWaitEvents)( - VkCommandBuffer commandBuffer, - uint32_t eventCount, - const VkEvent* pEvents, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - uint32_t memoryBarrierCount, - const VkMemoryBarrier* pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier* pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier* pImageMemoryBarriers) -{ - anv_finishme("Implement events on gen7"); - - genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask, - false, /* byRegion */ - memoryBarrierCount, pMemoryBarriers, - bufferMemoryBarrierCount, pBufferMemoryBarriers, - imageMemoryBarrierCount, pImageMemoryBarriers); -} diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index 6568d2c7511..762cc373cb6 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -355,6 +355,8 @@ want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer) */ const bool stc_write_en = (ds_iview->image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + (cmd_buffer->state.gfx.dynamic.stencil_write_mask.front || + cmd_buffer->state.gfx.dynamic.stencil_write_mask.back) && pipeline->writes_stencil; /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */ @@ -565,105 +567,3 @@ void genX(CmdBindIndexBuffer)( cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER; } - -/* Set of stage bits for which are pipelined, i.e. they get queued by the - * command streamer for later execution. 
- */ -#define ANV_PIPELINE_STAGE_PIPELINED_BITS \ - (VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | \ - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | \ - VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | \ - VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | \ - VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | \ - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | \ - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | \ - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | \ - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | \ - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | \ - VK_PIPELINE_STAGE_TRANSFER_BIT | \ - VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | \ - VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | \ - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT) - -void genX(CmdSetEvent)( - VkCommandBuffer commandBuffer, - VkEvent _event, - VkPipelineStageFlags stageMask) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_event, event, _event); - - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) { - pc.StallAtPixelScoreboard = true; - pc.CommandStreamerStallEnable = true; - } - - pc.DestinationAddressType = DAT_PPGTT, - pc.PostSyncOperation = WriteImmediateData, - pc.Address = (struct anv_address) { - cmd_buffer->device->dynamic_state_pool.block_pool.bo, - event->state.offset - }; - pc.ImmediateData = VK_EVENT_SET; - } -} - -void genX(CmdResetEvent)( - VkCommandBuffer commandBuffer, - VkEvent _event, - VkPipelineStageFlags stageMask) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_event, event, _event); - - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) { - pc.StallAtPixelScoreboard = true; - pc.CommandStreamerStallEnable = true; - } - - pc.DestinationAddressType = DAT_PPGTT; - pc.PostSyncOperation = WriteImmediateData; - pc.Address = (struct anv_address) { - cmd_buffer->device->dynamic_state_pool.block_pool.bo, - event->state.offset - }; - pc.ImmediateData = VK_EVENT_RESET; - } -} - -void genX(CmdWaitEvents)( - VkCommandBuffer commandBuffer, - uint32_t eventCount, - const VkEvent* pEvents, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - uint32_t memoryBarrierCount, - const VkMemoryBarrier* pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier* pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier* pImageMemoryBarriers) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - for (uint32_t i = 0; i < eventCount; i++) { - ANV_FROM_HANDLE(anv_event, event, pEvents[i]); - - anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT), sem) { - sem.WaitMode = PollingMode, - sem.CompareOperation = COMPARE_SAD_EQUAL_SDD, - sem.SemaphoreDataDword = VK_EVENT_SET, - sem.SemaphoreAddress = (struct anv_address) { - cmd_buffer->device->dynamic_state_pool.block_pool.bo, - event->state.offset - }; - } - } - - genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask, - false, /* byRegion */ - memoryBarrierCount, pMemoryBarriers, - bufferMemoryBarrierCount, pBufferMemoryBarriers, - imageMemoryBarrierCount, pImageMemoryBarriers); -} diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 1af36bced24..dc1117f80ad 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -76,6 +76,8 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) sba.GeneralStateMOCS = GENX(MOCS); 
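/* A hedged sketch, not driver code: the gen8 CmdSetEvent/CmdResetEvent/
 * CmdWaitEvents implementation removed above (and re-added for all gens
 * later in this patch) treats a VkEvent as a single dword in the dynamic
 * state pool.  Set/reset is a PIPE_CONTROL post-sync immediate write of
 * VK_EVENT_SET / VK_EVENT_RESET, and waiting is an MI_SEMAPHORE_WAIT in
 * PollingMode with COMPARE_SAD_EQUAL_SDD, i.e. the command streamer spins
 * until the dword equals the "set" value.  In plain C the protocol reduces
 * to roughly the following; the names and constants are illustrative only.
 */
#include <stdatomic.h>
#include <stdint.h>

enum { EVENT_RESET_VALUE = 0, EVENT_SET_VALUE = 1 };

static void event_set(_Atomic uint32_t *slot)
{
   atomic_store(slot, EVENT_SET_VALUE);      /* PIPE_CONTROL immediate write */
}

static void event_reset(_Atomic uint32_t *slot)
{
   atomic_store(slot, EVENT_RESET_VALUE);
}

static void event_wait(_Atomic uint32_t *slot)
{
   /* MI_SEMAPHORE_WAIT, PollingMode, SAD_EQUAL_SDD: poll until equal. */
   while (atomic_load(slot) != EVENT_SET_VALUE)
      ;
}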
sba.GeneralStateBaseAddressModifyEnable = true; + sba.StatelessDataPortAccessMOCS = GENX(MOCS); + sba.SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer); sba.SurfaceStateMOCS = GENX(MOCS); @@ -108,6 +110,23 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) sba.IndirectObjectBufferSizeModifyEnable = true; sba.InstructionBufferSize = 0xfffff; sba.InstructionBuffersizeModifyEnable = true; +# else + /* On gen7, we have upper bounds instead. According to the docs, + * setting an upper bound of zero means that no bounds checking is + * performed so, in theory, we should be able to leave them zero. + * However, border color is broken and the GPU bounds-checks anyway. + * To avoid this and other potential problems, we may as well set it + * for everything. + */ + sba.GeneralStateAccessUpperBound = + (struct anv_address) { .bo = NULL, .offset = 0xfffff000 }; + sba.GeneralStateAccessUpperBoundModifyEnable = true; + sba.DynamicStateAccessUpperBound = + (struct anv_address) { .bo = NULL, .offset = 0xfffff000 }; + sba.DynamicStateAccessUpperBoundModifyEnable = true; + sba.InstructionAccessUpperBound = + (struct anv_address) { .bo = NULL, .offset = 0xfffff000 }; + sba.InstructionAccessUpperBoundModifyEnable = true; # endif # if (GEN_GEN >= 9) if (cmd_buffer->device->instance->physicalDevice.use_softpin) { @@ -762,27 +781,21 @@ init_fast_clear_color(struct anv_cmd_buffer *cmd_buffer, set_image_fast_clear_state(cmd_buffer, image, aspect, ANV_FAST_CLEAR_NONE); - /* The fast clear value dword(s) will be copied into a surface state object. - * Ensure that the restrictions of the fields in the dword(s) are followed. - * - * CCS buffers on SKL+ can have any value set for the clear colors. - */ - if (image->samples == 1 && GEN_GEN >= 9) - return; - - /* Other combinations of auxiliary buffers and platforms require specific - * values in the clear value dword(s). + /* Initialize the struct fields that are accessed for fast-clears so that + * the HW restrictions on the field values are satisfied. */ struct anv_address addr = anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect); if (GEN_GEN >= 9) { - for (unsigned i = 0; i < 4; i++) { + const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev; + const unsigned num_dwords = GEN_GEN >= 10 ? + isl_dev->ss.clear_color_state_size / 4 : + isl_dev->ss.clear_value_size / 4; + for (unsigned i = 0; i < num_dwords; i++) { anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { sdi.Address = addr; sdi.Address.offset += i * 4; - /* MCS buffers on SKL+ can only have 1/0 clear colors. 
*/ - assert(image->samples > 1); sdi.ImmediateData = 0; } } @@ -3535,16 +3548,8 @@ anv_cmd_buffer_push_base_group_id(struct anv_cmd_buffer *cmd_buffer, if (anv_batch_has_error(&cmd_buffer->batch)) return; - VkResult result = - anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, MESA_SHADER_COMPUTE, - base_work_group_id); - if (result != VK_SUCCESS) { - cmd_buffer->batch.status = result; - return; - } - struct anv_push_constants *push = - cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; + &cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]; if (push->base_work_group_id[0] != baseGroupX || push->base_work_group_id[1] != baseGroupY || push->base_work_group_id[2] != baseGroupZ) { @@ -3750,6 +3755,25 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer, anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), t); #endif +#if GEN_GEN == 9 + if (pipeline == _3D) { + /* There is a mid-object preemption workaround which requires you to + * re-emit MEDIA_VFE_STATE after switching from GPGPU to 3D. However, + * even without preemption, we have issues with geometry flickering when + * GPGPU and 3D are back-to-back and this seems to fix it. We don't + * really know why. + */ + const uint32_t subslices = + MAX2(cmd_buffer->device->instance->physicalDevice.subslice_total, 1); + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_VFE_STATE), vfe) { + vfe.MaximumNumberofThreads = + devinfo->max_cs_threads * subslices - 1; + vfe.NumberofURBEntries = 2; + vfe.URBEntryAllocationSize = 2; + } + } +#endif + /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] * PIPELINE_SELECT [DevBWR+]": * @@ -4743,3 +4767,110 @@ void genX(CmdEndConditionalRenderingEXT)( cmd_state->conditional_render_enabled = false; } #endif + +/* Set of stage bits for which are pipelined, i.e. they get queued by the + * command streamer for later execution. 
+ */ +#define ANV_PIPELINE_STAGE_PIPELINED_BITS \ + (VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | \ + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | \ + VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | \ + VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | \ + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | \ + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | \ + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | \ + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | \ + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | \ + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | \ + VK_PIPELINE_STAGE_TRANSFER_BIT | \ + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | \ + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | \ + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT) + +void genX(CmdSetEvent)( + VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) { + pc.StallAtPixelScoreboard = true; + pc.CommandStreamerStallEnable = true; + } + + pc.DestinationAddressType = DAT_PPGTT, + pc.PostSyncOperation = WriteImmediateData, + pc.Address = (struct anv_address) { + cmd_buffer->device->dynamic_state_pool.block_pool.bo, + event->state.offset + }; + pc.ImmediateData = VK_EVENT_SET; + } +} + +void genX(CmdResetEvent)( + VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_event, event, _event); + + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) { + pc.StallAtPixelScoreboard = true; + pc.CommandStreamerStallEnable = true; + } + + pc.DestinationAddressType = DAT_PPGTT; + pc.PostSyncOperation = WriteImmediateData; + pc.Address = (struct anv_address) { + cmd_buffer->device->dynamic_state_pool.block_pool.bo, + event->state.offset + }; + pc.ImmediateData = VK_EVENT_RESET; + } +} + +void genX(CmdWaitEvents)( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) +{ +#if GEN_GEN >= 8 + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + for (uint32_t i = 0; i < eventCount; i++) { + ANV_FROM_HANDLE(anv_event, event, pEvents[i]); + + anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT), sem) { + sem.WaitMode = PollingMode, + sem.CompareOperation = COMPARE_SAD_EQUAL_SDD, + sem.SemaphoreDataDword = VK_EVENT_SET, + sem.SemaphoreAddress = (struct anv_address) { + cmd_buffer->device->dynamic_state_pool.block_pool.bo, + event->state.offset + }; + } + } +#else + anv_finishme("Implement events on gen7"); +#endif + + genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask, + false, /* byRegion */ + memoryBarrierCount, pMemoryBarriers, + bufferMemoryBarrierCount, pBufferMemoryBarriers, + imageMemoryBarrierCount, pImageMemoryBarriers); +} diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 0b58dce05b0..8db2e1e6aa0 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1075,7 +1075,7 @@ emit_3dstate_clip(struct anv_pipeline *pipeline, 
clip.ClipEnable = true; clip.StatisticsEnable = true; clip.EarlyCullEnable = true; - clip.APIMode = APIMODE_D3D, + clip.APIMode = APIMODE_D3D; clip.ViewportXYClipTestEnable = true; #if GEN_GEN >= 8 diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index 146435c3f8f..aa0cf8b9471 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -346,14 +346,23 @@ emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer, } static void -emit_query_availability(struct anv_cmd_buffer *cmd_buffer, - struct anv_address addr) +emit_query_mi_availability(struct gen_mi_builder *b, + struct anv_address addr, + bool available) +{ + gen_mi_store(b, gen_mi_mem64(addr), gen_mi_imm(available)); +} + +static void +emit_query_pc_availability(struct anv_cmd_buffer *cmd_buffer, + struct anv_address addr, + bool available) { anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { pc.DestinationAddressType = DAT_PPGTT; pc.PostSyncOperation = WriteImmediateData; pc.Address = addr; - pc.ImmediateData = 1; + pc.ImmediateData = available; } } @@ -366,11 +375,39 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer, struct gen_mi_builder *b, struct anv_query_pool *pool, uint32_t first_index, uint32_t num_queries) { - for (uint32_t i = 0; i < num_queries; i++) { - struct anv_address slot_addr = - anv_query_address(pool, first_index + i); - gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8); - emit_query_availability(cmd_buffer, slot_addr); + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + case VK_QUERY_TYPE_TIMESTAMP: + /* These queries are written with a PIPE_CONTROL so clear them using the + * PIPE_CONTROL as well so we don't have to synchronize between 2 types + * of operations. + */ + assert((pool->stride % 8) == 0); + for (uint32_t i = 0; i < num_queries; i++) { + struct anv_address slot_addr = + anv_query_address(pool, first_index + i); + + for (uint32_t qword = 1; qword < (pool->stride / 8); qword++) { + emit_query_pc_availability(cmd_buffer, + anv_address_add(slot_addr, qword * 8), + false); + } + emit_query_pc_availability(cmd_buffer, slot_addr, true); + } + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + for (uint32_t i = 0; i < num_queries; i++) { + struct anv_address slot_addr = + anv_query_address(pool, first_index + i); + gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8); + emit_query_mi_availability(b, slot_addr, true); + } + break; + + default: + unreachable("Unsupported query type"); } } @@ -383,11 +420,28 @@ void genX(CmdResetQueryPool)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_query_pool, pool, queryPool); - for (uint32_t i = 0; i < queryCount; i++) { - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdm) { - sdm.Address = anv_query_address(pool, firstQuery + i); - sdm.ImmediateData = 0; + switch (pool->type) { + case VK_QUERY_TYPE_OCCLUSION: + case VK_QUERY_TYPE_TIMESTAMP: + for (uint32_t i = 0; i < queryCount; i++) { + emit_query_pc_availability(cmd_buffer, + anv_query_address(pool, firstQuery + i), + false); } + break; + + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: { + struct gen_mi_builder b; + gen_mi_builder_init(&b, &cmd_buffer->batch); + + for (uint32_t i = 0; i < queryCount; i++) + emit_query_mi_availability(&b, anv_query_address(pool, firstQuery + i), false); + break; + } + + default: + unreachable("Unsupported query type"); } } @@ -525,7 +579,7 @@ 
void genX(CmdEndQueryIndexedEXT)( switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: emit_ps_depth_count(cmd_buffer, anv_address_add(query_addr, 16)); - emit_query_availability(cmd_buffer, query_addr); + emit_query_pc_availability(cmd_buffer, query_addr, true); break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: { @@ -543,7 +597,7 @@ void genX(CmdEndQueryIndexedEXT)( offset += 16; } - emit_query_availability(cmd_buffer, query_addr); + emit_query_mi_availability(&b, query_addr, true); break; } @@ -554,7 +608,7 @@ void genX(CmdEndQueryIndexedEXT)( } emit_xfb_query(&b, index, anv_address_add(query_addr, 16)); - emit_query_availability(cmd_buffer, query_addr); + emit_query_mi_availability(&b, query_addr, true); break; default: @@ -613,7 +667,7 @@ void genX(CmdWriteTimestamp)( break; } - emit_query_availability(cmd_buffer, query_addr); + emit_query_pc_availability(cmd_buffer, query_addr, true); /* When multiview is active the spec requires that N consecutive query * indices are used, where N is the number of active views in the subpass. @@ -684,7 +738,20 @@ void genX(CmdCopyQueryPoolResults)( } if ((flags & VK_QUERY_RESULT_WAIT_BIT) || - (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS)) { + (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS) || + /* Occlusion & timestamp queries are written using a PIPE_CONTROL and + * because we're about to copy values from MI commands, we need to + * stall the command streamer to make sure the PIPE_CONTROL values have + * landed, otherwise we could see inconsistent values & availability. + * + * From the vulkan spec: + * + * "vkCmdCopyQueryPoolResults is guaranteed to see the effect of + * previous uses of vkCmdResetQueryPool in the same queue, without + * any additional synchronization." + */ + pool->type == VK_QUERY_TYPE_OCCLUSION || + pool->type == VK_QUERY_TYPE_TIMESTAMP) { cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); } diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c index 9276dc9470b..c2266b68207 100644 --- a/src/intel/vulkan/genX_state.c +++ b/src/intel/vulkan/genX_state.c @@ -200,18 +200,6 @@ genX(init_device_state)(struct anv_device *device) lri.DataDWord = half_slice_chicken7; } - /* WA_2204188704: Pixel Shader Panic dispatch must be disabled. 
- */ - uint32_t common_slice_chicken3; - anv_pack_struct(&common_slice_chicken3, GENX(COMMON_SLICE_CHICKEN3), - .PSThreadPanicDispatch = 0x3, - .PSThreadPanicDispatchMask = 0x3); - - anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) { - lri.RegisterOffset = GENX(COMMON_SLICE_CHICKEN3_num); - lri.DataDWord = common_slice_chicken3; - } - /* WaEnableStateCacheRedirectToCS:icl */ uint32_t slice_common_eco_chicken1; anv_pack_struct(&slice_common_eco_chicken1, diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build index c12040c84eb..d0120d8026c 100644 --- a/src/intel/vulkan/meson.build +++ b/src/intel/vulkan/meson.build @@ -98,20 +98,20 @@ foreach g : [['70', ['gen7_cmd_buffer.c']], ['75', ['gen7_cmd_buffer.c']], 'anv_gen@0@'.format(_gen), [anv_gen_files, g[1], anv_entrypoints[0], anv_extensions_h], include_directories : [ - inc_common, inc_compiler, inc_include, inc_intel, inc_vulkan_util, - inc_vulkan_wsi, + inc_common, inc_compiler, inc_include, inc_intel, inc_vulkan_wsi, ], c_args : [ c_vis_args, no_override_init_args, c_sse2_args, '-DGEN_VERSIONx10=@0@'.format(_gen), ], - dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml], + dependencies : [ + dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml, idep_vulkan_util_headers, + ], ) endforeach libanv_files = files( 'anv_allocator.c', - 'anv_android_stubs.c', 'anv_android.h', 'anv_batch_chain.c', 'anv_blorp.c', @@ -144,6 +144,7 @@ anv_deps = [ dep_libdrm, dep_valgrind, idep_nir_headers, + idep_vulkan_util_headers, ] anv_flags = [ c_vis_args, @@ -176,6 +177,14 @@ if with_xlib_lease anv_flags += '-DVK_USE_PLATFORM_XLIB_XRANDR_EXT' endif +if with_platform_android + anv_deps += dep_android + anv_flags += '-DVK_USE_PLATFORM_ANDROID_KHR' + libanv_files += files('anv_android.c') +else + libanv_files += files('anv_android_stubs.c') +endif + libanv_common = static_library( 'anv_common', [ @@ -183,7 +192,7 @@ libanv_common = static_library( gen_xml_pack, ], include_directories : [ - inc_common, inc_intel, inc_compiler, inc_include, inc_vulkan_util, + inc_common, inc_intel, inc_compiler, inc_include, inc_vulkan_wsi, ], c_args : anv_flags, @@ -194,16 +203,15 @@ libvulkan_intel = shared_library( 'vulkan_intel', [files('anv_gem.c'), anv_entrypoints[0], anv_extensions_h], include_directories : [ - inc_common, inc_intel, inc_compiler, inc_include, inc_vulkan_util, - inc_vulkan_wsi, + inc_common, inc_intel, inc_compiler, inc_include, inc_vulkan_wsi, ], link_whole : [libanv_common, libanv_gen_libs], link_with : [ libintel_compiler, libintel_common, libintel_dev, libisl, libblorp, - libvulkan_util, libvulkan_wsi, libmesa_util, + libvulkan_wsi, libmesa_util, ], dependencies : [ - dep_thread, dep_dl, dep_m, anv_deps, idep_nir, idep_genxml, + dep_thread, dep_dl, dep_m, anv_deps, idep_nir, idep_genxml, idep_vulkan_util ], c_args : anv_flags, link_args : ['-Wl,--build-id=sha1', ld_args_bsymbolic, ld_args_gc_sections], @@ -215,23 +223,22 @@ if with_tests 'vulkan_intel_test', [files('anv_gem_stubs.c'), anv_entrypoints[0], anv_extensions_h], include_directories : [ - inc_common, inc_intel, inc_compiler, inc_include, inc_vulkan_util, - inc_vulkan_wsi, + inc_common, inc_intel, inc_compiler, inc_include, inc_vulkan_wsi, ], link_whole : libanv_common, link_with : [ libanv_gen_libs, libintel_compiler, libintel_common, libintel_dev, - libisl, libblorp, libvulkan_util, libvulkan_wsi, libmesa_util, + libisl, libblorp, libvulkan_wsi, libmesa_util, ], dependencies : [ - dep_thread, dep_dl, dep_m, anv_deps, idep_nir, + dep_thread, 
dep_dl, dep_m, anv_deps, idep_nir, idep_vulkan_util ], c_args : anv_flags, ) - foreach t : ['block_pool_no_free', 'state_pool_no_free', - 'state_pool_free_list_only', 'state_pool', - 'state_pool_padding'] + foreach t : ['block_pool_no_free', 'block_pool_grow_first', + 'state_pool_no_free', 'state_pool_free_list_only', + 'state_pool', 'state_pool_padding'] test( 'anv_@0@'.format(t), executable( @@ -239,9 +246,9 @@ if with_tests ['tests/@0@.c'.format(t), anv_entrypoints[0], anv_extensions_h], c_args : [ c_sse2_args ], link_with : libvulkan_intel_test, - dependencies : [dep_libdrm, dep_thread, dep_m, dep_valgrind], + dependencies : [dep_libdrm, dep_thread, dep_m, dep_valgrind, idep_vulkan_util, ], include_directories : [ - inc_common, inc_intel, inc_compiler, inc_vulkan_util, inc_vulkan_wsi, + inc_common, inc_intel, inc_compiler, inc_vulkan_wsi, ], ), suite : ['intel'], diff --git a/src/intel/vulkan/tests/block_pool_grow_first.c b/src/intel/vulkan/tests/block_pool_grow_first.c new file mode 100644 index 00000000000..aea12b29de8 --- /dev/null +++ b/src/intel/vulkan/tests/block_pool_grow_first.c @@ -0,0 +1,63 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#undef NDEBUG + +#include "anv_private.h" + +int main(int argc, char **argv) +{ + struct anv_instance instance; + struct anv_device device = { + .instance = &instance, + }; + struct anv_block_pool pool; + + /* Create a pool with initial size smaller than the block allocated, so + * that it must grow in the first allocation. + */ + const uint32_t block_size = 16 * 1024; + const uint32_t initial_size = block_size / 2; + + anv_block_pool_init(&pool, &device, 4096, initial_size, EXEC_OBJECT_PINNED); + assert(pool.size == initial_size); + + uint32_t padding; + int32_t offset = anv_block_pool_alloc(&pool, block_size, &padding); + + /* Pool will have grown at least space to fit the new allocation. */ + assert(pool.size > initial_size); + assert(pool.size >= initial_size + block_size); + + /* The whole initial size is considered padding and the allocation should be + * right next to it. + */ + assert(padding == initial_size); + assert(offset == initial_size); + + /* Use the memory to ensure it is valid. 
*/ + void *map = anv_block_pool_map(&pool, offset); + memset(map, 22, block_size); + + anv_block_pool_finish(&pool); +} diff --git a/src/intel/vulkan/vk_format_info.h b/src/intel/vulkan/vk_format_info.h index 2e126645763..fe88773cda9 100644 --- a/src/intel/vulkan/vk_format_info.h +++ b/src/intel/vulkan/vk_format_info.h @@ -32,8 +32,13 @@ /* See i915_private_android_types.h in minigbm. */ #define HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL 0x100 +enum { + /* Usage bit equal to GRALLOC_USAGE_HW_CAMERA_MASK */ + AHARDWAREBUFFER_USAGE_CAMERA_MASK = 0x00060000U, +}; + static inline VkFormat -vk_format_from_android(unsigned android_format) +vk_format_from_android(unsigned android_format, unsigned android_usage) { switch (android_format) { case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM: @@ -49,6 +54,11 @@ vk_format_from_android(unsigned android_format) return VK_FORMAT_A2B10G10R10_UNORM_PACK32; case HAL_PIXEL_FORMAT_NV12_Y_TILED_INTEL: return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM; + case AHARDWAREBUFFER_FORMAT_IMPLEMENTATION_DEFINED: + if (android_usage & AHARDWAREBUFFER_USAGE_CAMERA_MASK) + return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM; + else + return VK_FORMAT_R8G8B8_UNORM; case AHARDWAREBUFFER_FORMAT_BLOB: default: return VK_FORMAT_UNDEFINED; diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c index 7ef59f0477b..6b611e18ae9 100644 --- a/src/loader/loader_dri3_helper.c +++ b/src/loader/loader_dri3_helper.c @@ -1120,6 +1120,9 @@ dri3_cpp_for_format(uint32_t format) { case __DRI_IMAGE_FORMAT_SARGB8: case __DRI_IMAGE_FORMAT_SABGR8: return 4; + case __DRI_IMAGE_FORMAT_XBGR16161616F: + case __DRI_IMAGE_FORMAT_ABGR16161616F: + return 8; case __DRI_IMAGE_FORMAT_NONE: default: return 0; @@ -1178,6 +1181,8 @@ image_format_to_fourcc(int format) case __DRI_IMAGE_FORMAT_ARGB2101010: return __DRI_IMAGE_FOURCC_ARGB2101010; case __DRI_IMAGE_FORMAT_XBGR2101010: return __DRI_IMAGE_FOURCC_XBGR2101010; case __DRI_IMAGE_FORMAT_ABGR2101010: return __DRI_IMAGE_FOURCC_ABGR2101010; + case __DRI_IMAGE_FORMAT_XBGR16161616F: return __DRI_IMAGE_FOURCC_XBGR16161616F; + case __DRI_IMAGE_FORMAT_ABGR16161616F: return __DRI_IMAGE_FOURCC_ABGR16161616F; } return 0; } diff --git a/src/mapi/glapi/gen/gl_XML.py b/src/mapi/glapi/gen/gl_XML.py index 478f82ca314..2854a9a5688 100644 --- a/src/mapi/glapi/gen/gl_XML.py +++ b/src/mapi/glapi/gen/gl_XML.py @@ -49,7 +49,7 @@ def parse_GL_API( file_name, factory = None ): # that are not part of the ABI. for func in api.functionIterateByCategory(): - if func.assign_offset: + if func.assign_offset and func.offset < 0: func.offset = api.next_offset; api.next_offset += 1 @@ -683,8 +683,12 @@ def process_element(self, element): if name in static_data.offsets and static_data.offsets[name] <= static_data.MAX_OFFSETS: self.offset = static_data.offsets[name] + elif name in static_data.offsets and static_data.offsets[name] > static_data.MAX_OFFSETS: + self.offset = static_data.offsets[name] + self.assign_offset = True else: - self.offset = -1 + if self.exec_flavor != "skip": + raise RuntimeError("Entry-point %s is missing offset in static_data.py. Add one at the bottom of the list." 
% (name)) self.assign_offset = self.exec_flavor != "skip" or name in static_data.unused_functions if not self.name: diff --git a/src/mapi/glapi/gen/gl_gentable.py b/src/mapi/glapi/gen/gl_gentable.py index 9d8923cf8db..92e1a546cff 100644 --- a/src/mapi/glapi/gen/gl_gentable.py +++ b/src/mapi/glapi/gen/gl_gentable.py @@ -45,7 +45,7 @@ #endif #if (defined(GLXEXT) && defined(HAVE_BACKTRACE)) \\ - || (!defined(GLXEXT) && defined(DEBUG) && !defined(__CYGWIN__) && !defined(__MINGW32__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__)) + || (!defined(GLXEXT) && defined(DEBUG) && defined(HAVE_EXECINFO_H)) #define USE_BACKTRACE #endif diff --git a/src/mapi/glapi/gen/static_data.py b/src/mapi/glapi/gen/static_data.py index bc49324348f..5044e0f78cf 100644 --- a/src/mapi/glapi/gen/static_data.py +++ b/src/mapi/glapi/gen/static_data.py @@ -29,7 +29,7 @@ """Table of functions that have ABI-mandated offsets in the dispatch table. The first MAX_OFFSETS entries are required by indirect GLX. The rest are -required to preserve the glapi <> drivers ABI. This is to be addressed shortly. +required to preserve the glapi <> GL/GLES ABI. This is to be addressed shortly. This list will never change.""" offsets = { @@ -1453,6 +1453,7 @@ "TexParameterxv": 1417, "BlendBarrier": 1418, "PrimitiveBoundingBox": 1419, + "MaxShaderCompilerThreadsKHR": 1420, } functions = [ diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 3515e312023..0a8b9bb885d 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -104,6 +104,8 @@ static void cleanup_temp_texture(struct gl_context *ctx, struct temp_texture *tex); static void meta_glsl_clear_cleanup(struct gl_context *ctx, struct clear_state *clear); +static void meta_copypix_cleanup(struct gl_context *ctx, + struct copypix_state *copypix); static void meta_decompress_cleanup(struct gl_context *ctx, struct decompress_state *decompress); static void meta_drawpix_cleanup(struct gl_context *ctx, @@ -422,6 +424,7 @@ _mesa_meta_free(struct gl_context *ctx) _mesa_make_current(ctx, NULL, NULL); _mesa_meta_glsl_blit_cleanup(ctx, &ctx->Meta->Blit); meta_glsl_clear_cleanup(ctx, &ctx->Meta->Clear); + meta_copypix_cleanup(ctx, &ctx->Meta->CopyPix); _mesa_meta_glsl_generate_mipmap_cleanup(ctx, &ctx->Meta->Mipmap); cleanup_temp_texture(ctx, &ctx->Meta->TempTex); meta_decompress_cleanup(ctx, &ctx->Meta->Decompress); @@ -1465,6 +1468,8 @@ _mesa_meta_setup_drawpix_texture(struct gl_context *ctx, /* load image */ _mesa_TexSubImage2D(tex->Target, 0, 0, 0, width, height, format, type, pixels); + + _mesa_reference_buffer_object(ctx, &save_unpack_obj, NULL); } } else { @@ -1595,6 +1600,17 @@ meta_glsl_clear_cleanup(struct gl_context *ctx, struct clear_state *clear) } } +static void +meta_copypix_cleanup(struct gl_context *ctx, struct copypix_state *copypix) +{ + if (copypix->VAO == 0) + return; + _mesa_DeleteVertexArrays(1, ©pix->VAO); + copypix->VAO = 0; + _mesa_reference_buffer_object(ctx, ©pix->buf_obj, NULL); +} + + /** * Given a bitfield of BUFFER_BIT_x draw buffers, call glDrawBuffers to * set GL to only draw to those buffers. 
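/* A hedged sketch of the reference-counting idiom behind the two meta.c
 * fixes above: _mesa_reference_buffer_object(ctx, &ptr, NULL) releases
 * whatever *ptr currently pins and clears the pointer, which is what both
 * _mesa_meta_setup_drawpix_texture() and the new meta_copypix_cleanup()
 * rely on to avoid leaking buffer objects.  The struct and helper below are
 * illustrative stand-ins, not the Mesa API.
 */
#include <stdlib.h>

struct refcounted { unsigned refcount; };

static void
reference_object(struct refcounted **ptr, struct refcounted *obj)
{
   if (*ptr == obj)
      return;
   if (*ptr && --(*ptr)->refcount == 0)
      free(*ptr);               /* dropped the last reference */
   if (obj)
      obj->refcount++;
   *ptr = obj;
}

/* Cleanup paths simply "reference in" NULL:  reference_object(&saved, NULL); */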
diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index ac3a04bceff..a384cadd557 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -886,6 +886,14 @@ static const struct { .image_format = __DRI_IMAGE_FORMAT_XRGB8888, .mesa_format = MESA_FORMAT_B8G8R8X8_UNORM, }, + { + .image_format = __DRI_IMAGE_FORMAT_ABGR16161616F, + .mesa_format = MESA_FORMAT_RGBA_FLOAT16, + }, + { + .image_format = __DRI_IMAGE_FORMAT_XBGR16161616F, + .mesa_format = MESA_FORMAT_RGBX_FLOAT16, + }, { .image_format = __DRI_IMAGE_FORMAT_ARGB2101010, .mesa_format = MESA_FORMAT_B10G10R10A2_UNORM, diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c index 5a66bcf8e05..f9a4acefed1 100644 --- a/src/mesa/drivers/dri/common/utils.c +++ b/src/mesa/drivers/dri/common/utils.c @@ -181,28 +181,47 @@ driCreateConfigs(mesa_format format, GLboolean enable_accum, GLboolean color_depth_match, GLboolean mutable_render_buffer) { - static const uint32_t masks_table[][4] = { + static const struct { + uint32_t masks[4]; + int shifts[4]; + } format_table[] = { /* MESA_FORMAT_B5G6R5_UNORM */ - { 0x0000F800, 0x000007E0, 0x0000001F, 0x00000000 }, + {{ 0x0000F800, 0x000007E0, 0x0000001F, 0x00000000 }, + { 11, 5, 0, -1 }}, /* MESA_FORMAT_B8G8R8X8_UNORM */ - { 0x00FF0000, 0x0000FF00, 0x000000FF, 0x00000000 }, + {{ 0x00FF0000, 0x0000FF00, 0x000000FF, 0x00000000 }, + { 16, 8, 0, -1 }}, /* MESA_FORMAT_B8G8R8A8_UNORM */ - { 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000 }, + {{ 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000 }, + { 16, 8, 0, 24 }}, /* MESA_FORMAT_B10G10R10X2_UNORM */ - { 0x3FF00000, 0x000FFC00, 0x000003FF, 0x00000000 }, + {{ 0x3FF00000, 0x000FFC00, 0x000003FF, 0x00000000 }, + { 20, 10, 0, -1 }}, /* MESA_FORMAT_B10G10R10A2_UNORM */ - { 0x3FF00000, 0x000FFC00, 0x000003FF, 0xC0000000 }, + {{ 0x3FF00000, 0x000FFC00, 0x000003FF, 0xC0000000 }, + { 20, 10, 0, 30 }}, /* MESA_FORMAT_R8G8B8A8_UNORM */ - { 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 }, + {{ 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 }, + { 0, 8, 16, 24 }}, /* MESA_FORMAT_R8G8B8X8_UNORM */ - { 0x000000FF, 0x0000FF00, 0x00FF0000, 0x00000000 }, + {{ 0x000000FF, 0x0000FF00, 0x00FF0000, 0x00000000 }, + { 0, 8, 16, -1 }}, /* MESA_FORMAT_R10G10B10X2_UNORM */ - { 0x000003FF, 0x000FFC00, 0x3FF00000, 0x00000000 }, + {{ 0x000003FF, 0x000FFC00, 0x3FF00000, 0x00000000 }, + { 0, 10, 20, -1 }}, /* MESA_FORMAT_R10G10B10A2_UNORM */ - { 0x000003FF, 0x000FFC00, 0x3FF00000, 0xC0000000 }, + {{ 0x000003FF, 0x000FFC00, 0x3FF00000, 0xC0000000 }, + { 0, 10, 20, 30 }}, + /* MESA_FORMAT_RGBX_FLOAT16 */ + {{ 0, 0, 0, 0}, + { 0, 16, 32, -1 }}, + /* MESA_FORMAT_RGBA_FLOAT16 */ + {{ 0, 0, 0, 0}, + { 0, 16, 32, 48 }}, }; const uint32_t * masks; + const int * shifts; __DRIconfig **configs, **c; struct gl_config *modes; unsigned i, j, k, h; @@ -213,37 +232,55 @@ driCreateConfigs(mesa_format format, int blue_bits; int alpha_bits; bool is_srgb; + bool is_float; switch (format) { case MESA_FORMAT_B5G6R5_UNORM: - masks = masks_table[0]; + masks = format_table[0].masks; + shifts = format_table[0].shifts; break; case MESA_FORMAT_B8G8R8X8_UNORM: case MESA_FORMAT_B8G8R8X8_SRGB: - masks = masks_table[1]; + masks = format_table[1].masks; + shifts = format_table[1].shifts; break; case MESA_FORMAT_B8G8R8A8_UNORM: case MESA_FORMAT_B8G8R8A8_SRGB: - masks = masks_table[2]; + masks = format_table[2].masks; + shifts = format_table[2].shifts; break; case MESA_FORMAT_R8G8B8A8_UNORM: case 
MESA_FORMAT_R8G8B8A8_SRGB: - masks = masks_table[5]; + masks = format_table[5].masks; + shifts = format_table[5].shifts; break; case MESA_FORMAT_R8G8B8X8_UNORM: - masks = masks_table[6]; + masks = format_table[6].masks; + shifts = format_table[6].shifts; break; case MESA_FORMAT_B10G10R10X2_UNORM: - masks = masks_table[3]; + masks = format_table[3].masks; + shifts = format_table[3].shifts; break; case MESA_FORMAT_B10G10R10A2_UNORM: - masks = masks_table[4]; + masks = format_table[4].masks; + shifts = format_table[4].shifts; + break; + case MESA_FORMAT_RGBX_FLOAT16: + masks = format_table[9].masks; + shifts = format_table[9].shifts; + break; + case MESA_FORMAT_RGBA_FLOAT16: + masks = format_table[10].masks; + shifts = format_table[10].shifts; break; case MESA_FORMAT_R10G10B10X2_UNORM: - masks = masks_table[7]; + masks = format_table[7].masks; + shifts = format_table[7].shifts; break; case MESA_FORMAT_R10G10B10A2_UNORM: - masks = masks_table[8]; + masks = format_table[8].masks; + shifts = format_table[8].shifts; break; default: fprintf(stderr, "[%s:%u] Unknown framebuffer type %s (%d).\n", @@ -257,6 +294,7 @@ driCreateConfigs(mesa_format format, blue_bits = _mesa_get_format_bits(format, GL_BLUE_BITS); alpha_bits = _mesa_get_format_bits(format, GL_ALPHA_BITS); is_srgb = _mesa_get_format_color_encoding(format) == GL_SRGB; + is_float = _mesa_get_format_datatype(format) == GL_FLOAT; num_modes = num_depth_stencil_bits * num_db_modes * num_accum_bits * num_msaa_modes; configs = calloc(num_modes + 1, sizeof *configs); @@ -286,6 +324,7 @@ driCreateConfigs(mesa_format format, c++; memset(modes, 0, sizeof *modes); + modes->floatMode = is_float; modes->redBits = red_bits; modes->greenBits = green_bits; modes->blueBits = blue_bits; @@ -294,6 +333,10 @@ driCreateConfigs(mesa_format format, modes->greenMask = masks[1]; modes->blueMask = masks[2]; modes->alphaMask = masks[3]; + modes->redShift = shifts[0]; + modes->greenShift = shifts[1]; + modes->blueShift = shifts[2]; + modes->alphaShift = shifts[3]; modes->rgbBits = modes->redBits + modes->greenBits + modes->blueBits + modes->alphaBits; @@ -414,9 +457,13 @@ static const struct { unsigned int attrib, offset; } attribMap[] = { __ATTRIB(__DRI_ATTRIB_TRANSPARENT_BLUE_VALUE, transparentBlue), __ATTRIB(__DRI_ATTRIB_TRANSPARENT_ALPHA_VALUE, transparentAlpha), __ATTRIB(__DRI_ATTRIB_RED_MASK, redMask), + __ATTRIB(__DRI_ATTRIB_RED_SHIFT, redShift), __ATTRIB(__DRI_ATTRIB_GREEN_MASK, greenMask), + __ATTRIB(__DRI_ATTRIB_GREEN_SHIFT, greenShift), __ATTRIB(__DRI_ATTRIB_BLUE_MASK, blueMask), + __ATTRIB(__DRI_ATTRIB_BLUE_SHIFT, blueShift), __ATTRIB(__DRI_ATTRIB_ALPHA_MASK, alphaMask), + __ATTRIB(__DRI_ATTRIB_ALPHA_SHIFT, alphaShift), __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_WIDTH, maxPbufferWidth), __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_HEIGHT, maxPbufferHeight), __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_PIXELS, maxPbufferPixels), @@ -451,6 +498,8 @@ driGetConfigAttribIndex(const __DRIconfig *config, case __DRI_ATTRIB_RENDER_TYPE: /* no support for color index mode */ *value = __DRI_ATTRIB_RGBA_BIT; + if (config->modes.floatMode) + *value |= __DRI_ATTRIB_FLOAT_BIT; break; case __DRI_ATTRIB_CONFIG_CAVEAT: if (config->modes.visualRating == GLX_NON_CONFORMANT_CONFIG) diff --git a/src/mesa/drivers/dri/i915/intel_regions.c b/src/mesa/drivers/dri/i915/intel_regions.c index fee734801cd..803ea9326e8 100644 --- a/src/mesa/drivers/dri/i915/intel_regions.c +++ b/src/mesa/drivers/dri/i915/intel_regions.c @@ -57,7 +57,7 @@ */ #define DEBUG_BACKTRACE_SIZE 0 -#if DEBUG_BACKTRACE_SIZE == 0 +#if 
DEBUG_BACKTRACE_SIZE == 0 || !defined(HAVE_EXECINFO_H) /* Use the standard debug output */ #define _DBG(...) DBG(__VA_ARGS__) #else diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index 29b46147f39..ab1eb3d99a9 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -274,6 +274,8 @@ LOCAL_LDFLAGS += $(MESA_DRI_LDFLAGS) LOCAL_CFLAGS := \ $(MESA_DRI_CFLAGS) +LOCAL_CFLAGS += -Wno-error + LOCAL_C_INCLUDES := \ $(MESA_DRI_C_INCLUDES) \ $(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_glsl,,) \ diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index 7b0ddfb64dd..46774c69bd2 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -402,6 +402,8 @@ vma_alloc(struct brw_bufmgr *bufmgr, /* Without softpin support, we let the kernel assign addresses. */ assert(brw_using_softpin(bufmgr)); + alignment = ALIGN(alignment, PAGE_SIZE); + struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size); uint64_t addr; @@ -1487,7 +1489,7 @@ brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd) brw_bo_make_external(bo); if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle, - DRM_CLOEXEC, prime_fd) != 0) + DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0) return -errno; bo->reusable = false; @@ -1717,6 +1719,9 @@ brw_bufmgr_init(struct gen_device_info *devinfo, int fd) const uint64_t _4GB = 4ull << 30; + /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */ + const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE; + if (devinfo->gen >= 8 && gtt_size > _4GB) { bufmgr->initial_kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; @@ -1726,9 +1731,13 @@ brw_bufmgr_init(struct gen_device_info *devinfo, int fd) bufmgr->initial_kflags |= EXEC_OBJECT_PINNED; util_vma_heap_init(&bufmgr->vma_allocator[BRW_MEMZONE_LOW_4G], - PAGE_SIZE, _4GB); + PAGE_SIZE, _4GB_minus_1); + + /* Leave the last 4GB out of the high vma range, so that no state + * base address + size can overflow 48 bits. + */ util_vma_heap_init(&bufmgr->vma_allocator[BRW_MEMZONE_OTHER], - 1 * _4GB, gtt_size - 1 * _4GB); + 1 * _4GB, gtt_size - 2 * _4GB); } else if (devinfo->gen >= 10) { /* Softpin landed in 4.5, but GVT used an aliasing PPGTT until * kernel commit 6b3816d69628becb7ff35978aa0751798b4a940a in diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c index 30e09861491..1508171da10 100644 --- a/src/mesa/drivers/dri/i965/brw_clear.c +++ b/src/mesa/drivers/dri/i965/brw_clear.c @@ -167,7 +167,7 @@ brw_fast_clear_depth(struct gl_context *ctx) */ float clear_value = mt->format == MESA_FORMAT_Z_FLOAT32 ? ctx->Depth.Clear : - (unsigned)(ctx->Depth.Clear * fb->_DepthMax) / (float)fb->_DepthMax; + _mesa_lroundeven(ctx->Depth.Clear * fb->_DepthMax) / (float)(fb->_DepthMax); const uint32_t num_layers = depth_att->Layered ? 
depth_irb->layer_count : 1; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 46791c7d2c8..263c17b4d58 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1183,6 +1183,7 @@ struct brw_context int n_active_oa_queries; int n_active_pipeline_stats_queries; + int n_active_null_renderers; /* The number of queries depending on running OA counters which * extends beyond brw_end_perf_query() since we need to wait until diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 17bca1991f1..b42c131d1b9 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1652,11 +1652,17 @@ enum brw_pixel_shader_coverage_mask_mode { #define GEN10_CACHE_MODE_SS 0x0e420 #define GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4) -#define INSTPM 0x20c0 +#define INSTPM 0x20c0 /* Gen6-8 */ # define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6) +# define INSTPM_GLOBAL_DEBUG_ENABLE (1 << 4) +# define INSTPM_MEDIA_INSTRUCTION_DISABLE (1 << 3) +# define INSTPM_3D_RENDERER_INSTRUCTION_DISABLE (1 << 2) +# define INSTPM_3D_STATE_INSTRUCTION_DISABLE (1 << 1) #define CS_DEBUG_MODE2 0x20d8 /* Gen9+ */ # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4) +# define CSDBG2_MEDIA_INSTRUCTION_DISABLE (1 << 1) +# define CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE (1 << 0) #define GEN7_RPSTAT1 0xA01C #define GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT 7 @@ -1676,10 +1682,6 @@ enum brw_pixel_shader_coverage_mask_mode { # define GLK_SCEC_BARRIER_MODE_MASK REG_MASK(1 << 7) # define GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE (1 << 11) -#define COMMON_SLICE_CHICKEN3 0x7304 -# define PS_THREAD_PANIC_DISPATCH (3 << 6) -# define PS_THREAD_PANIC_DISPATCH_MASK REG_MASK(3 << 6) - #define HALF_SLICE_CHICKEN7 0xE194 # define TEXEL_OFFSET_FIX_ENABLE (1 << 1) # define TEXEL_OFFSET_FIX_MASK REG_MASK(1 << 1) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index d07349419cc..a3cfa765c0f 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -875,6 +875,16 @@ brw_finish_drawing(struct gl_context *ctx) brw_bo_unreference(brw->draw.draw_params_count_bo); brw->draw.draw_params_count_bo = NULL; } + + if (brw->draw.draw_params_bo) { + brw_bo_unreference(brw->draw.draw_params_bo); + brw->draw.draw_params_bo = NULL; + } + + if (brw->draw.derived_draw_params_bo) { + brw_bo_unreference(brw->draw.derived_draw_params_bo); + brw->draw.derived_draw_params_bo = NULL; + } } /** diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index b752294250e..e73cadc5d3e 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -493,6 +493,27 @@ brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline) } } + if (devinfo->gen == 9 && pipeline == BRW_RENDER_PIPELINE) { + /* We seem to have issues with geometry flickering when 3D and compute + * are combined in the same batch and this appears to fix it. 
+ */ + const uint32_t subslices = MAX2(brw->screen->subslice_total, 1); + const uint32_t maxNumberofThreads = + devinfo->max_cs_threads * subslices - 1; + + BEGIN_BATCH(9); + OUT_BATCH(MEDIA_VFE_STATE << 16 | (9 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(2 << 8 | maxNumberofThreads << 16); + OUT_BATCH(0); + OUT_BATCH(2 << 16); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + if (devinfo->gen >= 6) { /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction] * PIPELINE_SELECT [DevBWR+]": diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index cd7961905bd..255a0746757 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -287,6 +287,12 @@ dump_perf_query_callback(GLuint id, void *query_void, void *brw_void) o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,"), obj->pipeline_stats.bo ? "yes" : "no"); break; + case GEN_PERF_QUERY_TYPE_NULL: + DBG("%4d: %-6s %-8s NULL_RENDERER\n", + id, + o->Used ? "Dirty," : "New,", + o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,")); + break; default: unreachable("Unknown query type"); break; @@ -388,6 +394,10 @@ brw_get_perf_query_info(struct gl_context *ctx, *n_active = brw->perfquery.n_active_pipeline_stats_queries; break; + case GEN_PERF_QUERY_TYPE_NULL: + *n_active = brw->perfquery.n_active_null_renderers; + break; + default: unreachable("Unknown query type"); break; @@ -962,6 +972,7 @@ brw_begin_perf_query(struct gl_context *ctx, struct brw_context *brw = brw_context(ctx); struct brw_perf_query_object *obj = brw_perf_query(o); const struct gen_perf_query_info *query = obj->query; + const struct gen_device_info *devinfo = &brw->screen->devinfo; /* We can assume the frontend hides mistaken attempts to Begin a * query object multiple times before its End. Similarly if an @@ -1046,7 +1057,6 @@ brw_begin_perf_query(struct gl_context *ctx, /* If the OA counters aren't already on, enable them. 
*/ if (brw->perfquery.oa_stream_fd == -1) { __DRIscreen *screen = brw->screen->driScrnPriv; - const struct gen_device_info *devinfo = &brw->screen->devinfo; /* The period_exponent gives a sampling period as follows: * sample_period = timestamp_period * 2^(period_exponent + 1) @@ -1191,6 +1201,21 @@ brw_begin_perf_query(struct gl_context *ctx, ++brw->perfquery.n_active_pipeline_stats_queries; break; + case GEN_PERF_QUERY_TYPE_NULL: + ++brw->perfquery.n_active_null_renderers; + if (devinfo->gen >= 9) { + brw_load_register_imm32(brw, CS_DEBUG_MODE2, + REG_MASK(CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE) | + CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE); + } else { + brw_load_register_imm32(brw, INSTPM, + REG_MASK(INSTPM_3D_RENDERER_INSTRUCTION_DISABLE | + INSTPM_MEDIA_INSTRUCTION_DISABLE) | + INSTPM_3D_RENDERER_INSTRUCTION_DISABLE | + INSTPM_MEDIA_INSTRUCTION_DISABLE); + } + break; + default: unreachable("Unknown query type"); break; @@ -1211,6 +1236,7 @@ brw_end_perf_query(struct gl_context *ctx, { struct brw_context *brw = brw_context(ctx); struct brw_perf_query_object *obj = brw_perf_query(o); + const struct gen_device_info *devinfo = &brw->screen->devinfo; DBG("End(%d)\n", o->Id); @@ -1253,6 +1279,19 @@ brw_end_perf_query(struct gl_context *ctx, --brw->perfquery.n_active_pipeline_stats_queries; break; + case GEN_PERF_QUERY_TYPE_NULL: + if (--brw->perfquery.n_active_null_renderers == 0) { + if (devinfo->gen >= 9) { + brw_load_register_imm32(brw, CS_DEBUG_MODE2, + REG_MASK(CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE)); + } else { + brw_load_register_imm32(brw, INSTPM, + REG_MASK(INSTPM_3D_RENDERER_INSTRUCTION_DISABLE | + INSTPM_MEDIA_INSTRUCTION_DISABLE)); + } + } + break; + default: unreachable("Unknown query type"); break; @@ -1278,6 +1317,9 @@ brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o) bo = obj->pipeline_stats.bo; break; + case GEN_PERF_QUERY_TYPE_NULL: + break; + default: unreachable("Unknown query type"); break; @@ -1328,6 +1370,8 @@ brw_is_perf_query_ready(struct gl_context *ctx, return (obj->pipeline_stats.bo && !brw_batch_references(&brw->batch, obj->pipeline_stats.bo) && !brw_bo_busy(obj->pipeline_stats.bo)); + case GEN_PERF_QUERY_TYPE_NULL: + return true; default: unreachable("Unknown query type"); @@ -1506,6 +1550,9 @@ brw_get_perf_query_data(struct gl_context *ctx, written = get_pipeline_stats_data(brw, obj, data_size, (uint8_t *)data); break; + case GEN_PERF_QUERY_TYPE_NULL: + break; + default: unreachable("Unknown query type"); break; @@ -1576,6 +1623,9 @@ brw_delete_perf_query(struct gl_context *ctx, } break; + case GEN_PERF_QUERY_TYPE_NULL: + break; + default: unreachable("Unknown query type"); break; @@ -1775,6 +1825,7 @@ brw_init_perf_query_info(struct gl_context *ctx) if (gen_perf_load_oa_metrics(brw->perfquery.perf, screen->fd, devinfo)) brw_perf_query_register_mdapi_oa_query(brw); + brw_perf_query_register_gpa_null_query(brw); brw->perfquery.unaccumulated = ralloc_array(brw, struct brw_perf_query_object *, 2); diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.h b/src/mesa/drivers/dri/i965/brw_performance_query.h index b0bf60cc4ff..4592dc3c7ff 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.h +++ b/src/mesa/drivers/dri/i965/brw_performance_query.h @@ -116,5 +116,6 @@ struct brw_perf_query_object void brw_perf_query_register_mdapi_oa_query(struct brw_context *brw); void brw_perf_query_register_mdapi_statistic_query(struct brw_context *brw); +void brw_perf_query_register_gpa_null_query(struct brw_context *brw); #endif /* 
BRW_PERFORMANCE_QUERY_H */ diff --git a/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c b/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c index 379515d328f..21ed33a38e9 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c @@ -258,3 +258,13 @@ brw_perf_query_register_mdapi_statistic_query(struct brw_context *brw) query->data_size = sizeof(uint64_t) * query->n_counters; } + +void +brw_perf_query_register_gpa_null_query(struct brw_context *brw) +{ + struct gen_perf_query_info *query = + gen_perf_query_append_query_info(brw->perfquery.perf, 0); + + query->kind = GEN_PERF_QUERY_TYPE_NULL; + query->name = "Intel_Null_Hardware_Query"; +} diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 938b9defeda..c41d9551a1e 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -109,12 +109,6 @@ brw_upload_initial_gpu_state(struct brw_context *brw) brw_load_register_imm32(brw, GEN8_L3CNTLREG, GEN8_L3CNTLREG_EDBC_NO_HANG); - /* WA_2204188704: Pixel Shader Panic dispatch must be disabled. - */ - brw_load_register_imm32(brw, COMMON_SLICE_CHICKEN3, - PS_THREAD_PANIC_DISPATCH_MASK | - PS_THREAD_PANIC_DISPATCH); - /* WaEnableStateCacheRedirectToCS:icl */ brw_load_register_imm32(brw, SLICE_COMMON_ECO_CHICKEN1, GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE | diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 8175fbf0db4..f1defb3f148 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1685,6 +1685,11 @@ brw_upload_cs_work_groups_surface(struct brw_context *brw) ISL_FORMAT_RAW, 3 * sizeof(GLuint), 1, RELOC_WRITE); + + /* The state buffer now holds a reference to our upload, drop ours. 
*/ + if (bo != brw->compute.num_work_groups_bo) + brw_bo_unreference(bo); + brw->ctx.NewDriverState |= BRW_NEW_SURFACES; } } diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index d84793f71f8..28a54f33e7c 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -99,6 +99,7 @@ DRI_CONF_BEGIN DRI_CONF_GLSL_ZERO_INIT("false") DRI_CONF_ALLOW_RGB10_CONFIGS("false") DRI_CONF_ALLOW_RGB565_CONFIGS("true") + DRI_CONF_ALLOW_FP16_CONFIGS("false") DRI_CONF_SECTION_END DRI_CONF_END }; @@ -188,6 +189,12 @@ static const struct __DRI2flushExtensionRec intelFlushExtension = { }; static const struct intel_image_format intel_image_formats[] = { + { __DRI_IMAGE_FOURCC_ABGR16161616F, __DRI_IMAGE_COMPONENTS_RGBA, 1, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR16161616F, 8 } } }, + + { __DRI_IMAGE_FOURCC_XBGR16161616F, __DRI_IMAGE_COMPONENTS_RGB, 1, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR16161616F, 8 } } }, + { __DRI_IMAGE_FOURCC_ARGB2101010, __DRI_IMAGE_COMPONENTS_RGBA, 1, { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB2101010, 4 } } }, @@ -1383,7 +1390,8 @@ intel_query_dma_buf_modifiers(__DRIscreen *_screen, int fourcc, int max, for (i = 0; i < num_mods && i < max; i++) { if (f->components == __DRI_IMAGE_COMPONENTS_Y_U_V || f->components == __DRI_IMAGE_COMPONENTS_Y_UV || - f->components == __DRI_IMAGE_COMPONENTS_Y_XUXV) { + f->components == __DRI_IMAGE_COMPONENTS_Y_XUXV || + f->components == __DRI_IMAGE_COMPONENTS_Y_UXVX) { external_only[i] = GL_TRUE; } else { @@ -1732,7 +1740,11 @@ intelCreateBuffer(__DRIscreen *dri_screen, fb->Visual.samples = num_samples; } - if (mesaVis->redBits == 10 && mesaVis->alphaBits > 0) { + if (mesaVis->redBits == 16 && mesaVis->alphaBits > 0 && mesaVis->floatMode) { + rgbFormat = MESA_FORMAT_RGBA_FLOAT16; + } else if (mesaVis->redBits == 16 && mesaVis->floatMode) { + rgbFormat = MESA_FORMAT_RGBX_FLOAT16; + } else if (mesaVis->redBits == 10 && mesaVis->alphaBits > 0) { rgbFormat = mesaVis->redMask == 0x3ff00000 ? MESA_FORMAT_B10G10R10A2_UNORM : MESA_FORMAT_R10G10B10A2_UNORM; } else if (mesaVis->redBits == 10) { @@ -2146,6 +2158,45 @@ intel_loader_get_cap(const __DRIscreen *dri_screen, enum dri_loader_cap cap) return 0; } +static bool +intel_allowed_format(__DRIscreen *dri_screen, mesa_format format) +{ + struct intel_screen *screen = dri_screen->driverPrivate; + + /* Expose only BGRA ordering if the loader doesn't support RGBA ordering. */ + bool allow_rgba_ordering = intel_loader_get_cap(dri_screen, DRI_LOADER_CAP_RGBA_ORDERING); + if (!allow_rgba_ordering && + (format == MESA_FORMAT_R8G8B8A8_UNORM || + format == MESA_FORMAT_R8G8B8X8_UNORM || + format == MESA_FORMAT_R8G8B8A8_SRGB)) + return false; + + /* Shall we expose 10 bpc formats? */ + bool allow_rgb10_configs = driQueryOptionb(&screen->optionCache, + "allow_rgb10_configs"); + if (!allow_rgb10_configs && + (format == MESA_FORMAT_B10G10R10A2_UNORM || + format == MESA_FORMAT_B10G10R10X2_UNORM)) + return false; + + /* Shall we expose 565 formats? */ + bool allow_rgb565_configs = driQueryOptionb(&screen->optionCache, + "allow_rgb565_configs"); + if (!allow_rgb565_configs && format == MESA_FORMAT_B5G6R5_UNORM) + return false; + + /* Shall we expose fp16 formats? 
*/ + bool allow_fp16_configs = driQueryOptionb(&screen->optionCache, + "allow_fp16_configs"); + allow_fp16_configs &= intel_loader_get_cap(dri_screen, DRI_LOADER_CAP_FP16); + if (!allow_fp16_configs && + (format == MESA_FORMAT_RGBA_FLOAT16 || + format == MESA_FORMAT_RGBX_FLOAT16)) + return false; + + return true; +} + static __DRIconfig** intel_screen_make_configs(__DRIscreen *dri_screen) { @@ -2160,6 +2211,9 @@ intel_screen_make_configs(__DRIscreen *dri_screen) MESA_FORMAT_B10G10R10A2_UNORM, MESA_FORMAT_B10G10R10X2_UNORM, + MESA_FORMAT_RGBA_FLOAT16, + MESA_FORMAT_RGBX_FLOAT16, + /* The 32-bit RGBA format must not precede the 32-bit BGRA format. * Likewise for RGBX and BGRX. Otherwise, the GLX client and the GLX * server may disagree on which format the GLXFBConfig represents, @@ -2196,19 +2250,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen) uint8_t depth_bits[4], stencil_bits[4]; __DRIconfig **configs = NULL; - /* Expose only BGRA ordering if the loader doesn't support RGBA ordering. */ - unsigned num_formats; - if (intel_loader_get_cap(dri_screen, DRI_LOADER_CAP_RGBA_ORDERING)) - num_formats = ARRAY_SIZE(formats); - else - num_formats = ARRAY_SIZE(formats) - 3; /* all - RGBA_ORDERING formats */ - - /* Shall we expose 10 bpc formats? */ - bool allow_rgb10_configs = driQueryOptionb(&screen->optionCache, - "allow_rgb10_configs"); - /* Shall we expose 565 formats? */ - bool allow_rgb565_configs = driQueryOptionb(&screen->optionCache, - "allow_rgb565_configs"); + unsigned num_formats = ARRAY_SIZE(formats); /* Generate singlesample configs, each without accumulation buffer * and with EGL_MUTABLE_RENDER_BUFFER_BIT_KHR. @@ -2217,12 +2259,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen) __DRIconfig **new_configs; int num_depth_stencil_bits = 2; - if (!allow_rgb10_configs && - (formats[i] == MESA_FORMAT_B10G10R10A2_UNORM || - formats[i] == MESA_FORMAT_B10G10R10X2_UNORM)) - continue; - - if (!allow_rgb565_configs && formats[i] == MESA_FORMAT_B5G6R5_UNORM) + if (!intel_allowed_format(dri_screen, formats[i])) continue; /* Starting with DRI2 protocol version 1.1 we can request a depth/stencil @@ -2262,12 +2299,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen) for (unsigned i = 0; i < num_formats; i++) { __DRIconfig **new_configs; - if (!allow_rgb10_configs && - (formats[i] == MESA_FORMAT_B10G10R10A2_UNORM || - formats[i] == MESA_FORMAT_B10G10R10X2_UNORM)) - continue; - - if (!allow_rgb565_configs && formats[i] == MESA_FORMAT_B5G6R5_UNORM) + if (!intel_allowed_format(dri_screen, formats[i])) continue; if (formats[i] == MESA_FORMAT_B5G6R5_UNORM) { @@ -2303,12 +2335,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen) if (devinfo->gen < 6) break; - if (!allow_rgb10_configs && - (formats[i] == MESA_FORMAT_B10G10R10A2_UNORM || - formats[i] == MESA_FORMAT_B10G10R10X2_UNORM)) - continue; - - if (!allow_rgb565_configs && formats[i] == MESA_FORMAT_B5G6R5_UNORM) + if (!intel_allowed_format(dri_screen, formats[i])) continue; __DRIconfig **new_configs; diff --git a/src/mesa/drivers/osmesa/meson.build b/src/mesa/drivers/osmesa/meson.build index a406bb3c210..c479b740131 100644 --- a/src/mesa/drivers/osmesa/meson.build +++ b/src/mesa/drivers/osmesa/meson.build @@ -33,7 +33,8 @@ libosmesa = shared_library( include_directories : [ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, ], - link_with : [libmesa_classic, libglapi_static, osmesa_link_with], + link_whole : libglapi_static, + link_with : [libmesa_classic, osmesa_link_with], dependencies : [dep_thread, dep_selinux], 
version : '8.0.0', install : true, diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index a46599a2872..4e48b76fcb8 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -84,14 +84,8 @@ supported_buffer_bitmask(const struct gl_context *ctx, return mask; } - -/** - * Helper routine used by glDrawBuffer and glDrawBuffersARB. - * Given a GLenum naming one or more color buffers (such as - * GL_FRONT_AND_BACK), return the corresponding bitmask of BUFFER_BIT_* flags. - */ -static GLbitfield -draw_buffer_enum_to_bitmask(const struct gl_context *ctx, GLenum buffer) +static GLenum +back_to_front_if_single_buffered(const struct gl_context *ctx, GLenum buffer) { /* If the front buffer is the only buffer, GL_BACK and all other flags * that include BACK select the front buffer for drawing. There are @@ -129,6 +123,19 @@ draw_buffer_enum_to_bitmask(const struct gl_context *ctx, GLenum buffer) } } + return buffer; +} + +/** + * Helper routine used by glDrawBuffer and glDrawBuffersARB. + * Given a GLenum naming one or more color buffers (such as + * GL_FRONT_AND_BACK), return the corresponding bitmask of BUFFER_BIT_* flags. + */ +static GLbitfield +draw_buffer_enum_to_bitmask(const struct gl_context *ctx, GLenum buffer) +{ + buffer = back_to_front_if_single_buffered(ctx, buffer); + switch (buffer) { case GL_NONE: return 0; @@ -192,20 +199,12 @@ draw_buffer_enum_to_bitmask(const struct gl_context *ctx, GLenum buffer) static gl_buffer_index read_buffer_enum_to_index(const struct gl_context *ctx, GLenum buffer) { + buffer = back_to_front_if_single_buffered(ctx, buffer); + switch (buffer) { case GL_FRONT: return BUFFER_FRONT_LEFT; case GL_BACK: - if (_mesa_is_gles(ctx)) { - /* In draw_buffer_enum_to_bitmask, when GLES contexts draw to - * GL_BACK with a single-buffered configuration, we actually end - * up drawing to the sole front buffer in our internal - * representation. For consistency, we must read from that - * front left buffer too. - */ - if (!ctx->DrawBuffer->Visual.doubleBufferMode) - return BUFFER_FRONT_LEFT; - } return BUFFER_BACK_LEFT; case GL_RIGHT: return BUFFER_FRONT_RIGHT; diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 2c3d9a11ce3..dddcf3da0c5 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -361,6 +361,7 @@ static void one_time_fini(void) { _mesa_destroy_shader_compiler(); + _mesa_destroy_shader_compiler_types(); _mesa_locale_fini(); } @@ -393,6 +394,8 @@ one_time_init( struct gl_context *ctx ) _mesa_locale_init(); + _mesa_init_shader_compiler_types(); + _mesa_one_time_init_extension_overrides(ctx); _mesa_get_cpu_features(); @@ -616,6 +619,17 @@ _mesa_init_constants(struct gl_constants *consts, gl_api api) consts->MaxProgramMatrices = MAX_PROGRAM_MATRICES; consts->MaxProgramMatrixStackDepth = MAX_PROGRAM_MATRIX_STACK_DEPTH; + /* Set the absolute minimum possible GLSL version. API_OPENGL_CORE can + * mean an OpenGL 3.0 forward-compatible context, so that implies a minimum + * possible version of 1.30. Otherwise, the minimum possible version is 1.20. + * Since Mesa unconditionally advertises GL_ARB_shading_language_100 and + * GL_ARB_shader_objects, every driver has GLSL 1.20... even if they don't + * advertise any extensions to enable any shader stages (e.g., + * GL_ARB_vertex_shader). + */ + consts->GLSLVersion = api == API_OPENGL_CORE ? 
130 : 120; + consts->GLSLVersionCompat = consts->GLSLVersion; + /* Assume that if GLSL 1.30+ (or GLSL ES 3.00+) is supported that * gl_VertexID is implemented using a native hardware register with OpenGL * semantics. @@ -1544,9 +1558,12 @@ check_compatible(const struct gl_context *ctx, ctxvis->foo != bufvis->foo) \ return GL_FALSE - check_component(redMask); - check_component(greenMask); - check_component(blueMask); + check_component(redShift); + check_component(greenShift); + check_component(blueShift); + check_component(redBits); + check_component(greenBits); + check_component(blueBits); check_component(depthBits); check_component(stencilBits); diff --git a/src/mesa/main/format_utils.h b/src/mesa/main/format_utils.h index 78365cab533..3a528ceb3db 100644 --- a/src/mesa/main/format_utils.h +++ b/src/mesa/main/format_utils.h @@ -87,7 +87,7 @@ _mesa_float_to_unorm(float x, unsigned dst_bits) else if (x > 1.0f) return MAX_UINT(dst_bits); else - return _mesa_lroundevenf(x * MAX_UINT(dst_bits)); + return _mesa_i64roundevenf(x * MAX_UINT(dst_bits)); } static inline unsigned diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 0ca87561f05..6ba70d0809c 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -170,6 +170,7 @@ struct gl_config GLint redBits, greenBits, blueBits, alphaBits; /* bits per comp */ GLuint redMask, greenMask, blueMask, alphaMask; + GLint redShift, greenShift, blueShift, alphaShift; GLint rgbBits; /* total bits for rgb */ GLint indexBits; /* total bits for colorindex */ diff --git a/src/mesa/main/program_binary.c b/src/mesa/main/program_binary.c index 7390fef5887..39537cfccce 100644 --- a/src/mesa/main/program_binary.c +++ b/src/mesa/main/program_binary.c @@ -178,6 +178,8 @@ write_program_payload(struct gl_context *ctx, struct blob *blob, shader->Program); } + blob_write_uint32(blob, sh_prog->SeparateShader); + serialize_glsl_program(blob, ctx, sh_prog); for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) { @@ -195,6 +197,8 @@ static bool read_program_payload(struct gl_context *ctx, struct blob_reader *blob, GLenum binary_format, struct gl_shader_program *sh_prog) { + sh_prog->SeparateShader = blob_read_uint32(blob); + if (!deserialize_glsl_program(blob, ctx, sh_prog)) return false; diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 6b73e6c7e7a..9564664daa3 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -162,6 +162,8 @@ _mesa_free_shader_state(struct gl_context *ctx) _mesa_reference_shader_program(ctx, &ctx->Shader.ReferencedPrograms[i], NULL); + free(ctx->SubroutineIndex[i].IndexPtr); + ctx->SubroutineIndex[i].IndexPtr = NULL; } _mesa_reference_shader_program(ctx, &ctx->Shader.ActiveProgram, NULL); diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c index 242a8c29909..219674be086 100644 --- a/src/mesa/main/shaderimage.c +++ b/src/mesa/main/shaderimage.c @@ -588,11 +588,11 @@ set_image_binding(struct gl_image_unit *u, struct gl_texture_object *texObj, if (texObj && _mesa_tex_target_is_layered(texObj->Target)) { u->Layered = layered; u->Layer = layer; - u->_Layer = (u->Layered ? 0 : u->Layer); } else { u->Layered = GL_FALSE; u->Layer = 0; } + u->_Layer = (u->Layered ? 
0 : u->Layer); _mesa_reference_texobj(&u->TexObj, texObj); } diff --git a/src/mesa/program/Android.mk b/src/mesa/program/Android.mk index c6470e6289e..13d0da85882 100644 --- a/src/mesa/program/Android.mk +++ b/src/mesa/program/Android.mk @@ -41,7 +41,7 @@ endef include $(MESA_TOP)/src/mesa/Makefile.sources include $(CLEAR_VARS) - +LOCAL_CFLAGS += -Wno-error LOCAL_MODULE := libmesa_program LOCAL_MODULE_CLASS := STATIC_LIBRARIES LOCAL_STATIC_LIBRARIES := libmesa_nir \ diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index f875c00238f..005b855230b 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2506,8 +2506,7 @@ _mesa_generate_parameters_list_for_uniforms(struct gl_context *ctx, void _mesa_associate_uniform_storage(struct gl_context *ctx, struct gl_shader_program *shader_program, - struct gl_program *prog, - bool propagate_to_storage) + struct gl_program *prog) { struct gl_program_parameter_list *params = prog->Parameters; gl_shader_stage shader_type = prog->info.stage; @@ -2633,26 +2632,24 @@ _mesa_associate_uniform_storage(struct gl_context *ctx, * data from the linker's backing store. This will cause values from * initializers in the source code to be copied over. */ - if (propagate_to_storage) { - unsigned array_elements = MAX2(1, storage->array_elements); - if (ctx->Const.PackedDriverUniformStorage && !prog->is_arb_asm && - (storage->is_bindless || !storage->type->contains_opaque())) { - const int dmul = storage->type->is_64bit() ? 2 : 1; - const unsigned components = - storage->type->vector_elements * - storage->type->matrix_columns; - - for (unsigned s = 0; s < storage->num_driver_storage; s++) { - gl_constant_value *uni_storage = (gl_constant_value *) - storage->driver_storage[s].data; - memcpy(uni_storage, storage->storage, - sizeof(storage->storage[0]) * components * - array_elements * dmul); - } - } else { - _mesa_propagate_uniforms_to_driver_storage(storage, 0, - array_elements); + unsigned array_elements = MAX2(1, storage->array_elements); + if (ctx->Const.PackedDriverUniformStorage && !prog->is_arb_asm && + (storage->is_bindless || !storage->type->contains_opaque())) { + const int dmul = storage->type->is_64bit() ? 2 : 1; + const unsigned components = + storage->type->vector_elements * + storage->type->matrix_columns; + + for (unsigned s = 0; s < storage->num_driver_storage; s++) { + gl_constant_value *uni_storage = (gl_constant_value *) + storage->driver_storage[s].data; + memcpy(uni_storage, storage->storage, + sizeof(storage->storage[0]) * components * + array_elements * dmul); } + } else { + _mesa_propagate_uniforms_to_driver_storage(storage, 0, + array_elements); } last_location = location; @@ -3011,7 +3008,7 @@ get_mesa_program(struct gl_context *ctx, * prog->ParameterValues to get reallocated (e.g., anything that adds a * program constant) has to happen before creating this linkage. 
*/ - _mesa_associate_uniform_storage(ctx, shader_program, prog, true); + _mesa_associate_uniform_storage(ctx, shader_program, prog); if (!shader_program->data->LinkStatus) { goto fail_exit; } diff --git a/src/mesa/program/ir_to_mesa.h b/src/mesa/program/ir_to_mesa.h index f5665e6316e..33eb801bae8 100644 --- a/src/mesa/program/ir_to_mesa.h +++ b/src/mesa/program/ir_to_mesa.h @@ -50,8 +50,7 @@ _mesa_generate_parameters_list_for_uniforms(struct gl_context *ctx, void _mesa_associate_uniform_storage(struct gl_context *ctx, struct gl_shader_program *shader_program, - struct gl_program *prog, - bool propagate_to_storage); + struct gl_program *prog); #ifdef __cplusplus } diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index 3a49bd4d6c1..593d15331fd 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -176,8 +176,10 @@ set_vertex_shader(struct st_context *st) if (use_nir) { st->clear.vs = make_nir_clear_vertex_shader(st, false); } else { - const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC }; + const enum tgsi_semantic semantic_names[] = { + TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC + }; const uint semantic_indexes[] = { 0, 0 }; st->clear.vs = util_make_vertex_passthrough_shader(st->pipe, 2, semantic_names, diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 6f6b42596e6..fe6be3ab048 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -324,10 +324,12 @@ st_make_passthrough_vertex_shader(struct st_context *st) MESA_SHADER_VERTEX, 3, inputs, outputs, NULL, 0); } else { - const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_COLOR, - st->needs_texcoord_semantic ? TGSI_SEMANTIC_TEXCOORD : - TGSI_SEMANTIC_GENERIC }; + const enum tgsi_semantic semantic_names[] = { + TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_COLOR, + st->needs_texcoord_semantic ? 
TGSI_SEMANTIC_TEXCOORD : + TGSI_SEMANTIC_GENERIC + }; const uint semantic_indexes[] = { 0, 0, 0 }; st->passthrough_vs = diff --git a/src/mesa/state_tracker/st_cb_drawtex.c b/src/mesa/state_tracker/st_cb_drawtex.c index f08ffde6b01..bba1ce41376 100644 --- a/src/mesa/state_tracker/st_cb_drawtex.c +++ b/src/mesa/state_tracker/st_cb_drawtex.c @@ -92,7 +92,7 @@ semantic_to_varying_slot(unsigned semantic) static void * lookup_shader(struct st_context *st, uint num_attribs, - const uint *semantic_names, + const enum tgsi_semantic *semantic_names, const uint *semantic_indexes) { struct pipe_context *pipe = st->pipe; @@ -168,7 +168,7 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, struct pipe_resource *vbuffer = NULL; GLuint i, numTexCoords, numAttribs; GLboolean emitColor; - uint semantic_names[2 + MAX_TEXTURE_UNITS]; + enum tgsi_semantic semantic_names[2 + MAX_TEXTURE_UNITS]; uint semantic_indexes[2 + MAX_TEXTURE_UNITS]; struct pipe_vertex_element velements[2 + MAX_TEXTURE_UNITS]; unsigned offset; diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 5fe6c79a93a..5e3425a73a6 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -415,9 +415,15 @@ st_new_renderbuffer_fb(enum pipe_format format, unsigned samples, boolean sw) case PIPE_FORMAT_R32G32B32A32_FLOAT: strb->Base.InternalFormat = GL_RGBA32F; break; + case PIPE_FORMAT_R32G32B32X32_FLOAT: + strb->Base.InternalFormat = GL_RGB32F; + break; case PIPE_FORMAT_R16G16B16A16_FLOAT: strb->Base.InternalFormat = GL_RGBA16F; break; + case PIPE_FORMAT_R16G16B16X16_FLOAT: + strb->Base.InternalFormat = GL_RGB16F; + break; default: _mesa_problem(NULL, "Unexpected format %s in st_new_renderbuffer_fb", diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 8f2acafbca3..3c48d176ca4 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -645,7 +645,7 @@ st_create_context_priv(struct gl_context *ctx, struct pipe_context *pipe, PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET)); /* GL limits and extensions */ - st_init_limits(pipe->screen, &ctx->Const, &ctx->Extensions, ctx->API); + st_init_limits(pipe->screen, &ctx->Const, &ctx->Extensions); st_init_extensions(pipe->screen, &ctx->Const, &ctx->Extensions, &st->options, ctx->API); @@ -913,6 +913,19 @@ destroy_tex_sampler_cb(GLuint id, void *data, void *userData) st_texture_release_context_sampler_view(st, st_texture_object(texObj)); } +static void +destroy_framebuffer_attachment_sampler_cb(GLuint id, void *data, void *userData) +{ + struct gl_framebuffer* glfb = (struct gl_framebuffer*) data; + struct st_context *st = (struct st_context *) userData; + + for (unsigned i = 0; i < BUFFER_COUNT; i++) { + struct gl_renderbuffer_attachment *att = &glfb->Attachment[i]; + if (att->Texture) { + st_texture_release_context_sampler_view(st, st_texture_object(att->Texture)); + } + } +} void st_destroy_context(struct st_context *st) @@ -971,6 +984,8 @@ st_destroy_context(struct st_context *st) st_framebuffer_reference(&stfb, NULL); } + _mesa_HashWalk(ctx->Shared->FrameBuffers, destroy_framebuffer_attachment_sampler_cb, st); + pipe_sampler_view_reference(&st->pixel_xfer.pixelmap_sampler_view, NULL); pipe_resource_reference(&st->pixel_xfer.pixelmap_texture, NULL); diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 560dd7b31a4..371e7d2ec8f 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ 
b/src/mesa/state_tracker/st_extensions.c @@ -76,8 +76,7 @@ static int _clamp(int a, int min, int max) * Note that we have to limit/clamp against Mesa's internal limits too. */ void st_init_limits(struct pipe_screen *screen, - struct gl_constants *c, struct gl_extensions *extensions, - gl_api api) + struct gl_constants *c, struct gl_extensions *extensions) { int supported_irs; unsigned sh; @@ -449,14 +448,8 @@ void st_init_limits(struct pipe_screen *screen, c->GLSLFrontFacingIsSysVal = screen->get_param(screen, PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL); - /* GL_ARB_get_program_binary - * - * The QT framework has a bug in their shader program cache, which is built - * on GL_ARB_get_program_binary. In an effort to allow them to fix the bug - * we don't enable more than 1 binary format for compatibility profiles. - */ - if (api != API_OPENGL_COMPAT && - screen->get_disk_shader_cache && screen->get_disk_shader_cache(screen)) + /* GL_ARB_get_program_binary */ + if (screen->get_disk_shader_cache && screen->get_disk_shader_cache(screen)) c->NumProgramBinaryFormats = 1; c->MaxAtomicBufferBindings = diff --git a/src/mesa/state_tracker/st_extensions.h b/src/mesa/state_tracker/st_extensions.h index fdfac7ece70..7bf1aa8c8cb 100644 --- a/src/mesa/state_tracker/st_extensions.h +++ b/src/mesa/state_tracker/st_extensions.h @@ -35,8 +35,7 @@ struct pipe_screen; extern void st_init_limits(struct pipe_screen *screen, struct gl_constants *c, - struct gl_extensions *extensions, - gl_api api); + struct gl_extensions *extensions); extern void st_init_extensions(struct pipe_screen *screen, struct gl_constants *consts, diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index 97b2831b880..12ea1ce55b8 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -500,7 +500,7 @@ st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog, * prog->ParameterValues to get reallocated (e.g., anything that adds a * program constant) has to happen before creating this linkage. */ - _mesa_associate_uniform_storage(st->ctx, shader_program, prog, true); + _mesa_associate_uniform_storage(st->ctx, shader_program, prog); st_set_prog_affected_state_flags(prog); diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index f2344703d71..18a5571aaa8 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -7247,7 +7247,7 @@ get_mesa_program_tgsi(struct gl_context *ctx, * prog->ParameterValues to get reallocated (e.g., anything that adds a * program constant) has to happen before creating this linkage. */ - _mesa_associate_uniform_storage(ctx, shader_program, prog, true); + _mesa_associate_uniform_storage(ctx, shader_program, prog); if (!shader_program->data->LinkStatus) { free_glsl_to_tgsi_visitor(v); _mesa_reference_program(ctx, &shader->Program, NULL); diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index bee1f6b1366..ff0bec8f569 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -1105,10 +1105,17 @@ st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi, else { GET_CURRENT_CONTEXT(ctx); - ret = _mesa_make_current(NULL, NULL, NULL); - - if (ctx) + if (ctx) { + /* Before releasing the context, release its associated + * winsys buffers first. 
Then purge the context's winsys buffers list + * to free the resources of any winsys buffers that no longer have + * an existing drawable. + */ + ret = _mesa_make_current(ctx, NULL, NULL); st_framebuffers_purge(ctx->st); + } + + ret = _mesa_make_current(NULL, NULL, NULL); } return ret; @@ -1262,7 +1269,7 @@ get_version(struct pipe_screen *screen, _mesa_init_constants(&consts, api); _mesa_init_extensions(&extensions); - st_init_limits(screen, &consts, &extensions, api); + st_init_limits(screen, &consts, &extensions); st_init_extensions(screen, &consts, &extensions, options, api); return _mesa_get_version(&extensions, &consts, api); diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 9f6e492d6fb..769b02b24fc 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -428,7 +428,7 @@ static nir_shader * st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog, gl_shader_stage stage) { - enum pipe_shader_type p_stage = stage; /* valid for VS/FS */ + enum pipe_shader_type p_stage = pipe_shader_type_from_mesa(stage); const bool is_scalar = st->pipe->screen->get_shader_param(st->pipe->screen, p_stage, PIPE_SHADER_CAP_SCALAR_ISA); diff --git a/src/mesa/state_tracker/st_shader_cache.c b/src/mesa/state_tracker/st_shader_cache.c index b18829754cb..ae1602310db 100644 --- a/src/mesa/state_tracker/st_shader_cache.c +++ b/src/mesa/state_tracker/st_shader_cache.c @@ -366,7 +366,7 @@ st_deserialise_ir_program(struct gl_context *ctx, } st_set_prog_affected_state_flags(prog); - _mesa_associate_uniform_storage(ctx, shProg, prog, false); + _mesa_associate_uniform_storage(ctx, shProg, prog); /* Create Gallium shaders now instead of on demand. */ if (ST_DEBUG & DEBUG_PRECOMPILE || diff --git a/src/mesa/state_tracker/st_tgsi_lower_yuv.c b/src/mesa/state_tracker/st_tgsi_lower_yuv.c index 6acd173adc9..73437ddda70 100644 --- a/src/mesa/state_tracker/st_tgsi_lower_yuv.c +++ b/src/mesa/state_tracker/st_tgsi_lower_yuv.c @@ -269,31 +269,39 @@ yuv_to_rgb(struct tgsi_transform_context *tctx, tctx->emit_instruction(tctx, &inst); /* DP3 dst.x, tmpA, imm[0] */ - inst = dp3_instruction(); - reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X); - reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); - reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W)); - tctx->emit_instruction(tctx, &inst); + if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { + inst = dp3_instruction(); + reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X); + reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W)); + tctx->emit_instruction(tctx, &inst); + } /* DP3 dst.y, tmpA, imm[1] */ - inst = dp3_instruction(); - reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Y); - reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); - reg_src(&inst.Src[1], &ctx->imm[1], SWIZ(X, Y, Z, W)); - tctx->emit_instruction(tctx, &inst); + if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { + inst = dp3_instruction(); + reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Y); + reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + reg_src(&inst.Src[1], &ctx->imm[1], SWIZ(X, Y, Z, W)); + tctx->emit_instruction(tctx, &inst); + } /* DP3 dst.z, tmpA, imm[2] */ - inst = dp3_instruction(); - reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Z); - reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); - reg_src(&inst.Src[1], &ctx->imm[2], SWIZ(X, Y, Z, W)); - tctx->emit_instruction(tctx, &inst); + if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { + inst = 
dp3_instruction(); + reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Z); + reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + reg_src(&inst.Src[1], &ctx->imm[2], SWIZ(X, Y, Z, W)); + tctx->emit_instruction(tctx, &inst); + } /* MOV dst.w, imm[0].x */ - inst = mov_instruction(); - reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W); - reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W)); - tctx->emit_instruction(tctx, &inst); + if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { + inst = mov_instruction(); + reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W); + reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W)); + tctx->emit_instruction(tctx, &inst); + } } static void @@ -434,7 +442,7 @@ st_tgsi_lower_yuv(const struct tgsi_token *tokens, unsigned free_slots, /* TODO better job of figuring out how many extra tokens we need.. * this is a pain about tgsi_transform :-/ */ - newlen = tgsi_num_tokens(tokens) + 120; + newlen = tgsi_num_tokens(tokens) + 300; newtoks = tgsi_alloc_tokens(newlen); if (!newtoks) return NULL; diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf index f62315498b2..6389b796d33 100644 --- a/src/util/00-mesa-defaults.conf +++ b/src/util/00-mesa-defaults.conf @@ -111,6 +111,11 @@ TODO: document the other workarounds.