From 3dacfe03ac3159ac8b4d168756cca4031410cb9a Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Thu, 27 Mar 2025 14:57:41 -0700 Subject: [PATCH 1/2] Sync kernel-patches/vmtest Sync workflows, configs and temporary patches with current kernel-patches/vmtest master. Signed-off-by: Ihor Solodrai --- .github/scripts/download-gcc-bpf.sh | 30 ++++++++++++++++++++++++ .github/scripts/tmpfsify-workspace.sh | 21 +++++++++++++++++ .github/workflows/gcc-bpf.yml | 30 +++++++++++++++++------- .github/workflows/kernel-build-test.yml | 6 +++-- .github/workflows/kernel-build.yml | 31 +++++++++++++++++++++++-- .github/workflows/kernel-test.yml | 3 ++- 6 files changed, 107 insertions(+), 14 deletions(-) create mode 100755 .github/scripts/download-gcc-bpf.sh create mode 100755 .github/scripts/tmpfsify-workspace.sh diff --git a/.github/scripts/download-gcc-bpf.sh b/.github/scripts/download-gcc-bpf.sh new file mode 100755 index 0000000..894584a --- /dev/null +++ b/.github/scripts/download-gcc-bpf.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +set -euo pipefail + +GCC_BPF_RELEASE_GH_REPO=$1 +INSTALL_DIR=$(realpath $2) + +cd /tmp + +tag=$(gh release list -L 1 -R ${GCC_BPF_RELEASE_GH_REPO} --json tagName -q .[].tagName) +if [[ -z "$tag" ]]; then + echo "Could not find latest GCC BPF release at ${GCC_BPF_RELEASE_GH_REPO}" + exit 1 +fi + +url="https://github.com/${GCC_BPF_RELEASE_GH_REPO}/releases/download/${tag}/${tag}.tar.zst" +echo "Downloading $url" +wget -q "$url" + +tarball=${tag}.tar.zst +dir=$(tar tf $tarball | head -1 || true) + +echo "Extracting $tarball ..." +tar -I zstd -xf $tarball && rm -f $tarball + +rm -rf $INSTALL_DIR +mv -v $dir $INSTALL_DIR + +cd - + diff --git a/.github/scripts/tmpfsify-workspace.sh b/.github/scripts/tmpfsify-workspace.sh new file mode 100755 index 0000000..6fd62b4 --- /dev/null +++ b/.github/scripts/tmpfsify-workspace.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +set -x -euo pipefail + +TMPFS_SIZE=20 # GB +MEM_TOTAL=$(awk '/MemTotal/ {print int($2/1024)}' /proc/meminfo) + +# sanity check: total mem is at least double TMPFS_SIZE +if [ $MEM_TOTAL -lt $(($TMPFS_SIZE*1024*2)) ]; then + echo "tmpfsify-workspace.sh: will not allocate tmpfs, total memory is too low (${MEM_TOTAL}MB)" + exit 0 +fi + +dir="$(basename "$GITHUB_WORKSPACE")" +cd "$(dirname "$GITHUB_WORKSPACE")" +mv "${dir}" "${dir}.backup" +mkdir "${dir}" +sudo mount -t tmpfs -o size=${TMPFS_SIZE}G tmpfs "${dir}" +rsync -a "${dir}.backup/" "${dir}" +cd - + diff --git a/.github/workflows/gcc-bpf.yml b/.github/workflows/gcc-bpf.yml index 84c6b8f..ae2de18 100644 --- a/.github/workflows/gcc-bpf.yml +++ b/.github/workflows/gcc-bpf.yml @@ -25,14 +25,19 @@ on: jobs: test: name: GCC BPF - runs-on: ${{ fromJSON(inputs.runs_on) }} - timeout-minutes: 100 + runs-on: >- + ${{ + contains(fromJSON(inputs.runs_on), 'codebuild') + && format('codebuild-bpf-ci-{0}-{1}', github.run_id, github.run_attempt) + || fromJSON(inputs.runs_on) + }} env: ARCH: ${{ inputs.arch }} - BPF_GCC_INSTALL_DIR: ${{ github.workspace }}/gcc-bpf BPF_NEXT_BASE_BRANCH: 'master' - REPO_ROOT: ${{ github.workspace }}/src + GCC_BPF_INSTALL_DIR: ${{ github.workspace }}/gcc-bpf + GCC_BPF_RELEASE_REPO: 'theihor/gcc-bpf' KBUILD_OUTPUT: ${{ github.workspace }}/src/kbuild-output + REPO_ROOT: ${{ github.workspace }}/src steps: @@ -45,6 +50,12 @@ jobs: dest: ${{ env.REPO_ROOT }} rev: ${{ env.BPF_NEXT_BASE_BRANCH }} + - if: ${{ ! 
inputs.download_sources }} + name: Checkout ${{ github.repository }} to ./src + uses: actions/checkout@v4 + with: + path: 'src' + - uses: ./patch-kernel with: patches-root: '${{ github.workspace }}/ci/diffs' @@ -65,16 +76,17 @@ jobs: arch: ${{ inputs.arch }} llvm-version: ${{ inputs.llvm-version }} - - name: Build GCC BPF compiler - uses: ./build-bpf-gcc - with: - install-dir: ${{ env.BPF_GCC_INSTALL_DIR }} + - name: Download GCC BPF compiler + shell: bash + env: + GH_TOKEN: ${{ github.token }} + run: .github/scripts/download-gcc-bpf.sh ${{ env.GCC_BPF_RELEASE_REPO }} ${{ env.GCC_BPF_INSTALL_DIR }} - name: Build selftests/bpf/test_progs-bpf_gcc uses: ./build-selftests env: + BPF_GCC: ${{ env.GCC_BPF_INSTALL_DIR }} MAX_MAKE_JOBS: 32 - BPF_GCC: ${{ env.BPF_GCC_INSTALL_DIR }} SELFTESTS_BPF_TARGETS: 'test_progs-bpf_gcc' with: arch: ${{ inputs.arch }} diff --git a/.github/workflows/kernel-build-test.yml b/.github/workflows/kernel-build-test.yml index fd4e294..6e4aba7 100644 --- a/.github/workflows/kernel-build-test.yml +++ b/.github/workflows/kernel-build-test.yml @@ -51,6 +51,7 @@ on: default: false jobs: + # Build kernel and selftest build: uses: ./.github/workflows/kernel-build.yml @@ -70,7 +71,7 @@ jobs: arch: ${{ inputs.arch }} toolchain_full: ${{ inputs.toolchain_full }} toolchain: ${{ inputs.toolchain }} - runs_on: ${{ inputs.runs_on }} + runs_on: ${{ inputs.build_runs_on }} llvm-version: ${{ inputs.llvm-version }} kernel: ${{ inputs.kernel }} download_sources: ${{ inputs.download_sources }} @@ -101,7 +102,8 @@ jobs: uses: ./.github/workflows/gcc-bpf.yml needs: [build] with: - runs_on: ${{ inputs.runs_on }} + # GCC BPF does not need /dev/kvm, so use the "build" runners + runs_on: ${{ inputs.build_runs_on }} arch: ${{ inputs.arch }} llvm-version: ${{ inputs.llvm-version }} toolchain: ${{ inputs.toolchain }} diff --git a/.github/workflows/kernel-build.yml b/.github/workflows/kernel-build.yml index 47c7cb7..88ccaf8 100644 --- a/.github/workflows/kernel-build.yml +++ b/.github/workflows/kernel-build.yml @@ -42,22 +42,36 @@ on: jobs: build: name: build for ${{ inputs.arch }} with ${{ inputs.toolchain_full }}${{ inputs.release && '-O2' || '' }} - runs-on: ${{ fromJSON(inputs.runs_on) }} - timeout-minutes: 100 + # To run on CodeBuild, runs-on value must correspond to the AWS + # CodeBuild project associated with the kernel-patches webhook + # However matrix.py passes just a 'codebuild' string + runs-on: >- + ${{ + contains(fromJSON(inputs.runs_on), 'codebuild') + && format('codebuild-bpf-ci-{0}-{1}', github.run_id, github.run_attempt) + || fromJSON(inputs.runs_on) + }} env: ARTIFACTS_ARCHIVE: "vmlinux-${{ inputs.arch }}-${{ inputs.toolchain_full }}.tar.zst" BPF_NEXT_BASE_BRANCH: 'master' BPF_NEXT_FETCH_DEPTH: 64 # A bit of history is needed to facilitate incremental builds + CROSS_COMPILE: ${{ inputs.arch != 'x86_64' && 'true' || '' }} # BUILD_SCHED_EXT_SELFTESTS: ${{ inputs.arch == 'x86_64' || inputs.arch == 'aarch64' && 'true' || '' }} KBUILD_OUTPUT: ${{ github.workspace }}/kbuild-output KERNEL: ${{ inputs.kernel }} KERNEL_ROOT: ${{ github.workspace }} REPO_PATH: "" REPO_ROOT: ${{ github.workspace }} + RUNNER_TYPE: ${{ contains(fromJSON(inputs.runs_on), 'codebuild') && 'codebuild' || 'default' }} steps: - uses: actions/checkout@v4 with: fetch-depth: ${{ inputs.download_sources && 1 || env.BPF_NEXT_FETCH_DEPTH }} + + - if: ${{ env.RUNNER_TYPE == 'codebuild' }} + shell: bash + run: .github/scripts/tmpfsify-workspace.sh + - if: ${{ inputs.download_sources }} name: Download bpf-next tree env: @@ 
-66,6 +80,7 @@ jobs: with: dest: '.kernel' rev: ${{ env.BPF_NEXT_BASE_BRANCH }} + - uses: ./prepare-incremental-build with: repo-root: ${{ inputs.download_sources && '.kernel' || env.REPO_ROOT }} @@ -99,6 +114,18 @@ jobs: llvm-version: ${{ inputs.llvm-version }} pahole: master + # We have to setup qemu+binfmt in order to enable cross-compation of selftests. + # During selftests build, freshly built bpftool is executed. + # On self-hosted bare-metal hosts binfmt is pre-configured. + - if: ${{ env.RUNNER_TYPE == 'codebuild' && env.CROSS_COMPILE }} + name: Set up docker + uses: docker/setup-docker-action@v4 + - if: ${{ env.RUNNER_TYPE == 'codebuild' && env.CROSS_COMPILE }} + name: Setup binfmt and qemu + uses: docker/setup-qemu-action@v3 + with: + image: tonistiigi/binfmt:qemu-v9.2.0 + - name: Build kernel image uses: ./build-linux with: diff --git a/.github/workflows/kernel-test.yml b/.github/workflows/kernel-test.yml index 63b4294..4c29851 100644 --- a/.github/workflows/kernel-test.yml +++ b/.github/workflows/kernel-test.yml @@ -73,7 +73,7 @@ jobs: SELFTESTS_BPF: ${{ github.workspace }}/selftests/bpf VMTEST_CONFIGS: ${{ github.workspace }}/ci/vmtest/configs TEST_PROGS_TRAFFIC_MONITOR: ${{ inputs.arch == 'x86_64' && 'true' || '' }} - TEST_PROGS_WATCHDOG_TIMEOUT: 300 + TEST_PROGS_WATCHDOG_TIMEOUT: 600 with: arch: ${{ inputs.arch }} vmlinuz: '${{ github.workspace }}/vmlinuz' @@ -83,6 +83,7 @@ jobs: # Here we must use kbuild-output local to the repo, because # it was extracted from the artifacts. kbuild-output: ${{ env.REPO_ROOT }}/kbuild-output + - if: ${{ always() }} uses: actions/upload-artifact@v4 with: From 7fafba228b0eb0801a272057b20809f7afb14448 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Thu, 27 Mar 2025 14:59:52 -0700 Subject: [PATCH 2/2] ci/diffs: remove temporary patches Some of these patches are necessary at kernel-patches/vmtest, due to BPF CI testing various bpf branches. Here in libbpf/ci only bpf-next is tested, and the testing workflow passes without any temp patches. 
Signed-off-by: Ihor Solodrai --- ...-unnecessary-audit-log-for-CPU-secur.patch | 33 - ...e-SPECULATION_MITIGATIONS-to-arch-Kc.patch | 69 - ...lftest-failures-due-to-llvm18-change.patch | 94 - ...er-bug-due-to-incorrect-branch-offse.patch | 67 - ...-bpf-Fix-a-btf_dump-selftest-failure.patch | 40 - ...sockopt_lock_sock-in-ip_sock_set_tos.patch | 99 - ...lter-out-_GNU_SOURCE-when-compiling-.patch | 51 - ...x-bpf_cookie-and-find_vma-in-nested-.patch | 50 - ...x-pyperf180-compilation-failure-with.patch | 78 - ...sable-detection-of-llvm-when-buildin.patch | 41 - ...x-inet_csk_accept-prototype-in-test_.patch | 32 - ...f-work-around-latest-Clang-smartness.patch | 31 - ...kxceiver-ksft_print_msg-fix-format-t.patch | 89 - ...s-compilation-to-non-host-endianness.patch | 142 - ...Fix-symbol-counting-logic-by-looking.patch | 65 - ...s-compilation-to-non-host-endianness.patch | 117 - ci/diffs/0099-s390x_nolockdep.diff | 48 - ci/diffs/0099-selftest-cross-compile.diff | 13 - ...ancing-slot-in-iter_folioq_get_pages.patch | 46 - ...lftests-bpf-Fix-uprobe-consumer-test.patch | 58 - ...ext-fix-build-after-renames-in-sched.patch | 231 -- ...-samples-bpf-fix-samples-compilation.patch | 61 - ...to-remove-ftrace_test_recursion_tryl.patch | 46 - ...g-Define-ftrace_get_symaddr-for-s390.patch | 28 - ...dd-fno-strict-aliasing-to-BPF_CFLAGS.patch | 75 - ...d-std-gnu11-to-BPF_CFLAGS-and-CFLAGS.patch | 63 - ...nge-the-read-result-collector-to-onl.patch | 2542 ----------------- ...nvalid-irq-restore-in-scx_ops_bypass.patch | 56 - ...99-scx-Fix-maximal-BPF-selftest-prog.patch | 56 - 29 files changed, 4421 deletions(-) delete mode 100644 ci/diffs/0001-Revert-bpf-Avoid-unnecessary-audit-log-for-CPU-secur.patch delete mode 100644 ci/diffs/0001-arch-Kconfig-Move-SPECULATION_MITIGATIONS-to-arch-Kc.patch delete mode 100644 ci/diffs/0001-bpf-Fix-a-few-selftest-failures-due-to-llvm18-change.patch delete mode 100644 ci/diffs/0001-bpf-Fix-a-verifier-bug-due-to-incorrect-branch-offse.patch delete mode 100644 ci/diffs/0001-bpf-next-selftests-bpf-Fix-a-btf_dump-selftest-failure.patch delete mode 100644 ci/diffs/0001-net-bpf-Use-sockopt_lock_sock-in-ip_sock_set_tos.patch delete mode 100644 ci/diffs/0001-selftests-bpf-Filter-out-_GNU_SOURCE-when-compiling-.patch delete mode 100644 ci/diffs/0001-selftests-bpf-Fix-bpf_cookie-and-find_vma-in-nested-.patch delete mode 100644 ci/diffs/0001-selftests-bpf-Fix-pyperf180-compilation-failure-with.patch delete mode 100644 ci/diffs/0001-selftests-bpf-disable-detection-of-llvm-when-buildin.patch delete mode 100644 ci/diffs/0001-selftests-bpf-fix-inet_csk_accept-prototype-in-test_.patch delete mode 100644 ci/diffs/0001-selftests-bpf-work-around-latest-Clang-smartness.patch delete mode 100644 ci/diffs/0001-selftests-bpf-xskxceiver-ksft_print_msg-fix-format-t.patch delete mode 100644 ci/diffs/0001-tools-resolve_btfids-fix-cross-compilation-to-non-host-endianness.patch delete mode 100644 ci/diffs/0001-tracing-kprobes-Fix-symbol-counting-logic-by-looking.patch delete mode 100644 ci/diffs/0002-tools-resolve_btfids-fix-cross-compilation-to-non-host-endianness.patch delete mode 100644 ci/diffs/0099-s390x_nolockdep.diff delete mode 100644 ci/diffs/0099-selftest-cross-compile.diff delete mode 100644 ci/diffs/0199-iov_iter-fix-advancing-slot-in-iter_folioq_get_pages.patch delete mode 100644 ci/diffs/0299-selftests-bpf-Fix-uprobe-consumer-test.patch delete mode 100644 ci/diffs/0399-selftests-sched_ext-fix-build-after-renames-in-sched.patch delete mode 100644 
ci/diffs/0499-samples-bpf-fix-samples-compilation.patch delete mode 100644 ci/diffs/2000-s390-fgraph-Fix-to-remove-ftrace_test_recursion_tryl.patch delete mode 100644 ci/diffs/2001-s390-tracing-Define-ftrace_get_symaddr-for-s390.patch delete mode 100644 ci/diffs/2001-selftests-bpf-add-fno-strict-aliasing-to-BPF_CFLAGS.patch delete mode 100644 ci/diffs/2002-selftests-bpf-add-std-gnu11-to-BPF_CFLAGS-and-CFLAGS.patch delete mode 100644 ci/diffs/8888-Revert-netfs-Change-the-read-result-collector-to-onl.patch delete mode 100644 ci/diffs/9998-sched_ext-Fix-invalid-irq-restore-in-scx_ops_bypass.patch delete mode 100644 ci/diffs/9999-scx-Fix-maximal-BPF-selftest-prog.patch diff --git a/ci/diffs/0001-Revert-bpf-Avoid-unnecessary-audit-log-for-CPU-secur.patch b/ci/diffs/0001-Revert-bpf-Avoid-unnecessary-audit-log-for-CPU-secur.patch deleted file mode 100644 index 3b61392..0000000 --- a/ci/diffs/0001-Revert-bpf-Avoid-unnecessary-audit-log-for-CPU-secur.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 5440a12ac8fb2a8e051c597fcf5d85b427fe612a Mon Sep 17 00:00:00 2001 -From: Andrii Nakryiko -Date: Fri, 13 Oct 2023 12:44:34 -0700 -Subject: [PATCH] Revert "bpf: Avoid unnecessary audit log for CPU security - mitigations" - -This reverts commit 236334aeec0f93217cf9235f2004e61a0a1a5985. ---- - include/linux/bpf.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/include/linux/bpf.h b/include/linux/bpf.h -index f0891ba24cb1..61bde4520f5c 100644 ---- a/include/linux/bpf.h -+++ b/include/linux/bpf.h -@@ -2164,12 +2164,12 @@ static inline bool bpf_allow_uninit_stack(void) - - static inline bool bpf_bypass_spec_v1(void) - { -- return cpu_mitigations_off() || perfmon_capable(); -+ return perfmon_capable() || cpu_mitigations_off(); - } - - static inline bool bpf_bypass_spec_v4(void) - { -- return cpu_mitigations_off() || perfmon_capable(); -+ return perfmon_capable() || cpu_mitigations_off(); - } - - int bpf_map_new_fd(struct bpf_map *map, int flags); --- -2.34.1 - diff --git a/ci/diffs/0001-arch-Kconfig-Move-SPECULATION_MITIGATIONS-to-arch-Kc.patch b/ci/diffs/0001-arch-Kconfig-Move-SPECULATION_MITIGATIONS-to-arch-Kc.patch deleted file mode 100644 index 63bdd28..0000000 --- a/ci/diffs/0001-arch-Kconfig-Move-SPECULATION_MITIGATIONS-to-arch-Kc.patch +++ /dev/null @@ -1,69 +0,0 @@ -From c71766e8ff7a7f950522d25896fba758585500df Mon Sep 17 00:00:00 2001 -From: Song Liu -Date: Mon, 22 Apr 2024 21:14:40 -0700 -Subject: [PATCH] arch/Kconfig: Move SPECULATION_MITIGATIONS to arch/Kconfig - -SPECULATION_MITIGATIONS is currently defined only for x86. As a result, -IS_ENABLED(CONFIG_SPECULATION_MITIGATIONS) is always false for other -archs. f337a6a21e2f effectively set "mitigations=off" by default on -non-x86 archs, which is not desired behavior. Jakub observed this -change when running bpf selftests on s390 and arm64. 
- -Fix this by moving SPECULATION_MITIGATIONS to arch/Kconfig so that it is -available in all archs and thus can be used safely in kernel/cpu.c - -Fixes: f337a6a21e2f ("x86/cpu: Actually turn off mitigations by default for SPECULATION_MITIGATIONS=n") -Cc: stable@vger.kernel.org -Cc: Sean Christopherson -Cc: Ingo Molnar -Cc: Daniel Sneddon -Cc: Jakub Kicinski -Signed-off-by: Song Liu ---- - arch/Kconfig | 10 ++++++++++ - arch/x86/Kconfig | 10 ---------- - 2 files changed, 10 insertions(+), 10 deletions(-) - -diff --git a/arch/Kconfig b/arch/Kconfig -index 9f066785bb71..8f4af75005f8 100644 ---- a/arch/Kconfig -+++ b/arch/Kconfig -@@ -1609,4 +1609,14 @@ config CC_HAS_SANE_FUNCTION_ALIGNMENT - # strict alignment always, even with -falign-functions. - def_bool CC_HAS_MIN_FUNCTION_ALIGNMENT || CC_IS_CLANG - -+menuconfig SPECULATION_MITIGATIONS -+ bool "Mitigations for speculative execution vulnerabilities" -+ default y -+ help -+ Say Y here to enable options which enable mitigations for -+ speculative execution hardware vulnerabilities. -+ -+ If you say N, all mitigations will be disabled. You really -+ should know what you are doing to say so. -+ - endmenu -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 39886bab943a..50c890fce5e0 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -2486,16 +2486,6 @@ config PREFIX_SYMBOLS - def_bool y - depends on CALL_PADDING && !CFI_CLANG - --menuconfig SPECULATION_MITIGATIONS -- bool "Mitigations for speculative execution vulnerabilities" -- default y -- help -- Say Y here to enable options which enable mitigations for -- speculative execution hardware vulnerabilities. -- -- If you say N, all mitigations will be disabled. You really -- should know what you are doing to say so. -- - if SPECULATION_MITIGATIONS - - config MITIGATION_PAGE_TABLE_ISOLATION --- -2.43.0 - diff --git a/ci/diffs/0001-bpf-Fix-a-few-selftest-failures-due-to-llvm18-change.patch b/ci/diffs/0001-bpf-Fix-a-few-selftest-failures-due-to-llvm18-change.patch deleted file mode 100644 index a13d767..0000000 --- a/ci/diffs/0001-bpf-Fix-a-few-selftest-failures-due-to-llvm18-change.patch +++ /dev/null @@ -1,94 +0,0 @@ -From fb9a697860acd8f54f2ba6647923794378eb33da Mon Sep 17 00:00:00 2001 -From: Yonghong Song -Date: Sun, 26 Nov 2023 21:03:42 -0800 -Subject: [PATCH] bpf: Fix a few selftest failures due to llvm18 change - -With latest upstream llvm18, the following test cases failed: - - $ ./test_progs -j - #13/2 bpf_cookie/multi_kprobe_link_api:FAIL - #13/3 bpf_cookie/multi_kprobe_attach_api:FAIL - #13 bpf_cookie:FAIL - #77 fentry_fexit:FAIL - #78/1 fentry_test/fentry:FAIL - #78 fentry_test:FAIL - #82/1 fexit_test/fexit:FAIL - #82 fexit_test:FAIL - #112/1 kprobe_multi_test/skel_api:FAIL - #112/2 kprobe_multi_test/link_api_addrs:FAIL - [...] - #112 kprobe_multi_test:FAIL - #356/17 test_global_funcs/global_func17:FAIL - #356 test_global_funcs:FAIL - -Further analysis shows llvm upstream patch [1] is responsible for the above -failures. For example, for function bpf_fentry_test7() in net/bpf/test_run.c, -without [1], the asm code is: - - 0000000000000400 : - 400: f3 0f 1e fa endbr64 - 404: e8 00 00 00 00 callq 0x409 - 409: 48 89 f8 movq %rdi, %rax - 40c: c3 retq - 40d: 0f 1f 00 nopl (%rax) - -... and with [1], the asm code is: - - 0000000000005d20 : - 5d20: e8 00 00 00 00 callq 0x5d25 - 5d25: c3 retq - -... and is called instead of -and this caused test failures for #13/#77 etc. except #356. 
- -For test case #356/17, with [1] (progs/test_global_func17.c)), the main prog -looks like: - - 0000000000000000 : - 0: b4 00 00 00 2a 00 00 00 w0 = 0x2a - 1: 95 00 00 00 00 00 00 00 exit - -... which passed verification while the test itself expects a verification -failure. - -Let us add 'barrier_var' style asm code in both places to prevent function -specialization which caused selftests failure. - - [1] https://github.com/llvm/llvm-project/pull/72903 - -Signed-off-by: Yonghong Song -Signed-off-by: Daniel Borkmann -Link: https://lore.kernel.org/bpf/20231127050342.1945270-1-yonghong.song@linux.dev ---- - net/bpf/test_run.c | 2 +- - tools/testing/selftests/bpf/progs/test_global_func17.c | 1 + - 2 files changed, 2 insertions(+), 1 deletion(-) - -diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c -index c9fdcc5cdce1..711cf5d59816 100644 ---- a/net/bpf/test_run.c -+++ b/net/bpf/test_run.c -@@ -542,7 +542,7 @@ struct bpf_fentry_test_t { - - int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg) - { -- asm volatile (""); -+ asm volatile ("": "+r"(arg)); - return (long)arg; - } - -diff --git a/tools/testing/selftests/bpf/progs/test_global_func17.c b/tools/testing/selftests/bpf/progs/test_global_func17.c -index a32e11c7d933..5de44b09e8ec 100644 ---- a/tools/testing/selftests/bpf/progs/test_global_func17.c -+++ b/tools/testing/selftests/bpf/progs/test_global_func17.c -@@ -5,6 +5,7 @@ - - __noinline int foo(int *p) - { -+ barrier_var(p); - return p ? (*p = 42) : 0; - } - --- -2.34.1 - diff --git a/ci/diffs/0001-bpf-Fix-a-verifier-bug-due-to-incorrect-branch-offse.patch b/ci/diffs/0001-bpf-Fix-a-verifier-bug-due-to-incorrect-branch-offse.patch deleted file mode 100644 index 5832a42..0000000 --- a/ci/diffs/0001-bpf-Fix-a-verifier-bug-due-to-incorrect-branch-offse.patch +++ /dev/null @@ -1,67 +0,0 @@ -From dfce9cb3140592b886838e06f3e0c25fea2a9cae Mon Sep 17 00:00:00 2001 -From: Yonghong Song -Date: Thu, 30 Nov 2023 18:46:40 -0800 -Subject: [PATCH 1/1] bpf: Fix a verifier bug due to incorrect branch offset - comparison with cpu=v4 - -Bpf cpu=v4 support is introduced in [1] and Commit 4cd58e9af8b9 -("bpf: Support new 32bit offset jmp instruction") added support for new -32bit offset jmp instruction. Unfortunately, in function -bpf_adj_delta_to_off(), for new branch insn with 32bit offset, the offset -(plus/minor a small delta) compares to 16-bit offset bound -[S16_MIN, S16_MAX], which caused the following verification failure: - $ ./test_progs-cpuv4 -t verif_scale_pyperf180 - ... - insn 10 cannot be patched due to 16-bit range - ... - libbpf: failed to load object 'pyperf180.bpf.o' - scale_test:FAIL:expect_success unexpected error: -12 (errno 12) - #405 verif_scale_pyperf180:FAIL - -Note that due to recent llvm18 development, the patch [2] (already applied -in bpf-next) needs to be applied to bpf tree for testing purpose. - -The fix is rather simple. For 32bit offset branch insn, the adjusted -offset compares to [S32_MIN, S32_MAX] and then verification succeeded. 
- - [1] https://lore.kernel.org/all/20230728011143.3710005-1-yonghong.song@linux.dev - [2] https://lore.kernel.org/bpf/20231110193644.3130906-1-yonghong.song@linux.dev - -Fixes: 4cd58e9af8b9 ("bpf: Support new 32bit offset jmp instruction") -Signed-off-by: Yonghong Song -Signed-off-by: Andrii Nakryiko -Link: https://lore.kernel.org/bpf/20231201024640.3417057-1-yonghong.song@linux.dev ---- - kernel/bpf/core.c | 12 ++++++++---- - 1 file changed, 8 insertions(+), 4 deletions(-) - -diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c -index cd3afe57ece3..fe254ae035fe 100644 ---- a/kernel/bpf/core.c -+++ b/kernel/bpf/core.c -@@ -371,14 +371,18 @@ static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old, - static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old, - s32 end_new, s32 curr, const bool probe_pass) - { -- const s32 off_min = S16_MIN, off_max = S16_MAX; -+ s64 off_min, off_max, off; - s32 delta = end_new - end_old; -- s32 off; - -- if (insn->code == (BPF_JMP32 | BPF_JA)) -+ if (insn->code == (BPF_JMP32 | BPF_JA)) { - off = insn->imm; -- else -+ off_min = S32_MIN; -+ off_max = S32_MAX; -+ } else { - off = insn->off; -+ off_min = S16_MIN; -+ off_max = S16_MAX; -+ } - - if (curr < pos && curr + off + 1 >= end_old) - off += delta; --- -2.34.1 - diff --git a/ci/diffs/0001-bpf-next-selftests-bpf-Fix-a-btf_dump-selftest-failure.patch b/ci/diffs/0001-bpf-next-selftests-bpf-Fix-a-btf_dump-selftest-failure.patch deleted file mode 100644 index ea6b238..0000000 --- a/ci/diffs/0001-bpf-next-selftests-bpf-Fix-a-btf_dump-selftest-failure.patch +++ /dev/null @@ -1,40 +0,0 @@ -From patchwork Fri Aug 2 18:54:34 2024 -From: Yonghong Song -Subject: [PATCH bpf-next] selftests/bpf: Fix a btf_dump selftest failure - -Jakub reported bpf selftest "btf_dump" failure after forwarding to -v6.11-rc1 with netdev. - Error: #33 btf_dump - Error: #33/15 btf_dump/btf_dump: var_data - btf_dump_data:FAIL:find type id unexpected find type id: actual -2 < expected 0 - -The reason for the failure is due to - commit 94ede2a3e913 ("profiling: remove stale percpu flip buffer variables") -where percpu static variable "cpu_profile_flip" is removed. - -Let us replace "cpu_profile_flip" with a variable in bpf subsystem -so whenever that variable gets deleted or renamed, we can detect the -failure immediately. In this case, I picked a static percpu variable -"bpf_cgrp_storage_busy" which is defined in kernel/bpf/bpf_cgrp_storage.c. 
- -Reported-by: Jakub Kicinski -Signed-off-by: Yonghong Song ---- - tools/testing/selftests/bpf/prog_tests/btf_dump.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c -index 09a8e6f9b379..b293b8501fd6 100644 ---- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c -+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c -@@ -805,8 +805,8 @@ static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, - TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT, - "int cpu_number = (int)100", 100); - #endif -- TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_profile_flip", int, BTF_F_COMPACT, -- "static int cpu_profile_flip = (int)2", 2); -+ TEST_BTF_DUMP_VAR(btf, d, NULL, str, "bpf_cgrp_storage_busy", int, BTF_F_COMPACT, -+ "static int bpf_cgrp_storage_busy = (int)2", 2); - } - - static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str, diff --git a/ci/diffs/0001-net-bpf-Use-sockopt_lock_sock-in-ip_sock_set_tos.patch b/ci/diffs/0001-net-bpf-Use-sockopt_lock_sock-in-ip_sock_set_tos.patch deleted file mode 100644 index bd12bd9..0000000 --- a/ci/diffs/0001-net-bpf-Use-sockopt_lock_sock-in-ip_sock_set_tos.patch +++ /dev/null @@ -1,99 +0,0 @@ -From c8268f8e9fa33c32e1f2f86fc7b703408a396c70 Mon Sep 17 00:00:00 2001 -From: Yonghong Song -Date: Fri, 27 Oct 2023 11:24:24 -0700 -Subject: [PATCH] net: bpf: Use sockopt_lock_sock() in ip_sock_set_tos() - -With latest sync from net-next tree, bpf-next has a bpf selftest failure: - [root@arch-fb-vm1 bpf]# ./test_progs -t setget_sockopt - ... - [ 76.194349] ============================================ - [ 76.194682] WARNING: possible recursive locking detected - [ 76.195039] 6.6.0-rc7-g37884503df08-dirty #67 Tainted: G W OE - [ 76.195518] -------------------------------------------- - [ 76.195852] new_name/154 is trying to acquire lock: - [ 76.196159] ffff8c3e06ad8d30 (sk_lock-AF_INET){+.+.}-{0:0}, at: ip_sock_set_tos+0x19/0x30 - [ 76.196669] - [ 76.196669] but task is already holding lock: - [ 76.197028] ffff8c3e06ad8d30 (sk_lock-AF_INET){+.+.}-{0:0}, at: inet_listen+0x21/0x70 - [ 76.197517] - [ 76.197517] other info that might help us debug this: - [ 76.197919] Possible unsafe locking scenario: - [ 76.197919] - [ 76.198287] CPU0 - [ 76.198444] ---- - [ 76.198600] lock(sk_lock-AF_INET); - [ 76.198831] lock(sk_lock-AF_INET); - [ 76.199062] - [ 76.199062] *** DEADLOCK *** - [ 76.199062] - [ 76.199420] May be due to missing lock nesting notation - [ 76.199420] - [ 76.199879] 2 locks held by new_name/154: - [ 76.200131] #0: ffff8c3e06ad8d30 (sk_lock-AF_INET){+.+.}-{0:0}, at: inet_listen+0x21/0x70 - [ 76.200644] #1: ffffffff90f96a40 (rcu_read_lock){....}-{1:2}, at: __cgroup_bpf_run_filter_sock_ops+0x55/0x290 - [ 76.201268] - [ 76.201268] stack backtrace: - [ 76.201538] CPU: 4 PID: 154 Comm: new_name Tainted: G W OE 6.6.0-rc7-g37884503df08-dirty #67 - [ 76.202134] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 - [ 76.202699] Call Trace: - [ 76.202858] - [ 76.203002] dump_stack_lvl+0x4b/0x80 - [ 76.203239] __lock_acquire+0x740/0x1ec0 - [ 76.203503] lock_acquire+0xc1/0x2a0 - [ 76.203766] ? ip_sock_set_tos+0x19/0x30 - [ 76.204050] ? sk_stream_write_space+0x12a/0x230 - [ 76.204389] ? lock_release+0xbe/0x260 - [ 76.204661] lock_sock_nested+0x32/0x80 - [ 76.204942] ? 
ip_sock_set_tos+0x19/0x30 - [ 76.205208] ip_sock_set_tos+0x19/0x30 - [ 76.205452] do_ip_setsockopt+0x4b3/0x1580 - [ 76.205719] __bpf_setsockopt+0x62/0xa0 - [ 76.205963] bpf_sock_ops_setsockopt+0x11/0x20 - [ 76.206247] bpf_prog_630217292049c96e_bpf_test_sockopt_int+0xbc/0x123 - [ 76.206660] bpf_prog_493685a3bae00bbd_bpf_test_ip_sockopt+0x49/0x4b - [ 76.207055] bpf_prog_b0bcd27f269aeea0_skops_sockopt+0x44c/0xec7 - [ 76.207437] __cgroup_bpf_run_filter_sock_ops+0xda/0x290 - [ 76.207829] __inet_listen_sk+0x108/0x1b0 - [ 76.208122] inet_listen+0x48/0x70 - [ 76.208373] __sys_listen+0x74/0xb0 - [ 76.208630] __x64_sys_listen+0x16/0x20 - [ 76.208911] do_syscall_64+0x3f/0x90 - [ 76.209174] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 - ... - -Both ip_sock_set_tos() and inet_listen() calls lock_sock(sk) which -caused a dead lock. - -To fix the issue, use sockopt_lock_sock() in ip_sock_set_tos() -instead. sockopt_lock_sock() will avoid lock_sock() if it is in bpf -context. - -Fixes: 878d951c6712 ("inet: lock the socket in ip_sock_set_tos()") -Suggested-by: Martin KaFai Lau -Signed-off-by: Yonghong Song -Signed-off-by: Andrii Nakryiko -Reviewed-by: Eric Dumazet -Link: https://lore.kernel.org/bpf/20231027182424.1444845-1-yonghong.song@linux.dev ---- - net/ipv4/ip_sockglue.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c -index 9c68b6b74d9f..2efc53526a38 100644 ---- a/net/ipv4/ip_sockglue.c -+++ b/net/ipv4/ip_sockglue.c -@@ -602,9 +602,9 @@ void __ip_sock_set_tos(struct sock *sk, int val) - - void ip_sock_set_tos(struct sock *sk, int val) - { -- lock_sock(sk); -+ sockopt_lock_sock(sk); - __ip_sock_set_tos(sk, val); -- release_sock(sk); -+ sockopt_release_sock(sk); - } - EXPORT_SYMBOL(ip_sock_set_tos); - --- -2.34.1 - diff --git a/ci/diffs/0001-selftests-bpf-Filter-out-_GNU_SOURCE-when-compiling-.patch b/ci/diffs/0001-selftests-bpf-Filter-out-_GNU_SOURCE-when-compiling-.patch deleted file mode 100644 index da5bcdc..0000000 --- a/ci/diffs/0001-selftests-bpf-Filter-out-_GNU_SOURCE-when-compiling-.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 41c24102af7b6236277a214428b203d51a3462df Mon Sep 17 00:00:00 2001 -From: Stanislav Fomichev -Date: Thu, 25 Jul 2024 14:40:29 -0700 -Subject: [PATCH 1/1] selftests/bpf: Filter out _GNU_SOURCE when compiling - test_cpp - -Jakub reports build failures when merging linux/master with net tree: - -CXX test_cpp -In file included from :454: -:2:9: error: '_GNU_SOURCE' macro redefined [-Werror,-Wmacro-redefined] - 2 | #define _GNU_SOURCE - | ^ -:445:9: note: previous definition is here - 445 | #define _GNU_SOURCE 1 - -The culprit is commit cc937dad85ae ("selftests: centralize -D_GNU_SOURCE= to -CFLAGS in lib.mk") which unconditionally added -D_GNU_SOUCE to CLFAGS. -Apparently clang++ also unconditionally adds it for the C++ targets [0] -which causes a conflict. Add small change in the selftests makefile -to filter it out for test_cpp. - -Not sure which tree it should go via, targeting bpf for now, but net -might be better? 
- -0: https://stackoverflow.com/questions/11670581/why-is-gnu-source-defined-by-default-and-how-to-turn-it-off - -Signed-off-by: Stanislav Fomichev -Signed-off-by: Andrii Nakryiko -Acked-by: Jiri Olsa -Link: https://lore.kernel.org/bpf/20240725214029.1760809-1-sdf@fomichev.me ---- - tools/testing/selftests/bpf/Makefile | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile -index dd49c1d23a60..81d4757ecd4c 100644 ---- a/tools/testing/selftests/bpf/Makefile -+++ b/tools/testing/selftests/bpf/Makefile -@@ -713,7 +713,7 @@ $(OUTPUT)/xdp_features: xdp_features.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xdp - # Make sure we are able to include and link libbpf against c++. - $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) - $(call msg,CXX,,$@) -- $(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ -+ $(Q)$(CXX) $(subst -D_GNU_SOURCE=,,$(CFLAGS)) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ - - # Benchmark runner - $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ) --- -2.43.0 - diff --git a/ci/diffs/0001-selftests-bpf-Fix-bpf_cookie-and-find_vma-in-nested-.patch b/ci/diffs/0001-selftests-bpf-Fix-bpf_cookie-and-find_vma-in-nested-.patch deleted file mode 100644 index 4ebfe20..0000000 --- a/ci/diffs/0001-selftests-bpf-Fix-bpf_cookie-and-find_vma-in-nested-.patch +++ /dev/null @@ -1,50 +0,0 @@ -From f3d2080e8cf23125f79e345061149ae40f66816f Mon Sep 17 00:00:00 2001 -From: Song Liu -Date: Mon, 3 Jun 2024 23:43:17 -0700 -Subject: [PATCH bpf-next] selftests/bpf: Fix bpf_cookie and find_vma in nested - VM - -bpf_cookie and find_vma are flaky in nested VMs, which is used by some CI -systems. It turns out these failures are caused by unreliable perf event -in nested VM. Fix these by: - - 1. Use PERF_COUNT_SW_CPU_CLOCK in find_vma; - 2. Increase sample_freq in bpf_cookie. 
- -Signed-off-by: Song Liu ---- - tools/testing/selftests/bpf/prog_tests/bpf_cookie.c | 2 +- - tools/testing/selftests/bpf/prog_tests/find_vma.c | 4 ++-- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c -index 4407ea428e77..070c52c312e5 100644 ---- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c -+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c -@@ -451,7 +451,7 @@ static void pe_subtest(struct test_bpf_cookie *skel) - attr.type = PERF_TYPE_SOFTWARE; - attr.config = PERF_COUNT_SW_CPU_CLOCK; - attr.freq = 1; -- attr.sample_freq = 1000; -+ attr.sample_freq = 10000; - pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); - if (!ASSERT_GE(pfd, 0, "perf_fd")) - goto cleanup; -diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c -index 5165b38f0e59..f7619e0ade10 100644 ---- a/tools/testing/selftests/bpf/prog_tests/find_vma.c -+++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c -@@ -29,8 +29,8 @@ static int open_pe(void) - - /* create perf event */ - attr.size = sizeof(attr); -- attr.type = PERF_TYPE_HARDWARE; -- attr.config = PERF_COUNT_HW_CPU_CYCLES; -+ attr.type = PERF_TYPE_SOFTWARE; -+ attr.config = PERF_COUNT_SW_CPU_CLOCK; - attr.freq = 1; - attr.sample_freq = 1000; - pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); --- -2.43.0 - diff --git a/ci/diffs/0001-selftests-bpf-Fix-pyperf180-compilation-failure-with.patch b/ci/diffs/0001-selftests-bpf-Fix-pyperf180-compilation-failure-with.patch deleted file mode 100644 index d55d2e7..0000000 --- a/ci/diffs/0001-selftests-bpf-Fix-pyperf180-compilation-failure-with.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 100888fb6d8a185866b1520031ee7e3182b173de Mon Sep 17 00:00:00 2001 -From: Yonghong Song -Date: Fri, 10 Nov 2023 11:36:44 -0800 -Subject: [PATCH] selftests/bpf: Fix pyperf180 compilation failure with clang18 - -With latest clang18 (main branch of llvm-project repo), when building bpf selftests, - [~/work/bpf-next (master)]$ make -C tools/testing/selftests/bpf LLVM=1 -j - -The following compilation error happens: - fatal error: error in backend: Branch target out of insn range - ... - Stack dump: - 0. Program arguments: clang -g -Wall -Werror -D__TARGET_ARCH_x86 -mlittle-endian - -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf/tools/include - -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf -I/home/yhs/work/bpf-next/tools/include/uapi - -I/home/yhs/work/bpf-next/tools/testing/selftests/usr/include -idirafter - /home/yhs/work/llvm-project/llvm/build.18/install/lib/clang/18/include -idirafter /usr/local/include - -idirafter /usr/include -Wno-compare-distinct-pointer-types -DENABLE_ATOMICS_TESTS -O2 --target=bpf - -c progs/pyperf180.c -mcpu=v3 -o /home/yhs/work/bpf-next/tools/testing/selftests/bpf/pyperf180.bpf.o - 1. parser at end of file - 2. Code generation - ... - -The compilation failure only happens to cpu=v2 and cpu=v3. cpu=v4 is okay -since cpu=v4 supports 32-bit branch target offset. - -The above failure is due to upstream llvm patch [1] where some inlining behavior -are changed in clang18. - -To workaround the issue, previously all 180 loop iterations are fully unrolled. -The bpf macro __BPF_CPU_VERSION__ (implemented in clang18 recently) is used to avoid -unrolling changes if cpu=v4. 
If __BPF_CPU_VERSION__ is not available and the -compiler is clang18, the unrollng amount is unconditionally reduced. - - [1] https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e - -Signed-off-by: Yonghong Song -Signed-off-by: Andrii Nakryiko -Tested-by: Alan Maguire -Link: https://lore.kernel.org/bpf/20231110193644.3130906-1-yonghong.song@linux.dev ---- - tools/testing/selftests/bpf/progs/pyperf180.c | 22 +++++++++++++++++++ - 1 file changed, 22 insertions(+) - -diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c -index c39f559d3100..42c4a8b62e36 100644 ---- a/tools/testing/selftests/bpf/progs/pyperf180.c -+++ b/tools/testing/selftests/bpf/progs/pyperf180.c -@@ -1,4 +1,26 @@ - // SPDX-License-Identifier: GPL-2.0 - // Copyright (c) 2019 Facebook - #define STACK_MAX_LEN 180 -+ -+/* llvm upstream commit at clang18 -+ * https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e -+ * changed inlining behavior and caused compilation failure as some branch -+ * target distance exceeded 16bit representation which is the maximum for -+ * cpu v1/v2/v3. Macro __BPF_CPU_VERSION__ is later implemented in clang18 -+ * to specify which cpu version is used for compilation. So a smaller -+ * unroll_count can be set if __BPF_CPU_VERSION__ is less than 4, which -+ * reduced some branch target distances and resolved the compilation failure. -+ * -+ * To capture the case where a developer/ci uses clang18 but the corresponding -+ * repo checkpoint does not have __BPF_CPU_VERSION__, a smaller unroll_count -+ * will be set as well to prevent potential compilation failures. -+ */ -+#ifdef __BPF_CPU_VERSION__ -+#if __BPF_CPU_VERSION__ < 4 -+#define UNROLL_COUNT 90 -+#endif -+#elif __clang_major__ == 18 -+#define UNROLL_COUNT 90 -+#endif -+ - #include "pyperf.h" --- -2.34.1 - diff --git a/ci/diffs/0001-selftests-bpf-disable-detection-of-llvm-when-buildin.patch b/ci/diffs/0001-selftests-bpf-disable-detection-of-llvm-when-buildin.patch deleted file mode 100644 index 6497a6c..0000000 --- a/ci/diffs/0001-selftests-bpf-disable-detection-of-llvm-when-buildin.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 42839864a62ee244ec280b09149b1cb439f681db Mon Sep 17 00:00:00 2001 -From: Manu Bretelle -Date: Fri, 27 Oct 2023 18:25:39 -0700 -Subject: [PATCH bpf-next] selftests/bpf: disable detection of llvm when - building bpftool - -The VMs in which we run the selftests do not have llvm installed. -We build selftests/bpftool in a host that have llvm. -bpftool currently will use llvm first and fallback to libbfd but there -is no way to disable detection from the command line. - -Removing it from the feature detection should force us to use libbfd. 
- -Signed-off-by: Manu Bretelle ---- - tools/bpf/bpftool/Makefile | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile -index e9154ace80ff..01314458e25e 100644 ---- a/tools/bpf/bpftool/Makefile -+++ b/tools/bpf/bpftool/Makefile -@@ -95,7 +95,6 @@ RM ?= rm -f - FEATURE_USER = .bpftool - - FEATURE_TESTS := clang-bpf-co-re --FEATURE_TESTS += llvm - FEATURE_TESTS += libcap - FEATURE_TESTS += libbfd - FEATURE_TESTS += libbfd-liberty -@@ -104,7 +103,6 @@ FEATURE_TESTS += disassembler-four-args - FEATURE_TESTS += disassembler-init-styled - - FEATURE_DISPLAY := clang-bpf-co-re --FEATURE_DISPLAY += llvm - FEATURE_DISPLAY += libcap - FEATURE_DISPLAY += libbfd - FEATURE_DISPLAY += libbfd-liberty --- -2.39.3 - diff --git a/ci/diffs/0001-selftests-bpf-fix-inet_csk_accept-prototype-in-test_.patch b/ci/diffs/0001-selftests-bpf-fix-inet_csk_accept-prototype-in-test_.patch deleted file mode 100644 index 3fa007c..0000000 --- a/ci/diffs/0001-selftests-bpf-fix-inet_csk_accept-prototype-in-test_.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 0daad0a615e687e1247230f3d0c31ae60ba32314 Mon Sep 17 00:00:00 2001 -From: Andrii Nakryiko -Date: Tue, 28 May 2024 15:29:38 -0700 -Subject: [PATCH bpf-next] selftests/bpf: fix inet_csk_accept prototype in - test_sk_storage_tracing.c - -Recent kernel change ([0]) changed inet_csk_accept() prototype. Adapt -progs/test_sk_storage_tracing.c to take that into account. - - [0] 92ef0fd55ac8 ("net: change proto and proto_ops accept type") - -Signed-off-by: Andrii Nakryiko ---- - tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c -index 02e718f06e0f..40531e56776e 100644 ---- a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c -+++ b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c -@@ -84,7 +84,7 @@ int BPF_PROG(trace_tcp_connect, struct sock *sk) - } - - SEC("fexit/inet_csk_accept") --int BPF_PROG(inet_csk_accept, struct sock *sk, int flags, int *err, bool kern, -+int BPF_PROG(inet_csk_accept, struct sock *sk, struct proto_accept_arg *arg, - struct sock *accepted_sk) - { - set_task_info(accepted_sk); --- -2.43.0 - diff --git a/ci/diffs/0001-selftests-bpf-work-around-latest-Clang-smartness.patch b/ci/diffs/0001-selftests-bpf-work-around-latest-Clang-smartness.patch deleted file mode 100644 index ec1e29a..0000000 --- a/ci/diffs/0001-selftests-bpf-work-around-latest-Clang-smartness.patch +++ /dev/null @@ -1,31 +0,0 @@ -From d31a7125891994681503770cff46a119692fb2b9 Mon Sep 17 00:00:00 2001 -From: Andrii Nakryiko -Date: Mon, 11 Dec 2023 17:09:38 -0800 -Subject: [PATCH 1/1] selftests/bpf: work around latest Clang smartness - -Work around the issue while we deal with it in the Clang itself. -See [0]. 
- - [0] https://github.com/llvm/llvm-project/pull/73662#issuecomment-1849281758 - -Signed-off-by: Andrii Nakryiko ---- - tools/testing/selftests/bpf/progs/iters.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c -index 3aca3dc145b5..929ba6fa2105 100644 ---- a/tools/testing/selftests/bpf/progs/iters.c -+++ b/tools/testing/selftests/bpf/progs/iters.c -@@ -1420,7 +1420,7 @@ SEC("raw_tp") - __success - int iter_arr_with_actual_elem_count(const void *ctx) - { -- int i, n = loop_data.n, sum = 0; -+ unsigned i, n = loop_data.n, sum = 0; - - if (n > ARRAY_SIZE(loop_data.data)) - return 0; --- -2.34.1 - diff --git a/ci/diffs/0001-selftests-bpf-xskxceiver-ksft_print_msg-fix-format-t.patch b/ci/diffs/0001-selftests-bpf-xskxceiver-ksft_print_msg-fix-format-t.patch deleted file mode 100644 index e631fac..0000000 --- a/ci/diffs/0001-selftests-bpf-xskxceiver-ksft_print_msg-fix-format-t.patch +++ /dev/null @@ -1,89 +0,0 @@ -From fe69a1b1b6ed9ffc2c578c63f526026a8ab74f0c Mon Sep 17 00:00:00 2001 -From: Anders Roxell -Date: Thu, 9 Nov 2023 18:43:28 +0100 -Subject: [PATCH] selftests: bpf: xskxceiver: ksft_print_msg: fix format type - error - -Crossbuilding selftests/bpf for architecture arm64, format specifies -type error show up like. - -xskxceiver.c:912:34: error: format specifies type 'int' but the argument -has type '__u64' (aka 'unsigned long long') [-Werror,-Wformat] - ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n", - ~~ - %llu - __func__, pkt->pkt_nb, meta->count); - ^~~~~~~~~~~ -xskxceiver.c:929:55: error: format specifies type 'unsigned long long' but - the argument has type 'u64' (aka 'unsigned long') [-Werror,-Wformat] - ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len); - ~~~~ ^~~~ - -Fixing the issues by casting to (unsigned long long) and changing the -specifiers to be %llu from %d and %u, since with u64s it might be %llx -or %lx, depending on architecture. 
- -Signed-off-by: Anders Roxell -Link: https://lore.kernel.org/r/20231109174328.1774571-1-anders.roxell@linaro.org -Signed-off-by: Alexei Starovoitov ---- - tools/testing/selftests/bpf/xskxceiver.c | 19 ++++++++++++------- - 1 file changed, 12 insertions(+), 7 deletions(-) - -diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c -index 591ca9637b23..b604c570309a 100644 ---- a/tools/testing/selftests/bpf/xskxceiver.c -+++ b/tools/testing/selftests/bpf/xskxceiver.c -@@ -908,8 +908,9 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr) - struct xdp_info *meta = data - sizeof(struct xdp_info); - - if (meta->count != pkt->pkt_nb) { -- ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n", -- __func__, pkt->pkt_nb, meta->count); -+ ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n", -+ __func__, pkt->pkt_nb, -+ (unsigned long long)meta->count); - return false; - } - -@@ -926,11 +927,13 @@ static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 exp - - if (addr >= umem->num_frames * umem->frame_size || - addr + len > umem->num_frames * umem->frame_size) { -- ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len); -+ ksft_print_msg("Frag invalid addr: %llx len: %u\n", -+ (unsigned long long)addr, len); - return false; - } - if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) { -- ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", addr, len); -+ ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", -+ (unsigned long long)addr, len); - return false; - } - -@@ -1029,7 +1032,8 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) - u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1); - - ksft_print_msg("[%s] Too many packets completed\n", __func__); -- ksft_print_msg("Last completion address: %llx\n", addr); -+ ksft_print_msg("Last completion address: %llx\n", -+ (unsigned long long)addr); - return TEST_FAILURE; - } - -@@ -1513,8 +1517,9 @@ static int validate_tx_invalid_descs(struct ifobject *ifobject) - } - - if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) { -- ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n", -- __func__, stats.tx_invalid_descs, -+ ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n", -+ __func__, -+ (unsigned long long)stats.tx_invalid_descs, - ifobject->xsk->pkt_stream->nb_pkts); - return TEST_FAILURE; - } --- -2.34.1 - diff --git a/ci/diffs/0001-tools-resolve_btfids-fix-cross-compilation-to-non-host-endianness.patch b/ci/diffs/0001-tools-resolve_btfids-fix-cross-compilation-to-non-host-endianness.patch deleted file mode 100644 index 19d269d..0000000 --- a/ci/diffs/0001-tools-resolve_btfids-fix-cross-compilation-to-non-host-endianness.patch +++ /dev/null @@ -1,142 +0,0 @@ -From 3772e6cdb51f21a11df2acf6aa431cc8b9137bfb Mon Sep 17 00:00:00 2001 -From: Viktor Malik -Date: Tue, 6 Feb 2024 13:46:09 +0100 -Subject: [PATCH 1/2] tools/resolve_btfids: Refactor set sorting with types - from btf_ids.h - -Instead of using magic offsets to access BTF ID set data, leverage types -from btf_ids.h (btf_id_set and btf_id_set8) which define the actual -layout of the data. Thanks to this change, set sorting should also -continue working if the layout changes. - -This requires to sync the definition of 'struct btf_id_set8' from -include/linux/btf_ids.h to tools/include/linux/btf_ids.h. 
We don't sync -the rest of the file at the moment, b/c that would require to also sync -multiple dependent headers and we don't need any other defs from -btf_ids.h. - -Signed-off-by: Viktor Malik -Signed-off-by: Andrii Nakryiko -Acked-by: Daniel Xu -Link: https://lore.kernel.org/bpf/ff7f062ddf6a00815fda3087957c4ce667f50532.1707223196.git.vmalik@redhat.com ---- - tools/bpf/resolve_btfids/main.c | 35 ++++++++++++++++++++------------- - tools/include/linux/btf_ids.h | 9 +++++++++ - 2 files changed, 30 insertions(+), 14 deletions(-) - -diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c -index 27a23196d58e..32634f00abba 100644 ---- a/tools/bpf/resolve_btfids/main.c -+++ b/tools/bpf/resolve_btfids/main.c -@@ -70,6 +70,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -78,7 +79,7 @@ - #include - - #define BTF_IDS_SECTION ".BTF_ids" --#define BTF_ID "__BTF_ID__" -+#define BTF_ID_PREFIX "__BTF_ID__" - - #define BTF_STRUCT "struct" - #define BTF_UNION "union" -@@ -161,7 +162,7 @@ static int eprintf(int level, int var, const char *fmt, ...) - - static bool is_btf_id(const char *name) - { -- return name && !strncmp(name, BTF_ID, sizeof(BTF_ID) - 1); -+ return name && !strncmp(name, BTF_ID_PREFIX, sizeof(BTF_ID_PREFIX) - 1); - } - - static struct btf_id *btf_id__find(struct rb_root *root, const char *name) -@@ -441,7 +442,7 @@ static int symbols_collect(struct object *obj) - * __BTF_ID__TYPE__vfs_truncate__0 - * prefix = ^ - */ -- prefix = name + sizeof(BTF_ID) - 1; -+ prefix = name + sizeof(BTF_ID_PREFIX) - 1; - - /* struct */ - if (!strncmp(prefix, BTF_STRUCT, sizeof(BTF_STRUCT) - 1)) { -@@ -649,19 +650,18 @@ static int cmp_id(const void *pa, const void *pb) - static int sets_patch(struct object *obj) - { - Elf_Data *data = obj->efile.idlist; -- int *ptr = data->d_buf; - struct rb_node *next; - - next = rb_first(&obj->sets); - while (next) { -- unsigned long addr, idx; -+ struct btf_id_set8 *set8; -+ struct btf_id_set *set; -+ unsigned long addr, off; - struct btf_id *id; -- int *base; -- int cnt; - - id = rb_entry(next, struct btf_id, rb_node); - addr = id->addr[0]; -- idx = addr - obj->efile.idlist_addr; -+ off = addr - obj->efile.idlist_addr; - - /* sets are unique */ - if (id->addr_cnt != 1) { -@@ -670,14 +670,21 @@ static int sets_patch(struct object *obj) - return -1; - } - -- idx = idx / sizeof(int); -- base = &ptr[idx] + (id->is_set8 ? 2 : 1); -- cnt = ptr[idx]; -+ if (id->is_set) { -+ set = data->d_buf + off; -+ qsort(set->ids, set->cnt, sizeof(set->ids[0]), cmp_id); -+ } else { -+ set8 = data->d_buf + off; -+ /* -+ * Make sure id is at the beginning of the pairs -+ * struct, otherwise the below qsort would not work. -+ */ -+ BUILD_BUG_ON(set8->pairs != &set8->pairs[0].id); -+ qsort(set8->pairs, set8->cnt, sizeof(set8->pairs[0]), cmp_id); -+ } - - pr_debug("sorting addr %5lu: cnt %6d [%s]\n", -- (idx + 1) * sizeof(int), cnt, id->name); -- -- qsort(base, cnt, id->is_set8 ? sizeof(uint64_t) : sizeof(int), cmp_id); -+ off, id->is_set ? 
set->cnt : set8->cnt, id->name); - - next = rb_next(next); - } -diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h -index 2f882d5cb30f..72535f00572f 100644 ---- a/tools/include/linux/btf_ids.h -+++ b/tools/include/linux/btf_ids.h -@@ -8,6 +8,15 @@ struct btf_id_set { - u32 ids[]; - }; - -+struct btf_id_set8 { -+ u32 cnt; -+ u32 flags; -+ struct { -+ u32 id; -+ u32 flags; -+ } pairs[]; -+}; -+ - #ifdef CONFIG_DEBUG_INFO_BTF - - #include /* for __PASTE */ --- -2.39.3 - - - diff --git a/ci/diffs/0001-tracing-kprobes-Fix-symbol-counting-logic-by-looking.patch b/ci/diffs/0001-tracing-kprobes-Fix-symbol-counting-logic-by-looking.patch deleted file mode 100644 index 24ebc23..0000000 --- a/ci/diffs/0001-tracing-kprobes-Fix-symbol-counting-logic-by-looking.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 08969a676d234a178ff9f8c67936a2ad98a741eb Mon Sep 17 00:00:00 2001 -From: Andrii Nakryiko -Date: Fri, 27 Oct 2023 16:22:24 -0700 -Subject: [PATCH] tracing/kprobes: Fix symbol counting logic by looking at - modules as well - -Recent changes to count number of matching symbols when creating -a kprobe event failed to take into account kernel modules. As such, it -breaks kprobes on kernel module symbols, by assuming there is no match. - -Fix this my calling module_kallsyms_on_each_symbol() in addition to -kallsyms_on_each_match_symbol() to perform a proper counting. - -Cc: Francis Laniel -Cc: stable@vger.kernel.org -Cc: Masami Hiramatsu -Cc: Steven Rostedt -Fixes: b022f0c7e404 ("tracing/kprobes: Return EADDRNOTAVAIL when func matches several symbols") -Signed-off-by: Andrii Nakryiko ---- - kernel/trace/trace_kprobe.c | 24 ++++++++++++++++++++---- - 1 file changed, 20 insertions(+), 4 deletions(-) - -diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c -index effcaede4759..1efb27f35963 100644 ---- a/kernel/trace/trace_kprobe.c -+++ b/kernel/trace/trace_kprobe.c -@@ -714,14 +714,30 @@ static int count_symbols(void *data, unsigned long unused) - return 0; - } - -+struct sym_count_ctx { -+ unsigned int count; -+ const char *name; -+}; -+ -+static int count_mod_symbols(void *data, const char *name, unsigned long unused) -+{ -+ struct sym_count_ctx *ctx = data; -+ -+ if (strcmp(name, ctx->name) == 0) -+ ctx->count++; -+ -+ return 0; -+} -+ - static unsigned int number_of_same_symbols(char *func_name) - { -- unsigned int count; -+ struct sym_count_ctx ctx = { .count = 0, .name = func_name }; -+ -+ kallsyms_on_each_match_symbol(count_symbols, func_name, &ctx.count); - -- count = 0; -- kallsyms_on_each_match_symbol(count_symbols, func_name, &count); -+ module_kallsyms_on_each_symbol(NULL, count_mod_symbols, &ctx); - -- return count; -+ return ctx.count; - } - - static int __trace_kprobe_create(int argc, const char *argv[]) --- -2.34.1 - diff --git a/ci/diffs/0002-tools-resolve_btfids-fix-cross-compilation-to-non-host-endianness.patch b/ci/diffs/0002-tools-resolve_btfids-fix-cross-compilation-to-non-host-endianness.patch deleted file mode 100644 index c4d6769..0000000 --- a/ci/diffs/0002-tools-resolve_btfids-fix-cross-compilation-to-non-host-endianness.patch +++ /dev/null @@ -1,117 +0,0 @@ -From c3dcadfdf2bf8f01471066700c098b5185240df6 Mon Sep 17 00:00:00 2001 -From: Viktor Malik -Date: Tue, 6 Feb 2024 13:46:10 +0100 -Subject: [PATCH 2/2] tools/resolve_btfids: Fix cross-compilation to non-host - endianness - -The .BTF_ids section is pre-filled with zeroed BTF ID entries during the -build and afterwards patched by resolve_btfids with correct values. 
-Since resolve_btfids always writes in host-native endianness, it relies -on libelf to do the translation when the target ELF is cross-compiled to -a different endianness (this was introduced in commit 61e8aeda9398 -("bpf: Fix libelf endian handling in resolv_btfids")). - -Unfortunately, the translation will corrupt the flags fields of SET8 -entries because these were written during vmlinux compilation and are in -the correct endianness already. This will lead to numerous selftests -failures such as: - - $ sudo ./test_verifier 502 502 - #502/p sleepable fentry accept FAIL - Failed to load prog 'Invalid argument'! - bpf_fentry_test1 is not sleepable - verification time 34 usec - stack depth 0 - processed 0 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0 - Summary: 0 PASSED, 0 SKIPPED, 1 FAILED - -Since it's not possible to instruct libelf to translate just certain -values, let's manually bswap the flags (both global and entry flags) in -resolve_btfids when needed, so that libelf then translates everything -correctly. - -Fixes: ef2c6f370a63 ("tools/resolve_btfids: Add support for 8-byte BTF sets") -Signed-off-by: Viktor Malik -Signed-off-by: Andrii Nakryiko -Link: https://lore.kernel.org/bpf/7b6bff690919555574ce0f13d2a5996cacf7bf69.1707223196.git.vmalik@redhat.com ---- - tools/bpf/resolve_btfids/main.c | 35 +++++++++++++++++++++++++++++++++ - 1 file changed, 35 insertions(+) - -diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c -index 32634f00abba..d9520cb826b3 100644 ---- a/tools/bpf/resolve_btfids/main.c -+++ b/tools/bpf/resolve_btfids/main.c -@@ -90,6 +90,14 @@ - - #define ADDR_CNT 100 - -+#if __BYTE_ORDER == __LITTLE_ENDIAN -+# define ELFDATANATIVE ELFDATA2LSB -+#elif __BYTE_ORDER == __BIG_ENDIAN -+# define ELFDATANATIVE ELFDATA2MSB -+#else -+# error "Unknown machine endianness!" -+#endif -+ - struct btf_id { - struct rb_node rb_node; - char *name; -@@ -117,6 +125,7 @@ struct object { - int idlist_shndx; - size_t strtabidx; - unsigned long idlist_addr; -+ int encoding; - } efile; - - struct rb_root sets; -@@ -320,6 +329,7 @@ static int elf_collect(struct object *obj) - { - Elf_Scn *scn = NULL; - size_t shdrstrndx; -+ GElf_Ehdr ehdr; - int idx = 0; - Elf *elf; - int fd; -@@ -351,6 +361,13 @@ static int elf_collect(struct object *obj) - return -1; - } - -+ if (gelf_getehdr(obj->efile.elf, &ehdr) == NULL) { -+ pr_err("FAILED cannot get ELF header: %s\n", -+ elf_errmsg(-1)); -+ return -1; -+ } -+ obj->efile.encoding = ehdr.e_ident[EI_DATA]; -+ - /* - * Scan all the elf sections and look for save data - * from .BTF_ids section and symbols. -@@ -681,6 +698,24 @@ static int sets_patch(struct object *obj) - */ - BUILD_BUG_ON(set8->pairs != &set8->pairs[0].id); - qsort(set8->pairs, set8->cnt, sizeof(set8->pairs[0]), cmp_id); -+ -+ /* -+ * When ELF endianness does not match endianness of the -+ * host, libelf will do the translation when updating -+ * the ELF. This, however, corrupts SET8 flags which are -+ * already in the target endianness. So, let's bswap -+ * them to the host endianness and libelf will then -+ * correctly translate everything. 
-+ */ -+ if (obj->efile.encoding != ELFDATANATIVE) { -+ int i; -+ -+ set8->flags = bswap_32(set8->flags); -+ for (i = 0; i < set8->cnt; i++) { -+ set8->pairs[i].flags = -+ bswap_32(set8->pairs[i].flags); -+ } -+ } - } - - pr_debug("sorting addr %5lu: cnt %6d [%s]\n", --- -2.39.3 - diff --git a/ci/diffs/0099-s390x_nolockdep.diff b/ci/diffs/0099-s390x_nolockdep.diff deleted file mode 100644 index 44c2d1a..0000000 --- a/ci/diffs/0099-s390x_nolockdep.diff +++ /dev/null @@ -1,48 +0,0 @@ -From 470d0c7874ac638ea62cddc3a20ec047fa4ab539 Mon Sep 17 00:00:00 2001 -From: Manu Bretelle -Date: Wed, 14 Feb 2024 17:25:35 -0800 -Subject: [PATCH] bpf/selftests: disable lockdep on s390x - -Tests are slow to run on s390x, this should make them faster. - -Signed-off-by: Manu Bretelle ---- - tools/testing/selftests/bpf/config.s390x | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x -index 706931a8c2c69..67bfd62b0b582 100644 ---- a/tools/testing/selftests/bpf/config.s390x -+++ b/tools/testing/selftests/bpf/config.s390x -@@ -23,11 +23,11 @@ CONFIG_CPUSETS=y - CONFIG_CRASH_DUMP=y - CONFIG_CRYPTO_USER_API_RNG=y - CONFIG_CRYPTO_USER_API_SKCIPHER=y --CONFIG_DEBUG_ATOMIC_SLEEP=y -+CONFIG_DEBUG_ATOMIC_SLEEP=n - CONFIG_DEBUG_INFO_BTF=y - CONFIG_DEBUG_INFO_DWARF4=y - CONFIG_DEBUG_LIST=y --CONFIG_DEBUG_LOCKDEP=y -+CONFIG_DEBUG_LOCKDEP=n - CONFIG_DEBUG_NOTIFIERS=y - CONFIG_DEBUG_PAGEALLOC=y - CONFIG_DEBUG_SECTION_MISMATCH=y -@@ -71,7 +71,7 @@ CONFIG_KRETPROBES=y - CONFIG_KSM=y - CONFIG_LATENCYTOP=y - CONFIG_LIVEPATCH=y --CONFIG_LOCK_STAT=y -+CONFIG_LOCK_STAT=n - CONFIG_MACVLAN=y - CONFIG_MACVTAP=y - CONFIG_MAGIC_SYSRQ=y -@@ -101,7 +101,7 @@ CONFIG_PCI=y - CONFIG_POSIX_MQUEUE=y - CONFIG_PROC_KCORE=y - CONFIG_PROFILING=y --CONFIG_PROVE_LOCKING=y -+CONFIG_PROVE_LOCKING=n - CONFIG_PTDUMP_DEBUGFS=y - CONFIG_RC_DEVICES=y - CONFIG_RC_LOOPBACK=y diff --git a/ci/diffs/0099-selftest-cross-compile.diff b/ci/diffs/0099-selftest-cross-compile.diff deleted file mode 100644 index e873259..0000000 --- a/ci/diffs/0099-selftest-cross-compile.diff +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile -index a38a3001527c..af68528cc944 100644 ---- a/tools/testing/selftests/bpf/Makefile -+++ b/tools/testing/selftests/bpf/Makefile -@@ -304,7 +304,7 @@ $(OUTPUT)/test_maps: $(TESTING_HELPERS) - $(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS) $(UNPRIV_HELPERS) - $(OUTPUT)/xsk.o: $(BPFOBJ) - --BPFTOOL ?= $(DEFAULT_BPFTOOL) -+BPFTOOL ?= $(TRUNNER_BPFTOOL) - $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ - $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool - $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ diff --git a/ci/diffs/0199-iov_iter-fix-advancing-slot-in-iter_folioq_get_pages.patch b/ci/diffs/0199-iov_iter-fix-advancing-slot-in-iter_folioq_get_pages.patch deleted file mode 100644 index b81d22a..0000000 --- a/ci/diffs/0199-iov_iter-fix-advancing-slot-in-iter_folioq_get_pages.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 0d24852bd71ec85ca0016b6d6fc997e6a3381552 Mon Sep 17 00:00:00 2001 -From: Omar Sandoval -Date: Mon, 30 Sep 2024 11:55:00 -0700 -Subject: [PATCH] iov_iter: fix advancing slot in iter_folioq_get_pages() - -iter_folioq_get_pages() decides to advance to the next folioq slot when -it has reached the end of the current folio. 
However, it is checking -offset, which is the beginning of the current part, instead of -iov_offset, which is adjusted to the end of the current part, so it -doesn't advance the slot when it's supposed to. As a result, on the next -iteration, we'll use the same folio with an out-of-bounds offset and -return an unrelated page. - -This manifested as various crashes and other failures in 9pfs in drgn's -VM testing setup and BPF CI. - -Fixes: db0aa2e9566f ("mm: Define struct folio_queue and ITER_FOLIOQ to handle a sequence of folios") -Link: https://lore.kernel.org/linux-fsdevel/20240923183432.1876750-1-chantr4@gmail.com/ -Tested-by: Manu Bretelle -Signed-off-by: Omar Sandoval -Link: https://lore.kernel.org/r/cbaf141ba6c0e2e209717d02746584072844841a.1727722269.git.osandov@fb.com -Tested-by: Eduard Zingerman -Tested-by: Leon Romanovsky -Tested-by: Joey Gouly -Acked-by: David Howells -Signed-off-by: Christian Brauner ---- - lib/iov_iter.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/lib/iov_iter.c b/lib/iov_iter.c -index 97003155b..1abb32c0d 100644 ---- a/lib/iov_iter.c -+++ b/lib/iov_iter.c -@@ -1033,7 +1033,7 @@ static ssize_t iter_folioq_get_pages(struct iov_iter *iter, - if (maxpages == 0 || extracted >= maxsize) - break; - -- if (offset >= fsize) { -+ if (iov_offset >= fsize) { - iov_offset = 0; - slot++; - if (slot == folioq_nr_slots(folioq) && folioq->next) { --- -2.34.1 - diff --git a/ci/diffs/0299-selftests-bpf-Fix-uprobe-consumer-test.patch b/ci/diffs/0299-selftests-bpf-Fix-uprobe-consumer-test.patch deleted file mode 100644 index 11aa362..0000000 --- a/ci/diffs/0299-selftests-bpf-Fix-uprobe-consumer-test.patch +++ /dev/null @@ -1,58 +0,0 @@ -From affb32e4f056883f285f8535b766293b85752fb4 Mon Sep 17 00:00:00 2001 -From: Jiri Olsa -Date: Tue, 24 Sep 2024 13:07:30 +0200 -Subject: [PATCH] selftests/bpf: Fix uprobe consumer test - -With newly merged code the uprobe behaviour is slightly different -and affects uprobe consumer test. - -We no longer need to check if the uprobe object is still preserved -after removing last uretprobe, because it stays as long as there's -pending/installed uretprobe instance. - -This allows to run uretprobe consumers registered 'after' uprobe was -hit even if previous uretprobe got unregistered before being hit. - -The uprobe object will be now removed after the last uprobe ref is -released and in such case it's held by ri->uprobe (return instance) -which is released after the uretprobe is hit. 
- -Reported-by: Ihor Solodrai -Signed-off-by: Jiri Olsa -Signed-off-by: Daniel Borkmann -Tested-by: Ihor Solodrai -Closes: https://lore.kernel.org/bpf/w6U8Z9fdhjnkSp2UaFaV1fGqJXvfLEtDKEUyGDkwmoruDJ_AgF_c0FFhrkeKW18OqiP-05s9yDKiT6X-Ns-avN_ABf0dcUkXqbSJN1TQSXo=@pm.me/ ---- - .../testing/selftests/bpf/prog_tests/uprobe_multi_test.c | 9 +-------- - 1 file changed, 1 insertion(+), 8 deletions(-) - -diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c -index 844f6fc8487b..c1ac813ff9ba 100644 ---- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c -+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c -@@ -869,21 +869,14 @@ static void consumer_test(struct uprobe_multi_consumers *skel, - fmt = "prog 0/1: uprobe"; - } else { - /* -- * uprobe return is tricky ;-) -- * - * to trigger uretprobe consumer, the uretprobe needs to be installed, - * which means one of the 'return' uprobes was alive when probe was hit: - * - * idxs: 2/3 uprobe return in 'installed' mask -- * -- * in addition if 'after' state removes everything that was installed in -- * 'before' state, then uprobe kernel object goes away and return uprobe -- * is not installed and we won't hit it even if it's in 'after' state. - */ - unsigned long had_uretprobes = before & 0b1100; /* is uretprobe installed */ -- unsigned long probe_preserved = before & after; /* did uprobe go away */ - -- if (had_uretprobes && probe_preserved && test_bit(idx, after)) -+ if (had_uretprobes && test_bit(idx, after)) - val++; - fmt = "idx 2/3: uretprobe"; - } --- -2.34.1 - diff --git a/ci/diffs/0399-selftests-sched_ext-fix-build-after-renames-in-sched.patch b/ci/diffs/0399-selftests-sched_ext-fix-build-after-renames-in-sched.patch deleted file mode 100644 index ba37429..0000000 --- a/ci/diffs/0399-selftests-sched_ext-fix-build-after-renames-in-sched.patch +++ /dev/null @@ -1,231 +0,0 @@ -From 5565144e82b97c5d2082ab19866836dfe5b2e592 Mon Sep 17 00:00:00 2001 -From: Ihor Solodrai -Date: Thu, 21 Nov 2024 13:20:46 -0800 -Subject: [PATCH] selftests/sched_ext: fix build after renames in sched_ext API - -The selftests are falining to build on current tip of bpf-next and -sched_ext [1]. This has broken BPF CI [2] after merge from upstream. - -Use appropriate function names in the selftests according to the -recent changes in the sched_ext API [3]. 
- -[1] -https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=fc39fb56917bb3cb53e99560ca3612a84456ada2 -[2] https://github.com/kernel-patches/bpf/actions/runs/11959327258/job/33340923745 -[3] https://lore.kernel.org/all/20241109194853.580310-1-tj@kernel.org/ - -Signed-off-by: Ihor Solodrai ---- - .../testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c | 2 +- - .../selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c | 4 ++-- - tools/testing/selftests/sched_ext/dsp_local_on.bpf.c | 2 +- - .../selftests/sched_ext/enq_select_cpu_fails.bpf.c | 2 +- - tools/testing/selftests/sched_ext/exit.bpf.c | 4 ++-- - tools/testing/selftests/sched_ext/maximal.bpf.c | 4 ++-- - tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c | 2 +- - .../selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c | 2 +- - .../testing/selftests/sched_ext/select_cpu_dispatch.bpf.c | 2 +- - .../selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c | 2 +- - .../selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c | 4 ++-- - tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c | 8 ++++---- - 12 files changed, 19 insertions(+), 19 deletions(-) - -diff --git a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c -index 37d9bf6fb745..6f4c3f5a1c5d 100644 ---- a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c -+++ b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c -@@ -20,7 +20,7 @@ s32 BPF_STRUCT_OPS(ddsp_bogus_dsq_fail_select_cpu, struct task_struct *p, - * If we dispatch to a bogus DSQ that will fall back to the - * builtin global DSQ, we fail gracefully. - */ -- scx_bpf_dispatch_vtime(p, 0xcafef00d, SCX_SLICE_DFL, -+ scx_bpf_dsq_insert_vtime(p, 0xcafef00d, SCX_SLICE_DFL, - p->scx.dsq_vtime, 0); - return cpu; - } -diff --git a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c -index dffc97d9cdf1..e4a55027778f 100644 ---- a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c -+++ b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c -@@ -17,8 +17,8 @@ s32 BPF_STRUCT_OPS(ddsp_vtimelocal_fail_select_cpu, struct task_struct *p, - - if (cpu >= 0) { - /* Shouldn't be allowed to vtime dispatch to a builtin DSQ. 
*/ -- scx_bpf_dispatch_vtime(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, -- p->scx.dsq_vtime, 0); -+ scx_bpf_dsq_insert_vtime(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, -+ p->scx.dsq_vtime, 0); - return cpu; - } - -diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c -index 6a7db1502c29..6325bf76f47e 100644 ---- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c -+++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c -@@ -45,7 +45,7 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev) - - target = bpf_get_prandom_u32() % nr_cpus; - -- scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | target, SCX_SLICE_DFL, 0); -+ scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | target, SCX_SLICE_DFL, 0); - bpf_task_release(p); - } - -diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c -index 1efb50d61040..a7cf868d5e31 100644 ---- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c -+++ b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c -@@ -31,7 +31,7 @@ void BPF_STRUCT_OPS(enq_select_cpu_fails_enqueue, struct task_struct *p, - /* Can only call from ops.select_cpu() */ - scx_bpf_select_cpu_dfl(p, 0, 0, &found); - -- scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); -+ scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); - } - - SEC(".struct_ops.link") -diff --git a/tools/testing/selftests/sched_ext/exit.bpf.c b/tools/testing/selftests/sched_ext/exit.bpf.c -index d75d4faf07f6..4bc36182d3ff 100644 ---- a/tools/testing/selftests/sched_ext/exit.bpf.c -+++ b/tools/testing/selftests/sched_ext/exit.bpf.c -@@ -33,7 +33,7 @@ void BPF_STRUCT_OPS(exit_enqueue, struct task_struct *p, u64 enq_flags) - if (exit_point == EXIT_ENQUEUE) - EXIT_CLEANLY(); - -- scx_bpf_dispatch(p, DSQ_ID, SCX_SLICE_DFL, enq_flags); -+ scx_bpf_dsq_insert(p, DSQ_ID, SCX_SLICE_DFL, enq_flags); - } - - void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p) -@@ -41,7 +41,7 @@ void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p) - if (exit_point == EXIT_DISPATCH) - EXIT_CLEANLY(); - -- scx_bpf_consume(DSQ_ID); -+ scx_bpf_dsq_move_to_local(DSQ_ID); - } - - void BPF_STRUCT_OPS(exit_enable, struct task_struct *p) -diff --git a/tools/testing/selftests/sched_ext/maximal.bpf.c b/tools/testing/selftests/sched_ext/maximal.bpf.c -index 4d4cd8d966db..4c005fa71810 100644 ---- a/tools/testing/selftests/sched_ext/maximal.bpf.c -+++ b/tools/testing/selftests/sched_ext/maximal.bpf.c -@@ -20,7 +20,7 @@ s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu, - - void BPF_STRUCT_OPS(maximal_enqueue, struct task_struct *p, u64 enq_flags) - { -- scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); -+ scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); - } - - void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags) -@@ -28,7 +28,7 @@ void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags) - - void BPF_STRUCT_OPS(maximal_dispatch, s32 cpu, struct task_struct *prev) - { -- scx_bpf_consume(SCX_DSQ_GLOBAL); -+ scx_bpf_dsq_move_to_local(SCX_DSQ_GLOBAL); - } - - void BPF_STRUCT_OPS(maximal_runnable, struct task_struct *p, u64 enq_flags) -diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c -index f171ac470970..13d0f5be788d 100644 ---- a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c -+++ 
b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c -@@ -30,7 +30,7 @@ void BPF_STRUCT_OPS(select_cpu_dfl_enqueue, struct task_struct *p, - } - scx_bpf_put_idle_cpumask(idle_mask); - -- scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); -+ scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); - } - - SEC(".struct_ops.link") -diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c -index 9efdbb7da928..815f1d5d61ac 100644 ---- a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c -+++ b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c -@@ -67,7 +67,7 @@ void BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_enqueue, struct task_struct *p, - saw_local = true; - } - -- scx_bpf_dispatch(p, dsq_id, SCX_SLICE_DFL, enq_flags); -+ scx_bpf_dsq_insert(p, dsq_id, SCX_SLICE_DFL, enq_flags); - } - - s32 BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_init_task, -diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c -index 59bfc4f36167..4bb99699e920 100644 ---- a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c -+++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c -@@ -29,7 +29,7 @@ s32 BPF_STRUCT_OPS(select_cpu_dispatch_select_cpu, struct task_struct *p, - cpu = prev_cpu; - - dispatch: -- scx_bpf_dispatch(p, dsq_id, SCX_SLICE_DFL, 0); -+ scx_bpf_dsq_insert(p, dsq_id, SCX_SLICE_DFL, 0); - return cpu; - } - -diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c -index 3bbd5fcdfb18..2a75de11b2cf 100644 ---- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c -+++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c -@@ -18,7 +18,7 @@ s32 BPF_STRUCT_OPS(select_cpu_dispatch_bad_dsq_select_cpu, struct task_struct *p - s32 prev_cpu, u64 wake_flags) - { - /* Dispatching to a random DSQ should fail. */ -- scx_bpf_dispatch(p, 0xcafef00d, SCX_SLICE_DFL, 0); -+ scx_bpf_dsq_insert(p, 0xcafef00d, SCX_SLICE_DFL, 0); - - return prev_cpu; - } -diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c -index 0fda57fe0ecf..99d075695c97 100644 ---- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c -+++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c -@@ -18,8 +18,8 @@ s32 BPF_STRUCT_OPS(select_cpu_dispatch_dbl_dsp_select_cpu, struct task_struct *p - s32 prev_cpu, u64 wake_flags) - { - /* Dispatching twice in a row is disallowed. */ -- scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); -- scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); -+ scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); -+ scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); - - return prev_cpu; - } -diff --git a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c -index e6c67bcf5e6e..bfcb96cd4954 100644 ---- a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c -+++ b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c -@@ -2,8 +2,8 @@ - /* - * A scheduler that validates that enqueue flags are properly stored and - * applied at dispatch time when a task is directly dispatched from -- * ops.select_cpu(). 
We validate this by using scx_bpf_dispatch_vtime(), and -- * making the test a very basic vtime scheduler. -+ * ops.select_cpu(). We validate this by using scx_bpf_dsq_insert_vtime(), -+ * and making the test a very basic vtime scheduler. - * - * Copyright (c) 2024 Meta Platforms, Inc. and affiliates. - * Copyright (c) 2024 David Vernet -@@ -47,13 +47,13 @@ s32 BPF_STRUCT_OPS(select_cpu_vtime_select_cpu, struct task_struct *p, - cpu = prev_cpu; - scx_bpf_test_and_clear_cpu_idle(cpu); - ddsp: -- scx_bpf_dispatch_vtime(p, VTIME_DSQ, SCX_SLICE_DFL, task_vtime(p), 0); -+ scx_bpf_dsq_insert_vtime(p, VTIME_DSQ, SCX_SLICE_DFL, task_vtime(p), 0); - return cpu; - } - - void BPF_STRUCT_OPS(select_cpu_vtime_dispatch, s32 cpu, struct task_struct *p) - { -- if (scx_bpf_consume(VTIME_DSQ)) -+ if (scx_bpf_dsq_move_to_local(VTIME_DSQ)) - consumed = true; - } - --- -2.47.0 - diff --git a/ci/diffs/0499-samples-bpf-fix-samples-compilation.patch b/ci/diffs/0499-samples-bpf-fix-samples-compilation.patch deleted file mode 100644 index 1f95c3c..0000000 --- a/ci/diffs/0499-samples-bpf-fix-samples-compilation.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 80a5958a52b86e39c1a1bf5f4702011c0cf6ab4f Mon Sep 17 00:00:00 2001 -From: Eduard Zingerman -Date: Mon, 2 Dec 2024 12:14:46 -0800 -Subject: [PATCH] samples/bpf: fix samples compilation - -Commit [0] breaks samples build. - -TODO: moar details here - -[0] 13b25489b6f8 ("kbuild: change working directory to external module directory with M=") - -Signed-off-by: Eduard Zingerman -Signed-off-by: Ihor Solodrai ---- - samples/bpf/Makefile | 13 +++++++------ - 1 file changed, 7 insertions(+), 6 deletions(-) - -diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile -index bcf103a4c14f..ee10dbf1b471 100644 ---- a/samples/bpf/Makefile -+++ b/samples/bpf/Makefile -@@ -146,13 +146,14 @@ ifeq ($(ARCH), x86) - BPF_EXTRA_CFLAGS += -fcf-protection - endif - --TPROGS_CFLAGS += -Wall -O2 --TPROGS_CFLAGS += -Wmissing-prototypes --TPROGS_CFLAGS += -Wstrict-prototypes --TPROGS_CFLAGS += $(call try-run,\ -+COMMON_CFLAGS += -Wall -O2 -+COMMON_CFLAGS += -Wmissing-prototypes -+COMMON_CFLAGS += -Wstrict-prototypes -+COMMON_CFLAGS += $(call try-run,\ - printf "int main() { return 0; }" |\ - $(CC) -Werror -fsanitize=bounds -x c - -o "$$TMP",-fsanitize=bounds,) - -+TPROGS_CFLAGS += $(COMMON_CFLAGS) - TPROGS_CFLAGS += -I$(objtree)/usr/include - TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ - TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE) -@@ -229,7 +230,7 @@ clean: - - $(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT) - # Fix up variables inherited from Kbuild that tools/ build system won't like -- $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \ -+ $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(COMMON_CFLAGS)" \ - LDFLAGS="$(TPROGS_LDFLAGS)" srctree=$(BPF_SAMPLES_PATH)/../../ \ - O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \ - $@ install_headers -@@ -305,7 +306,7 @@ $(obj)/$(TRACE_HELPERS): TPROGS_CFLAGS := $(TPROGS_CFLAGS) -D__must_check= - -include $(BPF_SAMPLES_PATH)/Makefile.target - - VMLINUX_BTF_PATHS ?= $(abspath $(if $(O),$(O)/vmlinux)) \ -- $(abspath $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)) \ -+ $(abspath $(if $(objtree),$(objtree)/vmlinux)) \ - $(abspath ./vmlinux) - VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) - --- -2.47.0 - diff --git a/ci/diffs/2000-s390-fgraph-Fix-to-remove-ftrace_test_recursion_tryl.patch 
b/ci/diffs/2000-s390-fgraph-Fix-to-remove-ftrace_test_recursion_tryl.patch deleted file mode 100644 index 7e38b1a..0000000 --- a/ci/diffs/2000-s390-fgraph-Fix-to-remove-ftrace_test_recursion_tryl.patch +++ /dev/null @@ -1,46 +0,0 @@ -From faf291ff4beaef8dedebd166f11f815cdee257dc Mon Sep 17 00:00:00 2001 -From: "Masami Hiramatsu (Google)" -Date: Wed, 29 Jan 2025 00:29:37 +0900 -Subject: [PATCH 2000/2001] s390: fgraph: Fix to remove - ftrace_test_recursion_trylock() - -Fix to remove ftrace_test_recursion_trylock() from ftrace_graph_func() -because commit d576aec24df9 ("fgraph: Get ftrace recursion lock in -function_graph_enter") has been moved it to function_graph_enter_regs() -already. - -Reported-by: Jiri Olsa -Closes: https://lore.kernel.org/all/Z5O0shrdgeExZ2kF@krava/ -Fixes: d576aec24df9 ("fgraph: Get ftrace recursion lock in function_graph_enter") -Signed-off-by: Masami Hiramatsu (Google) -Tested-by: Jiri Olsa ---- - arch/s390/kernel/ftrace.c | 5 ----- - 1 file changed, 5 deletions(-) - -diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c -index c0b2c97efefb..63ba6306632e 100644 ---- a/arch/s390/kernel/ftrace.c -+++ b/arch/s390/kernel/ftrace.c -@@ -266,18 +266,13 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct ftrace_regs *fregs) - { - unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14]; -- int bit; - - if (unlikely(ftrace_graph_is_dead())) - return; - if (unlikely(atomic_read(¤t->tracing_graph_pause))) - return; -- bit = ftrace_test_recursion_trylock(ip, *parent); -- if (bit < 0) -- return; - if (!function_graph_enter_regs(*parent, ip, 0, parent, fregs)) - *parent = (unsigned long)&return_to_handler; -- ftrace_test_recursion_unlock(bit); - } - - #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ --- -2.48.1 - diff --git a/ci/diffs/2001-s390-tracing-Define-ftrace_get_symaddr-for-s390.patch b/ci/diffs/2001-s390-tracing-Define-ftrace_get_symaddr-for-s390.patch deleted file mode 100644 index 6648320..0000000 --- a/ci/diffs/2001-s390-tracing-Define-ftrace_get_symaddr-for-s390.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 04fce0d606f59a62105729094013c4784492ec7b Mon Sep 17 00:00:00 2001 -From: "Masami Hiramatsu (Google)" -Date: Wed, 29 Jan 2025 00:29:48 +0900 -Subject: [PATCH 2001/2001] s390: tracing: Define ftrace_get_symaddr() for s390 - -Add ftrace_get_symaddr() for s390, which returns the symbol address -from ftrace's 'ip' parameter. 
- -Signed-off-by: Masami Hiramatsu (Google) ---- - arch/s390/include/asm/ftrace.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h -index a3b73a4f626e..185331e91f83 100644 ---- a/arch/s390/include/asm/ftrace.h -+++ b/arch/s390/include/asm/ftrace.h -@@ -51,6 +51,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) - { - return addr; - } -+#define ftrace_get_symaddr(fentry_ip) ((unsigned long)(fentry_ip)) - - #include - --- -2.48.1 - diff --git a/ci/diffs/2001-selftests-bpf-add-fno-strict-aliasing-to-BPF_CFLAGS.patch b/ci/diffs/2001-selftests-bpf-add-fno-strict-aliasing-to-BPF_CFLAGS.patch deleted file mode 100644 index 9b24de7..0000000 --- a/ci/diffs/2001-selftests-bpf-add-fno-strict-aliasing-to-BPF_CFLAGS.patch +++ /dev/null @@ -1,75 +0,0 @@ -From f44275e7155dc310d36516fc25be503da099781c Mon Sep 17 00:00:00 2001 -From: Ihor Solodrai -Date: Mon, 6 Jan 2025 20:17:31 +0000 -Subject: [PATCH] selftests/bpf: add -fno-strict-aliasing to BPF_CFLAGS - -Following the discussion at [1], set -fno-strict-aliasing flag for all -BPF object build rules. Remove now unnecessary -CFLAGS variables. - -[1] https://lore.kernel.org/bpf/20250106185447.951609-1-ihor.solodrai@pm.me/ - -CC: Jose E. Marchesi -Signed-off-by: Ihor Solodrai -Acked-by: Eduard Zingerman -Link: https://lore.kernel.org/r/20250106201728.1219791-1-ihor.solodrai@pm.me -Signed-off-by: Alexei Starovoitov ---- - tools/testing/selftests/bpf/Makefile | 28 +--------------------------- - 1 file changed, 1 insertion(+), 27 deletions(-) - -diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile -index eb4d21651aa7..d5be2f94deef 100644 ---- a/tools/testing/selftests/bpf/Makefile -+++ b/tools/testing/selftests/bpf/Makefile -@@ -54,21 +54,6 @@ PCAP_LIBS := $(shell $(PKG_CONFIG) --libs libpcap 2>/dev/null) - LDLIBS += $(PCAP_LIBS) - CFLAGS += $(PCAP_CFLAGS) - --# The following tests perform type punning and they may break strict --# aliasing rules, which are exploited by both GCC and clang by default --# while optimizing. This can lead to broken programs. --progs/bind4_prog.c-CFLAGS := -fno-strict-aliasing --progs/bind6_prog.c-CFLAGS := -fno-strict-aliasing --progs/dynptr_fail.c-CFLAGS := -fno-strict-aliasing --progs/linked_list_fail.c-CFLAGS := -fno-strict-aliasing --progs/map_kptr_fail.c-CFLAGS := -fno-strict-aliasing --progs/syscall.c-CFLAGS := -fno-strict-aliasing --progs/test_pkt_md_access.c-CFLAGS := -fno-strict-aliasing --progs/test_sk_lookup.c-CFLAGS := -fno-strict-aliasing --progs/timer_crash.c-CFLAGS := -fno-strict-aliasing --progs/test_global_func9.c-CFLAGS := -fno-strict-aliasing --progs/verifier_nocsr.c-CFLAGS := -fno-strict-aliasing -- - # Some utility functions use LLVM libraries - jit_disasm_helpers.c-CFLAGS = $(LLVM_CFLAGS) - -@@ -103,18 +88,6 @@ progs/btf_dump_test_case_packing.c-bpf_gcc-CFLAGS := -Wno-error - progs/btf_dump_test_case_padding.c-bpf_gcc-CFLAGS := -Wno-error - progs/btf_dump_test_case_syntax.c-bpf_gcc-CFLAGS := -Wno-error - --# The following tests do type-punning, via the __imm_insn macro, from --# `struct bpf_insn' to long and then uses the value. This triggers an --# "is used uninitialized" warning in GCC due to strict-aliasing --# rules. 
--progs/verifier_ref_tracking.c-bpf_gcc-CFLAGS := -fno-strict-aliasing --progs/verifier_unpriv.c-bpf_gcc-CFLAGS := -fno-strict-aliasing --progs/verifier_cgroup_storage.c-bpf_gcc-CFLAGS := -fno-strict-aliasing --progs/verifier_ld_ind.c-bpf_gcc-CFLAGS := -fno-strict-aliasing --progs/verifier_map_ret_val.c-bpf_gcc-CFLAGS := -fno-strict-aliasing --progs/verifier_spill_fill.c-bpf_gcc-CFLAGS := -fno-strict-aliasing --progs/verifier_subprog_precision.c-bpf_gcc-CFLAGS := -fno-strict-aliasing --progs/verifier_uninit.c-bpf_gcc-CFLAGS := -fno-strict-aliasing - endif - - ifneq ($(CLANG_CPUV4),) -@@ -474,6 +447,7 @@ CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) - BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ - -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ - -I$(abspath $(OUTPUT)/../usr/include) \ -+ -fno-strict-aliasing \ - -Wno-compare-distinct-pointer-types - # TODO: enable me -Wsign-compare - --- -2.47.1 - diff --git a/ci/diffs/2002-selftests-bpf-add-std-gnu11-to-BPF_CFLAGS-and-CFLAGS.patch b/ci/diffs/2002-selftests-bpf-add-std-gnu11-to-BPF_CFLAGS-and-CFLAGS.patch deleted file mode 100644 index 127b264..0000000 --- a/ci/diffs/2002-selftests-bpf-add-std-gnu11-to-BPF_CFLAGS-and-CFLAGS.patch +++ /dev/null @@ -1,63 +0,0 @@ -From bab18c7db44d3aa6c84450095451580922359c7a Mon Sep 17 00:00:00 2001 -From: Ihor Solodrai -Date: Tue, 7 Jan 2025 23:58:18 +0000 -Subject: [PATCH] selftests/bpf: add -std=gnu11 to BPF_CFLAGS and CFLAGS - -Latest versions of GCC BPF use C23 standard by default. This causes -compilation errors in vmlinux.h due to bool types declarations. - -Add -std=gnu11 to BPF_CFLAGS and CFLAGS. This aligns with the version -of the standard used when building the kernel currently [1]. - -For more details see the discussions at [2] and [3]. - -[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Makefile#n465 -[2] https://lore.kernel.org/bpf/EYcXjcKDCJY7Yb0GGtAAb7nLKPEvrgWdvWpuNzXm2qi6rYMZDixKv5KwfVVMBq17V55xyC-A1wIjrqG3aw-Imqudo9q9X7D7nLU2gWgbN0w=@pm.me/ -[3] https://lore.kernel.org/bpf/20250106202715.1232864-1-ihor.solodrai@pm.me/ - -CC: Jose E. 
Marchesi -Signed-off-by: Ihor Solodrai -Link: https://lore.kernel.org/r/20250107235813.2964472-1-ihor.solodrai@pm.me -Signed-off-by: Alexei Starovoitov ---- - tools/testing/selftests/bpf/Makefile | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile -index d5be2f94deef..ea9cee5de0f8 100644 ---- a/tools/testing/selftests/bpf/Makefile -+++ b/tools/testing/selftests/bpf/Makefile -@@ -41,7 +41,7 @@ srctree := $(patsubst %/,%,$(dir $(srctree))) - srctree := $(patsubst %/,%,$(dir $(srctree))) - endif - --CFLAGS += -g $(OPT_FLAGS) -rdynamic \ -+CFLAGS += -g $(OPT_FLAGS) -rdynamic -std=gnu11 \ - -Wall -Werror -fno-omit-frame-pointer \ - $(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \ - -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ -@@ -447,6 +447,7 @@ CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) - BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ - -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ - -I$(abspath $(OUTPUT)/../usr/include) \ -+ -std=gnu11 \ - -fno-strict-aliasing \ - -Wno-compare-distinct-pointer-types - # TODO: enable me -Wsign-compare -@@ -787,9 +788,12 @@ $(OUTPUT)/xdp_features: xdp_features.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xdp - $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ - - # Make sure we are able to include and link libbpf against c++. -+CXXFLAGS += $(CFLAGS) -+CXXFLAGS := $(subst -D_GNU_SOURCE=,,$(CXXFLAGS)) -+CXXFLAGS := $(subst -std=gnu11,-std=gnu++11,$(CXXFLAGS)) - $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) - $(call msg,CXX,,$@) -- $(Q)$(CXX) $(subst -D_GNU_SOURCE=,,$(CFLAGS)) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ -+ $(Q)$(CXX) $(CXXFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ - - # Benchmark runner - $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ) --- -2.47.1 - diff --git a/ci/diffs/8888-Revert-netfs-Change-the-read-result-collector-to-onl.patch b/ci/diffs/8888-Revert-netfs-Change-the-read-result-collector-to-onl.patch deleted file mode 100644 index 049685b..0000000 --- a/ci/diffs/8888-Revert-netfs-Change-the-read-result-collector-to-onl.patch +++ /dev/null @@ -1,2542 +0,0 @@ -From c98e72f3806a0d1a1d5aaed9638137b96608b567 Mon Sep 17 00:00:00 2001 -From: Ihor Solodrai -Date: Fri, 24 Jan 2025 07:31:33 -0800 -Subject: [PATCH] Revert "netfs: Change the read result collector to only use - one work item" - -This reverts commit e2d46f2ec332533816417b60933954173f602121. 
---- - fs/9p/vfs_addr.c | 3 +- - fs/afs/dir.c | 8 +- - fs/ceph/addr.c | 9 +- - fs/netfs/buffered_read.c | 160 ++++---- - fs/netfs/direct_read.c | 60 ++- - fs/netfs/internal.h | 21 +- - fs/netfs/main.c | 2 +- - fs/netfs/objects.c | 34 +- - fs/netfs/read_collect.c | 716 +++++++++++++++-------------------- - fs/netfs/read_pgpriv2.c | 203 ++++++---- - fs/netfs/read_retry.c | 207 +++++----- - fs/netfs/read_single.c | 37 +- - fs/netfs/write_collect.c | 4 +- - fs/netfs/write_issue.c | 2 +- - fs/netfs/write_retry.c | 14 +- - fs/smb/client/cifssmb.c | 2 - - fs/smb/client/smb2pdu.c | 5 +- - include/linux/netfs.h | 16 +- - include/trace/events/netfs.h | 79 +++- - 19 files changed, 763 insertions(+), 819 deletions(-) - -diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c -index 32619d146cbc..b38be6ff90bc 100644 ---- a/fs/9p/vfs_addr.c -+++ b/fs/9p/vfs_addr.c -@@ -81,7 +81,8 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq) - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); - if (pos + total >= i_size_read(rreq->inode)) - __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags); -- if (!err && total) { -+ -+ if (!err) { - subreq->transferred += total; - __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); - } -diff --git a/fs/afs/dir.c b/fs/afs/dir.c -index a843c36fc471..374f82d21902 100644 ---- a/fs/afs/dir.c -+++ b/fs/afs/dir.c -@@ -323,10 +323,8 @@ ssize_t afs_read_dir(struct afs_vnode *dvnode, struct file *file) - * haven't read it yet. - */ - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && -- test_bit(AFS_VNODE_DIR_READ, &dvnode->flags)) { -- ret = i_size; -+ test_bit(AFS_VNODE_DIR_READ, &dvnode->flags)) - goto valid; -- } - - up_read(&dvnode->validate_lock); - if (down_write_killable(&dvnode->validate_lock) < 0) -@@ -346,13 +344,11 @@ ssize_t afs_read_dir(struct afs_vnode *dvnode, struct file *file) - - set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags); - set_bit(AFS_VNODE_DIR_READ, &dvnode->flags); -- } else { -- ret = i_size; - } - - downgrade_write(&dvnode->validate_lock); - valid: -- return ret; -+ return i_size; - - error_unlock: - up_write(&dvnode->validate_lock); -diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c -index f5224a566b69..4deb38fa470e 100644 ---- a/fs/ceph/addr.c -+++ b/fs/ceph/addr.c -@@ -223,13 +223,10 @@ static void finish_netfs_read(struct ceph_osd_request *req) - subreq->len, i_size_read(req->r_inode)); - - /* no object means success but no data */ -- if (err == -ENOENT) { -- __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); -- __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); -+ if (err == -ENOENT) - err = 0; -- } else if (err == -EBLOCKLISTED) { -+ else if (err == -EBLOCKLISTED) - fsc->blocklisted = true; -- } - - if (err >= 0) { - if (sparse && err > 0) -@@ -245,8 +242,6 @@ static void finish_netfs_read(struct ceph_osd_request *req) - if (err > subreq->len) - err = subreq->len; - } -- if (err > 0) -- __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); - } - - if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) { -diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c -index f761d44b3436..5d7aa1f580f1 100644 ---- a/fs/netfs/buffered_read.c -+++ b/fs/netfs/buffered_read.c -@@ -121,6 +121,12 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq) - - subreq->io_iter = rreq->buffer.iter; - -+ if (iov_iter_is_folioq(&subreq->io_iter)) { -+ subreq->curr_folioq = (struct folio_queue *)subreq->io_iter.folioq; -+ subreq->curr_folioq_slot = subreq->io_iter.folioq_slot; -+ subreq->curr_folio_order = 
subreq->curr_folioq->orders[subreq->curr_folioq_slot]; -+ } -+ - iov_iter_truncate(&subreq->io_iter, subreq->len); - rolling_buffer_advance(&rreq->buffer, subreq->len); - return subreq->len; -@@ -141,6 +147,19 @@ static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rr - - } - -+void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async) -+{ -+ struct netfs_io_subrequest *subreq = priv; -+ -+ if (transferred_or_error > 0) { -+ subreq->transferred += transferred_or_error; -+ subreq->error = 0; -+ } else { -+ subreq->error = transferred_or_error; -+ } -+ schedule_work(&subreq->work); -+} -+ - /* - * Issue a read against the cache. - * - Eats the caller's ref on subreq. -@@ -155,47 +174,6 @@ static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq, - netfs_cache_read_terminated, subreq); - } - --static void netfs_issue_read(struct netfs_io_request *rreq, -- struct netfs_io_subrequest *subreq) --{ -- struct netfs_io_stream *stream = &rreq->io_streams[0]; -- -- __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); -- -- /* We add to the end of the list whilst the collector may be walking -- * the list. The collector only goes nextwards and uses the lock to -- * remove entries off of the front. -- */ -- spin_lock(&rreq->lock); -- list_add_tail(&subreq->rreq_link, &stream->subrequests); -- if (list_is_first(&subreq->rreq_link, &stream->subrequests)) { -- stream->front = subreq; -- if (!stream->active) { -- stream->collected_to = stream->front->start; -- /* Store list pointers before active flag */ -- smp_store_release(&stream->active, true); -- } -- } -- -- spin_unlock(&rreq->lock); -- -- switch (subreq->source) { -- case NETFS_DOWNLOAD_FROM_SERVER: -- rreq->netfs_ops->issue_read(subreq); -- break; -- case NETFS_READ_FROM_CACHE: -- netfs_read_cache_to_pagecache(rreq, subreq); -- break; -- default: -- __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); -- subreq->error = 0; -- iov_iter_zero(subreq->len, &subreq->io_iter); -- subreq->transferred = subreq->len; -- netfs_read_subreq_terminated(subreq); -- break; -- } --} -- - /* - * Perform a read to the pagecache from a series of sources of different types, - * slicing up the region to be read according to available cache blocks and -@@ -208,6 +186,8 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq) - ssize_t size = rreq->len; - int ret = 0; - -+ atomic_inc(&rreq->nr_outstanding); -+ - do { - struct netfs_io_subrequest *subreq; - enum netfs_io_source source = NETFS_SOURCE_UNKNOWN; -@@ -222,6 +202,14 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq) - subreq->start = start; - subreq->len = size; - -+ atomic_inc(&rreq->nr_outstanding); -+ spin_lock(&rreq->lock); -+ list_add_tail(&subreq->rreq_link, &rreq->subrequests); -+ subreq->prev_donated = rreq->prev_donated; -+ rreq->prev_donated = 0; -+ trace_netfs_sreq(subreq, netfs_sreq_trace_added); -+ spin_unlock(&rreq->lock); -+ - source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size); - subreq->source = source; - if (source == NETFS_DOWNLOAD_FROM_SERVER) { -@@ -249,18 +237,17 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq) - netfs_stat(&netfs_n_rh_download); - if (rreq->netfs_ops->prepare_read) { - ret = rreq->netfs_ops->prepare_read(subreq); -- if (ret < 0) { -- subreq->error = ret; -- /* Not queued - release both refs. 
*/ -- netfs_put_subrequest(subreq, false, -- netfs_sreq_trace_put_cancel); -- netfs_put_subrequest(subreq, false, -- netfs_sreq_trace_put_cancel); -- break; -- } -+ if (ret < 0) -+ goto prep_failed; - trace_netfs_sreq(subreq, netfs_sreq_trace_prepare); - } -- goto issue; -+ -+ slice = netfs_prepare_read_iterator(subreq); -+ if (slice < 0) -+ goto prep_iter_failed; -+ -+ rreq->netfs_ops->issue_read(subreq); -+ goto done; - } - - fill_with_zeroes: -@@ -268,50 +255,67 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq) - subreq->source = NETFS_FILL_WITH_ZEROES; - trace_netfs_sreq(subreq, netfs_sreq_trace_submit); - netfs_stat(&netfs_n_rh_zero); -- goto issue; -+ slice = netfs_prepare_read_iterator(subreq); -+ if (slice < 0) -+ goto prep_iter_failed; -+ __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); -+ subreq->error = 0; -+ netfs_read_subreq_terminated(subreq); -+ goto done; - } - - if (source == NETFS_READ_FROM_CACHE) { - trace_netfs_sreq(subreq, netfs_sreq_trace_submit); -- goto issue; -+ slice = netfs_prepare_read_iterator(subreq); -+ if (slice < 0) -+ goto prep_iter_failed; -+ netfs_read_cache_to_pagecache(rreq, subreq); -+ goto done; - } - - pr_err("Unexpected read source %u\n", source); - WARN_ON_ONCE(1); - break; - -- issue: -- slice = netfs_prepare_read_iterator(subreq); -- if (slice < 0) { -- ret = slice; -- subreq->error = ret; -- trace_netfs_sreq(subreq, netfs_sreq_trace_cancel); -- /* Not queued - release both refs. */ -- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel); -- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel); -- break; -- } -+ prep_iter_failed: -+ ret = slice; -+ prep_failed: -+ subreq->error = ret; -+ atomic_dec(&rreq->nr_outstanding); -+ netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel); -+ break; -+ -+ done: - size -= slice; - start += slice; -- if (size <= 0) { -- smp_wmb(); /* Write lists before ALL_QUEUED. */ -- set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); -- } -- -- netfs_issue_read(rreq, subreq); - cond_resched(); - } while (size > 0); - -- if (unlikely(size > 0)) { -- smp_wmb(); /* Write lists before ALL_QUEUED. */ -- set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); -- netfs_wake_read_collector(rreq); -- } -+ if (atomic_dec_and_test(&rreq->nr_outstanding)) -+ netfs_rreq_terminated(rreq); - - /* Defer error return as we may need to wait for outstanding I/O. */ - cmpxchg(&rreq->error, 0, ret); - } - -+/* -+ * Wait for the read operation to complete, successfully or otherwise. 
-+ */ -+static int netfs_wait_for_read(struct netfs_io_request *rreq) -+{ -+ int ret; -+ -+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip); -+ wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE); -+ ret = rreq->error; -+ if (ret == 0 && rreq->submitted < rreq->len) { -+ trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read); -+ ret = -EIO; -+ } -+ -+ return ret; -+} -+ - /** - * netfs_readahead - Helper to manage a read request - * @ractl: The description of the readahead request -@@ -340,8 +344,6 @@ void netfs_readahead(struct readahead_control *ractl) - if (IS_ERR(rreq)) - return; - -- __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags); -- - ret = netfs_begin_cache_read(rreq, ictx); - if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) - goto cleanup_free; -@@ -458,7 +460,7 @@ static int netfs_read_gaps(struct file *file, struct folio *folio) - folio_put(sink); - - ret = netfs_wait_for_read(rreq); -- if (ret >= 0) { -+ if (ret == 0) { - flush_dcache_folio(folio); - folio_mark_uptodate(folio); - } -@@ -746,7 +748,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, - netfs_read_to_pagecache(rreq); - ret = netfs_wait_for_read(rreq); - netfs_put_request(rreq, false, netfs_rreq_trace_put_return); -- return ret < 0 ? ret : 0; -+ return ret; - - error_put: - netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); -diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c -index 0bf3c2f5a710..1a20cc3979c7 100644 ---- a/fs/netfs/direct_read.c -+++ b/fs/netfs/direct_read.c -@@ -47,11 +47,12 @@ static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq) - */ - static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) - { -- struct netfs_io_stream *stream = &rreq->io_streams[0]; - unsigned long long start = rreq->start; - ssize_t size = rreq->len; - int ret = 0; - -+ atomic_set(&rreq->nr_outstanding, 1); -+ - do { - struct netfs_io_subrequest *subreq; - ssize_t slice; -@@ -66,18 +67,11 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) - subreq->start = start; - subreq->len = size; - -- __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); -- -+ atomic_inc(&rreq->nr_outstanding); - spin_lock(&rreq->lock); -- list_add_tail(&subreq->rreq_link, &stream->subrequests); -- if (list_is_first(&subreq->rreq_link, &stream->subrequests)) { -- stream->front = subreq; -- if (!stream->active) { -- stream->collected_to = stream->front->start; -- /* Store list pointers before active flag */ -- smp_store_release(&stream->active, true); -- } -- } -+ list_add_tail(&subreq->rreq_link, &rreq->subrequests); -+ subreq->prev_donated = rreq->prev_donated; -+ rreq->prev_donated = 0; - trace_netfs_sreq(subreq, netfs_sreq_trace_added); - spin_unlock(&rreq->lock); - -@@ -85,6 +79,7 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) - if (rreq->netfs_ops->prepare_read) { - ret = rreq->netfs_ops->prepare_read(subreq); - if (ret < 0) { -+ atomic_dec(&rreq->nr_outstanding); - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel); - break; - } -@@ -92,32 +87,20 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) - - netfs_prepare_dio_read_iterator(subreq); - slice = subreq->len; -+ rreq->netfs_ops->issue_read(subreq); -+ - size -= slice; - start += slice; - rreq->submitted += slice; -- if (size <= 0) { -- smp_wmb(); /* Write lists before ALL_QUEUED. 
*/ -- set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); -- } -- -- rreq->netfs_ops->issue_read(subreq); - -- if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) -- netfs_wait_for_pause(rreq); -- if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) -- break; - if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) && - test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags)) - break; - cond_resched(); - } while (size > 0); - -- if (unlikely(size > 0)) { -- smp_wmb(); /* Write lists before ALL_QUEUED. */ -- set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); -- netfs_wake_read_collector(rreq); -- } -- -+ if (atomic_dec_and_test(&rreq->nr_outstanding)) -+ netfs_rreq_terminated(rreq); - return ret; - } - -@@ -150,10 +133,21 @@ static int netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync) - goto out; - } - -- if (sync) -- ret = netfs_wait_for_read(rreq); -- else -+ if (sync) { -+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip); -+ wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS, -+ TASK_UNINTERRUPTIBLE); -+ -+ ret = rreq->error; -+ if (ret == 0 && rreq->submitted < rreq->len && -+ rreq->origin != NETFS_DIO_READ) { -+ trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read); -+ ret = -EIO; -+ } -+ } else { - ret = -EIOCBQUEUED; -+ } -+ - out: - _leave(" = %d", ret); - return ret; -@@ -221,10 +215,8 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i - - // TODO: Set up bounce buffer if needed - -- if (!sync) { -+ if (!sync) - rreq->iocb = iocb; -- __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags); -- } - - ret = netfs_unbuffered_read(rreq, sync); - if (ret < 0) -diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h -index eb76f98c894b..e236f752af88 100644 ---- a/fs/netfs/internal.h -+++ b/fs/netfs/internal.h -@@ -82,27 +82,20 @@ static inline void netfs_see_request(struct netfs_io_request *rreq, - trace_netfs_rreq_ref(rreq->debug_id, refcount_read(&rreq->ref), what); - } - --static inline void netfs_see_subrequest(struct netfs_io_subrequest *subreq, -- enum netfs_sreq_ref_trace what) --{ -- trace_netfs_sreq_ref(subreq->rreq->debug_id, subreq->debug_index, -- refcount_read(&subreq->ref), what); --} -- - /* - * read_collect.c - */ --void netfs_read_collection_worker(struct work_struct *work); --void netfs_wake_read_collector(struct netfs_io_request *rreq); --void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async); --ssize_t netfs_wait_for_read(struct netfs_io_request *rreq); --void netfs_wait_for_pause(struct netfs_io_request *rreq); -+void netfs_read_termination_worker(struct work_struct *work); -+void netfs_rreq_terminated(struct netfs_io_request *rreq); - - /* - * read_pgpriv2.c - */ --void netfs_pgpriv2_copy_to_cache(struct netfs_io_request *rreq, struct folio *folio); --void netfs_pgpriv2_end_copy_to_cache(struct netfs_io_request *rreq); -+void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_subrequest *subreq, -+ struct netfs_io_request *rreq, -+ struct folio_queue *folioq, -+ int slot); -+void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq); - bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq); - - /* -diff --git a/fs/netfs/main.c b/fs/netfs/main.c -index 4e3e62040831..16760695e667 100644 ---- a/fs/netfs/main.c -+++ b/fs/netfs/main.c -@@ -71,7 +71,7 @@ static int netfs_requests_seq_show(struct seq_file *m, void *v) - refcount_read(&rreq->ref), - rreq->flags, - rreq->error, -- 0, -+ atomic_read(&rreq->nr_outstanding), - rreq->start, rreq->submitted, rreq->len); - seq_putc(m, '\n'); - return 0; -diff --git 
a/fs/netfs/objects.c b/fs/netfs/objects.c -index dc6b41ef18b0..dde4a679d9e2 100644 ---- a/fs/netfs/objects.c -+++ b/fs/netfs/objects.c -@@ -48,7 +48,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, - spin_lock_init(&rreq->lock); - INIT_LIST_HEAD(&rreq->io_streams[0].subrequests); - INIT_LIST_HEAD(&rreq->io_streams[1].subrequests); -- init_waitqueue_head(&rreq->waitq); -+ INIT_LIST_HEAD(&rreq->subrequests); - refcount_set(&rreq->ref, 1); - - if (origin == NETFS_READAHEAD || -@@ -56,12 +56,10 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, - origin == NETFS_READ_GAPS || - origin == NETFS_READ_SINGLE || - origin == NETFS_READ_FOR_WRITE || -- origin == NETFS_DIO_READ) { -- INIT_WORK(&rreq->work, netfs_read_collection_worker); -- rreq->io_streams[0].avail = true; -- } else { -+ origin == NETFS_DIO_READ) -+ INIT_WORK(&rreq->work, NULL); -+ else - INIT_WORK(&rreq->work, netfs_write_collection_worker); -- } - - __set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); - if (file && file->f_flags & O_NONBLOCK) -@@ -95,6 +93,14 @@ void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async) - struct netfs_io_stream *stream; - int s; - -+ while (!list_empty(&rreq->subrequests)) { -+ subreq = list_first_entry(&rreq->subrequests, -+ struct netfs_io_subrequest, rreq_link); -+ list_del(&subreq->rreq_link); -+ netfs_put_subrequest(subreq, was_async, -+ netfs_sreq_trace_put_clear); -+ } -+ - for (s = 0; s < ARRAY_SIZE(rreq->io_streams); s++) { - stream = &rreq->io_streams[s]; - while (!list_empty(&stream->subrequests)) { -@@ -186,7 +192,21 @@ struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq - } - - memset(subreq, 0, kmem_cache_size(cache)); -- INIT_WORK(&subreq->work, NULL); -+ -+ switch (rreq->origin) { -+ case NETFS_READAHEAD: -+ case NETFS_READPAGE: -+ case NETFS_READ_GAPS: -+ case NETFS_READ_SINGLE: -+ case NETFS_READ_FOR_WRITE: -+ case NETFS_DIO_READ: -+ INIT_WORK(&subreq->work, netfs_read_subreq_termination_worker); -+ break; -+ default: -+ INIT_WORK(&subreq->work, NULL); -+ break; -+ } -+ - INIT_LIST_HEAD(&subreq->rreq_link); - refcount_set(&subreq->ref, 2); - subreq->rreq = rreq; -diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c -index f65affa5a9e4..2e9291ab1d62 100644 ---- a/fs/netfs/read_collect.c -+++ b/fs/netfs/read_collect.c -@@ -14,14 +14,6 @@ - #include - #include "internal.h" - --/* Notes made in the collector */ --#define HIT_PENDING 0x01 /* A front op was still pending */ --#define MADE_PROGRESS 0x04 /* Made progress cleaning up a stream or the folio set */ --#define BUFFERED 0x08 /* The pagecache needs cleaning up */ --#define NEED_RETRY 0x10 /* A front op requests retrying */ --#define COPY_TO_CACHE 0x40 /* Need to copy subrequest to cache */ --#define ABANDON_SREQ 0x80 /* Need to abandon untransferred part of subrequest */ -- - /* - * Clear the unread part of an I/O request. - */ -@@ -39,18 +31,14 @@ static void netfs_clear_unread(struct netfs_io_subrequest *subreq) - * cache the folio, we set the group to NETFS_FOLIO_COPY_TO_CACHE, mark it - * dirty and let writeback handle it. 
- */ --static void netfs_unlock_read_folio(struct netfs_io_request *rreq, -+static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq, -+ struct netfs_io_request *rreq, - struct folio_queue *folioq, - int slot) - { - struct netfs_folio *finfo; - struct folio *folio = folioq_folio(folioq, slot); - -- if (unlikely(folio_pos(folio) < rreq->abandon_to)) { -- trace_netfs_folio(folio, netfs_folio_trace_abandon); -- goto just_unlock; -- } -- - flush_dcache_folio(folio); - folio_mark_uptodate(folio); - -@@ -65,7 +53,7 @@ static void netfs_unlock_read_folio(struct netfs_io_request *rreq, - kfree(finfo); - } - -- if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags)) { -+ if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) { - if (!WARN_ON_ONCE(folio_get_private(folio) != NULL)) { - trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); - folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE); -@@ -78,11 +66,12 @@ static void netfs_unlock_read_folio(struct netfs_io_request *rreq, - folioq_clear(folioq, slot); - } else { - // TODO: Use of PG_private_2 is deprecated. -- if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags)) -- netfs_pgpriv2_copy_to_cache(rreq, folio); -+ if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) -+ netfs_pgpriv2_mark_copy_to_cache(subreq, rreq, folioq, slot); -+ else -+ folioq_clear(folioq, slot); - } - --just_unlock: - if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { - if (folio->index == rreq->no_unlock_folio && - test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) { -@@ -92,249 +81,241 @@ static void netfs_unlock_read_folio(struct netfs_io_request *rreq, - folio_unlock(folio); - } - } -- -- folioq_clear(folioq, slot); - } - - /* -- * Unlock any folios we've finished with. -+ * Unlock any folios that are now completely read. Returns true if the -+ * subrequest is removed from the list. 
- */ --static void netfs_read_unlock_folios(struct netfs_io_request *rreq, -- unsigned int *notes) -+static bool netfs_consume_read_data(struct netfs_io_subrequest *subreq) - { -- struct folio_queue *folioq = rreq->buffer.tail; -- unsigned long long collected_to = rreq->collected_to; -- unsigned int slot = rreq->buffer.first_tail_slot; -- -- if (rreq->cleaned_to >= rreq->collected_to) -- return; -- -- // TODO: Begin decryption -- -- if (slot >= folioq_nr_slots(folioq)) { -- folioq = rolling_buffer_delete_spent(&rreq->buffer); -- if (!folioq) { -- rreq->front_folio_order = 0; -- return; -+ struct netfs_io_subrequest *prev, *next; -+ struct netfs_io_request *rreq = subreq->rreq; -+ struct folio_queue *folioq = subreq->curr_folioq; -+ size_t avail, prev_donated, next_donated, fsize, part, excess; -+ loff_t fpos, start; -+ loff_t fend; -+ int slot = subreq->curr_folioq_slot; -+ -+ if (WARN(subreq->transferred > subreq->len, -+ "Subreq overread: R%x[%x] %zu > %zu", -+ rreq->debug_id, subreq->debug_index, -+ subreq->transferred, subreq->len)) -+ subreq->transferred = subreq->len; -+ -+ trace_netfs_folioq(folioq, netfs_trace_folioq_read_progress); -+next_folio: -+ fsize = PAGE_SIZE << subreq->curr_folio_order; -+ fpos = round_down(subreq->start + subreq->consumed, fsize); -+ fend = fpos + fsize; -+ -+ if (WARN_ON_ONCE(!folioq) || -+ WARN_ON_ONCE(!folioq_folio(folioq, slot)) || -+ WARN_ON_ONCE(folioq_folio(folioq, slot)->index != fpos / PAGE_SIZE)) { -+ pr_err("R=%08x[%x] s=%llx-%llx ctl=%zx/%zx/%zx sl=%u\n", -+ rreq->debug_id, subreq->debug_index, -+ subreq->start, subreq->start + subreq->transferred - 1, -+ subreq->consumed, subreq->transferred, subreq->len, -+ slot); -+ if (folioq) { -+ struct folio *folio = folioq_folio(folioq, slot); -+ -+ pr_err("folioq: fq=%x orders=%02x%02x%02x%02x %px\n", -+ folioq->debug_id, -+ folioq->orders[0], folioq->orders[1], -+ folioq->orders[2], folioq->orders[3], -+ folioq); -+ if (folio) -+ pr_err("folio: %llx-%llx ix=%llx o=%u qo=%u\n", -+ fpos, fend - 1, folio_pos(folio), folio_order(folio), -+ folioq_folio_order(folioq, slot)); - } -- slot = 0; - } - -- for (;;) { -- struct folio *folio; -- unsigned long long fpos, fend; -- unsigned int order; -- size_t fsize; -- -- if (*notes & COPY_TO_CACHE) -- set_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags); -- -- folio = folioq_folio(folioq, slot); -- if (WARN_ONCE(!folio_test_locked(folio), -- "R=%08x: folio %lx is not locked\n", -- rreq->debug_id, folio->index)) -- trace_netfs_folio(folio, netfs_folio_trace_not_locked); -- -- order = folioq_folio_order(folioq, slot); -- rreq->front_folio_order = order; -- fsize = PAGE_SIZE << order; -- fpos = folio_pos(folio); -- fend = umin(fpos + fsize, rreq->i_size); -- -- trace_netfs_collect_folio(rreq, folio, fend, collected_to); -+donation_changed: -+ /* Try to consume the current folio if we've hit or passed the end of -+ * it. There's a possibility that this subreq doesn't start at the -+ * beginning of the folio, in which case we need to donate to/from the -+ * preceding subreq. -+ * -+ * We also need to include any potential donation back from the -+ * following subreq. 
-+ */ -+ prev_donated = READ_ONCE(subreq->prev_donated); -+ next_donated = READ_ONCE(subreq->next_donated); -+ if (prev_donated || next_donated) { -+ spin_lock(&rreq->lock); -+ prev_donated = subreq->prev_donated; -+ next_donated = subreq->next_donated; -+ subreq->start -= prev_donated; -+ subreq->len += prev_donated; -+ subreq->transferred += prev_donated; -+ prev_donated = subreq->prev_donated = 0; -+ if (subreq->transferred == subreq->len) { -+ subreq->len += next_donated; -+ subreq->transferred += next_donated; -+ next_donated = subreq->next_donated = 0; -+ } -+ trace_netfs_sreq(subreq, netfs_sreq_trace_add_donations); -+ spin_unlock(&rreq->lock); -+ } - -- /* Unlock any folio we've transferred all of. */ -- if (collected_to < fend) -- break; -+ avail = subreq->transferred; -+ if (avail == subreq->len) -+ avail += next_donated; -+ start = subreq->start; -+ if (subreq->consumed == 0) { -+ start -= prev_donated; -+ avail += prev_donated; -+ } else { -+ start += subreq->consumed; -+ avail -= subreq->consumed; -+ } -+ part = umin(avail, fsize); -+ -+ trace_netfs_progress(subreq, start, avail, part); -+ -+ if (start + avail >= fend) { -+ if (fpos == start) { -+ /* Flush, unlock and mark for caching any folio we've just read. */ -+ subreq->consumed = fend - subreq->start; -+ netfs_unlock_read_folio(subreq, rreq, folioq, slot); -+ folioq_mark2(folioq, slot); -+ if (subreq->consumed >= subreq->len) -+ goto remove_subreq; -+ } else if (fpos < start) { -+ excess = fend - subreq->start; -+ -+ spin_lock(&rreq->lock); -+ /* If we complete first on a folio split with the -+ * preceding subreq, donate to that subreq - otherwise -+ * we get the responsibility. -+ */ -+ if (subreq->prev_donated != prev_donated) { -+ spin_unlock(&rreq->lock); -+ goto donation_changed; -+ } - -- netfs_unlock_read_folio(rreq, folioq, slot); -- WRITE_ONCE(rreq->cleaned_to, fpos + fsize); -- *notes |= MADE_PROGRESS; -+ if (list_is_first(&subreq->rreq_link, &rreq->subrequests)) { -+ spin_unlock(&rreq->lock); -+ pr_err("Can't donate prior to front\n"); -+ goto bad; -+ } - -- clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags); -+ prev = list_prev_entry(subreq, rreq_link); -+ WRITE_ONCE(prev->next_donated, prev->next_donated + excess); -+ subreq->start += excess; -+ subreq->len -= excess; -+ subreq->transferred -= excess; -+ trace_netfs_donate(rreq, subreq, prev, excess, -+ netfs_trace_donate_tail_to_prev); -+ trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev); -+ -+ if (subreq->consumed >= subreq->len) -+ goto remove_subreq_locked; -+ spin_unlock(&rreq->lock); -+ } else { -+ pr_err("fpos > start\n"); -+ goto bad; -+ } - -- /* Clean up the head folioq. If we clear an entire folioq, then -- * we can get rid of it provided it's not also the tail folioq -- * being filled by the issuer. -- */ -- folioq_clear(folioq, slot); -+ /* Advance the rolling buffer to the next folio. */ - slot++; - if (slot >= folioq_nr_slots(folioq)) { -- folioq = rolling_buffer_delete_spent(&rreq->buffer); -- if (!folioq) -- goto done; - slot = 0; -+ folioq = folioq->next; -+ subreq->curr_folioq = folioq; - trace_netfs_folioq(folioq, netfs_trace_folioq_read_progress); - } -- -- if (fpos + fsize >= collected_to) -- break; -+ subreq->curr_folioq_slot = slot; -+ if (folioq && folioq_folio(folioq, slot)) -+ subreq->curr_folio_order = folioq->orders[slot]; -+ cond_resched(); -+ goto next_folio; - } - -- rreq->buffer.tail = folioq; --done: -- rreq->buffer.first_tail_slot = slot; --} -+ /* Deal with partial progress. 
*/ -+ if (subreq->transferred < subreq->len) -+ return false; - --/* -- * Collect and assess the results of various read subrequests. We may need to -- * retry some of the results. -- * -- * Note that we have a sequence of subrequests, which may be drawing on -- * different sources and may or may not be the same size or starting position -- * and may not even correspond in boundary alignment. -- */ --static void netfs_collect_read_results(struct netfs_io_request *rreq) --{ -- struct netfs_io_subrequest *front, *remove; -- struct netfs_io_stream *stream = &rreq->io_streams[0]; -- unsigned int notes; -- -- _enter("%llx-%llx", rreq->start, rreq->start + rreq->len); -- trace_netfs_rreq(rreq, netfs_rreq_trace_collect); -- trace_netfs_collect(rreq); -- --reassess: -- if (rreq->origin == NETFS_READAHEAD || -- rreq->origin == NETFS_READPAGE || -- rreq->origin == NETFS_READ_FOR_WRITE) -- notes = BUFFERED; -- else -- notes = 0; -- -- /* Remove completed subrequests from the front of the stream and -- * advance the completion point. We stop when we hit something that's -- * in progress. The issuer thread may be adding stuff to the tail -- * whilst we're doing this. -+ /* Donate the remaining downloaded data to one of the neighbouring -+ * subrequests. Note that we may race with them doing the same thing. - */ -- front = READ_ONCE(stream->front); -- while (front) { -- size_t transferred; -- -- trace_netfs_collect_sreq(rreq, front); -- _debug("sreq [%x] %llx %zx/%zx", -- front->debug_index, front->start, front->transferred, front->len); -- -- if (stream->collected_to < front->start) { -- trace_netfs_collect_gap(rreq, stream, front->start, 'F'); -- stream->collected_to = front->start; -- } -- -- if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) -- notes |= HIT_PENDING; -- smp_rmb(); /* Read counters after IN_PROGRESS flag. */ -- transferred = READ_ONCE(front->transferred); -- -- /* If we can now collect the next folio, do so. We don't want -- * to defer this as we have to decide whether we need to copy -- * to the cache or not, and that may differ between adjacent -- * subreqs. -- */ -- if (notes & BUFFERED) { -- size_t fsize = PAGE_SIZE << rreq->front_folio_order; -- -- /* Clear the tail of a short read. */ -- if (!(notes & HIT_PENDING) && -- front->error == 0 && -- transferred < front->len && -- (test_bit(NETFS_SREQ_HIT_EOF, &front->flags) || -- test_bit(NETFS_SREQ_CLEAR_TAIL, &front->flags))) { -- netfs_clear_unread(front); -- transferred = front->transferred = front->len; -- trace_netfs_sreq(front, netfs_sreq_trace_clear); -- } -+ spin_lock(&rreq->lock); - -- stream->collected_to = front->start + transferred; -- rreq->collected_to = stream->collected_to; -- -- if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &front->flags)) -- notes |= COPY_TO_CACHE; -- -- if (test_bit(NETFS_SREQ_FAILED, &front->flags)) { -- rreq->abandon_to = front->start + front->len; -- front->transferred = front->len; -- transferred = front->len; -- trace_netfs_rreq(rreq, netfs_rreq_trace_set_abandon); -- } -- if (front->start + transferred >= rreq->cleaned_to + fsize || -- test_bit(NETFS_SREQ_HIT_EOF, &front->flags)) -- netfs_read_unlock_folios(rreq, ¬es); -- } else { -- stream->collected_to = front->start + transferred; -- rreq->collected_to = stream->collected_to; -- } -- -- /* Stall if the front is still undergoing I/O. 
*/ -- if (notes & HIT_PENDING) -- break; -- -- if (test_bit(NETFS_SREQ_FAILED, &front->flags)) { -- if (!stream->failed) { -- stream->error = front->error; -- rreq->error = front->error; -- set_bit(NETFS_RREQ_FAILED, &rreq->flags); -- stream->failed = true; -- } -- notes |= MADE_PROGRESS | ABANDON_SREQ; -- } else if (test_bit(NETFS_SREQ_NEED_RETRY, &front->flags)) { -- stream->need_retry = true; -- notes |= NEED_RETRY | MADE_PROGRESS; -- break; -- } else { -- if (!stream->failed) -- stream->transferred = stream->collected_to - rreq->start; -- notes |= MADE_PROGRESS; -- } -- -- /* Remove if completely consumed. */ -- stream->source = front->source; -- spin_lock(&rreq->lock); -- -- remove = front; -- trace_netfs_sreq(front, netfs_sreq_trace_discard); -- list_del_init(&front->rreq_link); -- front = list_first_entry_or_null(&stream->subrequests, -- struct netfs_io_subrequest, rreq_link); -- stream->front = front; -+ if (subreq->prev_donated != prev_donated || -+ subreq->next_donated != next_donated) { - spin_unlock(&rreq->lock); -- netfs_put_subrequest(remove, false, -- notes & ABANDON_SREQ ? -- netfs_sreq_trace_put_abandon : -- netfs_sreq_trace_put_done); -+ cond_resched(); -+ goto donation_changed; - } - -- trace_netfs_collect_stream(rreq, stream); -- trace_netfs_collect_state(rreq, rreq->collected_to, notes); -- -- if (!(notes & BUFFERED)) -- rreq->cleaned_to = rreq->collected_to; -- -- if (notes & NEED_RETRY) -- goto need_retry; -- if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) { -- trace_netfs_rreq(rreq, netfs_rreq_trace_unpause); -- clear_bit_unlock(NETFS_RREQ_PAUSE, &rreq->flags); -- smp_mb__after_atomic(); /* Set PAUSE before task state */ -- wake_up(&rreq->waitq); -+ /* Deal with the trickiest case: that this subreq is in the middle of a -+ * folio, not touching either edge, but finishes first. In such a -+ * case, we donate to the previous subreq, if there is one and if it is -+ * contiguous, so that the donation is only handled when that completes -+ * - and remove this subreq from the list. -+ * -+ * If the previous subreq finished first, we will have acquired their -+ * donation and should be able to unlock folios and/or donate nextwards. -+ */ -+ if (!subreq->consumed && -+ !prev_donated && -+ !list_is_first(&subreq->rreq_link, &rreq->subrequests) && -+ subreq->start == prev->start + prev->len) { -+ prev = list_prev_entry(subreq, rreq_link); -+ WRITE_ONCE(prev->next_donated, prev->next_donated + subreq->len); -+ subreq->start += subreq->len; -+ subreq->len = 0; -+ subreq->transferred = 0; -+ trace_netfs_donate(rreq, subreq, prev, subreq->len, -+ netfs_trace_donate_to_prev); -+ trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev); -+ goto remove_subreq_locked; - } - -- if (notes & MADE_PROGRESS) { -- //cond_resched(); -- goto reassess; -- } -+ /* If we can't donate down the chain, donate up the chain instead. */ -+ excess = subreq->len - subreq->consumed + next_donated; - --out: -- _leave(" = %x", notes); -- return; -+ if (!subreq->consumed) -+ excess += prev_donated; - --need_retry: -- /* Okay... We're going to have to retry parts of the stream. Note -- * that any partially completed op will have had any wholly transferred -- * folios removed from it. 
-+ if (list_is_last(&subreq->rreq_link, &rreq->subrequests)) { -+ rreq->prev_donated = excess; -+ trace_netfs_donate(rreq, subreq, NULL, excess, -+ netfs_trace_donate_to_deferred_next); -+ } else { -+ next = list_next_entry(subreq, rreq_link); -+ WRITE_ONCE(next->prev_donated, excess); -+ trace_netfs_donate(rreq, subreq, next, excess, -+ netfs_trace_donate_to_next); -+ } -+ trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_next); -+ subreq->len = subreq->consumed; -+ subreq->transferred = subreq->consumed; -+ goto remove_subreq_locked; -+ -+remove_subreq: -+ spin_lock(&rreq->lock); -+remove_subreq_locked: -+ subreq->consumed = subreq->len; -+ list_del(&subreq->rreq_link); -+ spin_unlock(&rreq->lock); -+ netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_consumed); -+ return true; -+ -+bad: -+ /* Errr... prev and next both donated to us, but insufficient to finish -+ * the folio. - */ -- _debug("retry"); -- netfs_retry_reads(rreq); -- goto out; -+ printk("R=%08x[%x] s=%llx-%llx %zx/%zx/%zx\n", -+ rreq->debug_id, subreq->debug_index, -+ subreq->start, subreq->start + subreq->transferred - 1, -+ subreq->consumed, subreq->transferred, subreq->len); -+ printk("folio: %llx-%llx\n", fpos, fend - 1); -+ printk("donated: prev=%zx next=%zx\n", prev_donated, next_donated); -+ printk("s=%llx av=%zx part=%zx\n", start, avail, part); -+ BUG(); - } - - /* -@@ -343,13 +324,12 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq) - static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) - { - struct netfs_io_subrequest *subreq; -- struct netfs_io_stream *stream = &rreq->io_streams[0]; - unsigned int i; - - /* Collect unbuffered reads and direct reads, adding up the transfer - * sizes until we find the first short or failed subrequest. - */ -- list_for_each_entry(subreq, &stream->subrequests, rreq_link) { -+ list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { - rreq->transferred += subreq->transferred; - - if (subreq->transferred < subreq->len || -@@ -386,12 +366,22 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) - */ - static void netfs_rreq_assess_single(struct netfs_io_request *rreq) - { -+ struct netfs_io_subrequest *subreq; - struct netfs_io_stream *stream = &rreq->io_streams[0]; - -- if (!rreq->error && stream->source == NETFS_DOWNLOAD_FROM_SERVER && -- fscache_resources_valid(&rreq->cache_resources)) { -- trace_netfs_rreq(rreq, netfs_rreq_trace_dirty); -- netfs_single_mark_inode_dirty(rreq->inode); -+ subreq = list_first_entry_or_null(&stream->subrequests, -+ struct netfs_io_subrequest, rreq_link); -+ if (subreq) { -+ if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) -+ rreq->error = subreq->error; -+ else -+ rreq->transferred = subreq->transferred; -+ -+ if (!rreq->error && subreq->source == NETFS_DOWNLOAD_FROM_SERVER && -+ fscache_resources_valid(&rreq->cache_resources)) { -+ trace_netfs_rreq(rreq, netfs_rreq_trace_dirty); -+ netfs_single_mark_inode_dirty(rreq->inode); -+ } - } - - if (rreq->iocb) { -@@ -405,32 +395,21 @@ static void netfs_rreq_assess_single(struct netfs_io_request *rreq) - } - - /* -- * Perform the collection of subrequests and folios. -+ * Assess the state of a read request and decide what to do next. - * - * Note that we're in normal kernel thread context at this point, possibly - * running on a workqueue. 
- */ --static void netfs_read_collection(struct netfs_io_request *rreq) -+void netfs_rreq_terminated(struct netfs_io_request *rreq) - { -- struct netfs_io_stream *stream = &rreq->io_streams[0]; -- -- netfs_collect_read_results(rreq); -+ trace_netfs_rreq(rreq, netfs_rreq_trace_assess); - -- /* We're done when the app thread has finished posting subreqs and the -- * queue is empty. -- */ -- if (!test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags)) -- return; -- smp_rmb(); /* Read ALL_QUEUED before subreq lists. */ -+ //netfs_rreq_is_still_valid(rreq); - -- if (!list_empty(&stream->subrequests)) -+ if (test_and_clear_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags)) { -+ netfs_retry_reads(rreq); - return; -- -- /* Okay, declare that all I/O is complete. */ -- rreq->transferred = stream->transferred; -- trace_netfs_rreq(rreq, netfs_rreq_trace_complete); -- -- //netfs_rreq_is_still_valid(rreq); -+ } - - switch (rreq->origin) { - case NETFS_DIO_READ: -@@ -451,35 +430,8 @@ static void netfs_read_collection(struct netfs_io_request *rreq) - trace_netfs_rreq(rreq, netfs_rreq_trace_done); - netfs_clear_subrequests(rreq, false); - netfs_unlock_abandoned_read_pages(rreq); -- if (unlikely(rreq->copy_to_cache)) -- netfs_pgpriv2_end_copy_to_cache(rreq); --} -- --void netfs_read_collection_worker(struct work_struct *work) --{ -- struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); -- -- netfs_see_request(rreq, netfs_rreq_trace_see_work); -- if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) -- netfs_read_collection(rreq); -- netfs_put_request(rreq, false, netfs_rreq_trace_put_work); --} -- --/* -- * Wake the collection work item. -- */ --void netfs_wake_read_collector(struct netfs_io_request *rreq) --{ -- if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { -- if (!work_pending(&rreq->work)) { -- netfs_get_request(rreq, netfs_rreq_trace_get_work); -- if (!queue_work(system_unbound_wq, &rreq->work)) -- netfs_put_request(rreq, true, netfs_rreq_trace_put_work_nq); -- } -- } else { -- trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue); -- wake_up(&rreq->waitq); -- } -+ if (unlikely(test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags))) -+ netfs_pgpriv2_write_to_the_cache(rreq); - } - - /** -@@ -495,22 +447,17 @@ void netfs_wake_read_collector(struct netfs_io_request *rreq) - void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq) - { - struct netfs_io_request *rreq = subreq->rreq; -- struct netfs_io_stream *stream = &rreq->io_streams[0]; -- size_t fsize = PAGE_SIZE << rreq->front_folio_order; -+ -+ might_sleep(); - - trace_netfs_sreq(subreq, netfs_sreq_trace_progress); - -- /* If we are at the head of the queue, wake up the collector, -- * getting a ref to it if we were the ones to do so. 
-- */ -- if (subreq->start + subreq->transferred > rreq->cleaned_to + fsize && -+ if (subreq->transferred > subreq->consumed && - (rreq->origin == NETFS_READAHEAD || - rreq->origin == NETFS_READPAGE || -- rreq->origin == NETFS_READ_FOR_WRITE) && -- list_is_first(&subreq->rreq_link, &stream->subrequests) -- ) { -+ rreq->origin == NETFS_READ_FOR_WRITE)) { -+ netfs_consume_read_data(subreq); - __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); -- netfs_wake_read_collector(rreq); - } - } - EXPORT_SYMBOL(netfs_read_subreq_progress); -@@ -534,7 +481,8 @@ EXPORT_SYMBOL(netfs_read_subreq_progress); - void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq) - { - struct netfs_io_request *rreq = subreq->rreq; -- struct netfs_io_stream *stream = &rreq->io_streams[0]; -+ -+ might_sleep(); - - switch (subreq->source) { - case NETFS_READ_FROM_CACHE: -@@ -547,156 +495,86 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq) - break; - } - -+ if (rreq->origin != NETFS_DIO_READ) { -+ /* Collect buffered reads. -+ * -+ * If the read completed validly short, then we can clear the -+ * tail before going on to unlock the folios. -+ */ -+ if (subreq->error == 0 && subreq->transferred < subreq->len && -+ (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags) || -+ test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags))) { -+ netfs_clear_unread(subreq); -+ subreq->transferred = subreq->len; -+ trace_netfs_sreq(subreq, netfs_sreq_trace_clear); -+ } -+ if (subreq->transferred > subreq->consumed && -+ (rreq->origin == NETFS_READAHEAD || -+ rreq->origin == NETFS_READPAGE || -+ rreq->origin == NETFS_READ_FOR_WRITE)) { -+ netfs_consume_read_data(subreq); -+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); -+ } -+ rreq->transferred += subreq->transferred; -+ } -+ - /* Deal with retry requests, short reads and errors. If we retry - * but don't make progress, we abandon the attempt. - */ - if (!subreq->error && subreq->transferred < subreq->len) { - if (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) { - trace_netfs_sreq(subreq, netfs_sreq_trace_hit_eof); -- } else if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) { -- trace_netfs_sreq(subreq, netfs_sreq_trace_need_clear); -- } else if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) { -- trace_netfs_sreq(subreq, netfs_sreq_trace_need_retry); -- } else if (test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) { -- __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); -- trace_netfs_sreq(subreq, netfs_sreq_trace_partial_read); - } else { -- __set_bit(NETFS_SREQ_FAILED, &subreq->flags); -- subreq->error = -ENODATA; - trace_netfs_sreq(subreq, netfs_sreq_trace_short); -+ if (subreq->transferred > subreq->consumed) { -+ /* If we didn't read new data, abandon retry. 
*/ -+ if (subreq->retry_count && -+ test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) { -+ __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); -+ set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags); -+ } -+ } else if (test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) { -+ __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); -+ set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags); -+ } else { -+ __set_bit(NETFS_SREQ_FAILED, &subreq->flags); -+ subreq->error = -ENODATA; -+ } - } - } - -+ trace_netfs_sreq(subreq, netfs_sreq_trace_terminated); -+ - if (unlikely(subreq->error < 0)) { - trace_netfs_failure(rreq, subreq, subreq->error, netfs_fail_read); - if (subreq->source == NETFS_READ_FROM_CACHE) { - netfs_stat(&netfs_n_rh_read_failed); -- __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); - } else { - netfs_stat(&netfs_n_rh_download_failed); -- __set_bit(NETFS_SREQ_FAILED, &subreq->flags); -+ set_bit(NETFS_RREQ_FAILED, &rreq->flags); -+ rreq->error = subreq->error; - } -- trace_netfs_rreq(rreq, netfs_rreq_trace_set_pause); -- set_bit(NETFS_RREQ_PAUSE, &rreq->flags); - } - -- trace_netfs_sreq(subreq, netfs_sreq_trace_terminated); -- -- clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags); -- smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */ -+ if (atomic_dec_and_test(&rreq->nr_outstanding)) -+ netfs_rreq_terminated(rreq); - -- /* If we are at the head of the queue, wake up the collector. */ -- if (list_is_first(&subreq->rreq_link, &stream->subrequests)) -- netfs_wake_read_collector(rreq); -- -- netfs_put_subrequest(subreq, true, netfs_sreq_trace_put_terminated); -+ netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_terminated); - } - EXPORT_SYMBOL(netfs_read_subreq_terminated); - --/* -- * Handle termination of a read from the cache. -+/** -+ * netfs_read_subreq_termination_worker - Workqueue helper for read termination -+ * @work: The subreq->work in the I/O request that has been terminated. -+ * -+ * Helper function to jump to netfs_read_subreq_terminated() from the -+ * subrequest work item. - */ --void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async) -+void netfs_read_subreq_termination_worker(struct work_struct *work) - { -- struct netfs_io_subrequest *subreq = priv; -+ struct netfs_io_subrequest *subreq = -+ container_of(work, struct netfs_io_subrequest, work); - -- if (transferred_or_error > 0) { -- subreq->error = 0; -- if (transferred_or_error > 0) { -- subreq->transferred += transferred_or_error; -- __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); -- } -- } else { -- subreq->error = transferred_or_error; -- } - netfs_read_subreq_terminated(subreq); - } -- --/* -- * Wait for the read operation to complete, successfully or otherwise. 
-- */ --ssize_t netfs_wait_for_read(struct netfs_io_request *rreq) --{ -- struct netfs_io_subrequest *subreq; -- struct netfs_io_stream *stream = &rreq->io_streams[0]; -- DEFINE_WAIT(myself); -- ssize_t ret; -- -- for (;;) { -- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); -- prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); -- -- subreq = list_first_entry_or_null(&stream->subrequests, -- struct netfs_io_subrequest, rreq_link); -- if (subreq && -- (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || -- test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { -- __set_current_state(TASK_RUNNING); -- netfs_read_collection(rreq); -- continue; -- } -- -- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) -- break; -- -- schedule(); -- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); -- } -- -- finish_wait(&rreq->waitq, &myself); -- -- ret = rreq->error; -- if (ret == 0) { -- ret = rreq->transferred; -- switch (rreq->origin) { -- case NETFS_DIO_READ: -- case NETFS_READ_SINGLE: -- ret = rreq->transferred; -- break; -- default: -- if (rreq->submitted < rreq->len) { -- trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read); -- ret = -EIO; -- } -- break; -- } -- } -- -- return ret; --} -- --/* -- * Wait for a paused read operation to unpause or complete in some manner. -- */ --void netfs_wait_for_pause(struct netfs_io_request *rreq) --{ -- struct netfs_io_subrequest *subreq; -- struct netfs_io_stream *stream = &rreq->io_streams[0]; -- DEFINE_WAIT(myself); -- -- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); -- -- for (;;) { -- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); -- prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); -- -- subreq = list_first_entry_or_null(&stream->subrequests, -- struct netfs_io_subrequest, rreq_link); -- if (subreq && -- (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || -- test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { -- __set_current_state(TASK_RUNNING); -- netfs_read_collection(rreq); -- continue; -- } -- -- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) || -- !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) -- break; -- -- schedule(); -- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); -- } -- -- finish_wait(&rreq->waitq, &myself); --} -+EXPORT_SYMBOL(netfs_read_subreq_termination_worker); -diff --git a/fs/netfs/read_pgpriv2.c b/fs/netfs/read_pgpriv2.c -index cf7727060215..9eee5af6b327 100644 ---- a/fs/netfs/read_pgpriv2.c -+++ b/fs/netfs/read_pgpriv2.c -@@ -13,12 +13,54 @@ - #include - #include "internal.h" - -+/* -+ * [DEPRECATED] Mark page as requiring copy-to-cache using PG_private_2. The -+ * third mark in the folio queue is used to indicate that this folio needs -+ * writing. -+ */ -+void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_subrequest *subreq, -+ struct netfs_io_request *rreq, -+ struct folio_queue *folioq, -+ int slot) -+{ -+ struct folio *folio = folioq_folio(folioq, slot); -+ -+ trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); -+ folio_start_private_2(folio); -+ folioq_mark3(folioq, slot); -+} -+ -+/* -+ * [DEPRECATED] Cancel PG_private_2 on all marked folios in the event of an -+ * unrecoverable error. 
-+ */ -+static void netfs_pgpriv2_cancel(struct rolling_buffer *buffer) -+{ -+ struct folio_queue *folioq = buffer->tail; -+ struct folio *folio; -+ int slot; -+ -+ while (folioq) { -+ if (!folioq->marks3) { -+ folioq = folioq->next; -+ continue; -+ } -+ -+ slot = __ffs(folioq->marks3); -+ folio = folioq_folio(folioq, slot); -+ -+ trace_netfs_folio(folio, netfs_folio_trace_cancel_copy); -+ folio_end_private_2(folio); -+ folioq_unmark3(folioq, slot); -+ } -+} -+ - /* - * [DEPRECATED] Copy a folio to the cache with PG_private_2 set. - */ --static void netfs_pgpriv2_copy_folio(struct netfs_io_request *creq, struct folio *folio) -+static int netfs_pgpriv2_copy_folio(struct netfs_io_request *wreq, struct folio *folio) - { -- struct netfs_io_stream *cache = &creq->io_streams[1]; -+ struct netfs_io_stream *cache = &wreq->io_streams[1]; - size_t fsize = folio_size(folio), flen = fsize; - loff_t fpos = folio_pos(folio), i_size; - bool to_eof = false; -@@ -29,17 +71,17 @@ static void netfs_pgpriv2_copy_folio(struct netfs_io_request *creq, struct folio - * of the page to beyond it, but cannot move i_size into or through the - * page since we have it locked. - */ -- i_size = i_size_read(creq->inode); -+ i_size = i_size_read(wreq->inode); - - if (fpos >= i_size) { - /* mmap beyond eof. */ - _debug("beyond eof"); - folio_end_private_2(folio); -- return; -+ return 0; - } - -- if (fpos + fsize > creq->i_size) -- creq->i_size = i_size; -+ if (fpos + fsize > wreq->i_size) -+ wreq->i_size = i_size; - - if (flen > i_size - fpos) { - flen = i_size - fpos; -@@ -53,10 +95,8 @@ static void netfs_pgpriv2_copy_folio(struct netfs_io_request *creq, struct folio - trace_netfs_folio(folio, netfs_folio_trace_store_copy); - - /* Attach the folio to the rolling buffer. */ -- if (rolling_buffer_append(&creq->buffer, folio, 0) < 0) { -- clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &creq->flags); -- return; -- } -+ if (rolling_buffer_append(&wreq->buffer, folio, 0) < 0) -+ return -ENOMEM; - - cache->submit_extendable_to = fsize; - cache->submit_off = 0; -@@ -70,11 +110,11 @@ static void netfs_pgpriv2_copy_folio(struct netfs_io_request *creq, struct folio - do { - ssize_t part; - -- creq->buffer.iter.iov_offset = cache->submit_off; -+ wreq->buffer.iter.iov_offset = cache->submit_off; - -- atomic64_set(&creq->issued_to, fpos + cache->submit_off); -+ atomic64_set(&wreq->issued_to, fpos + cache->submit_off); - cache->submit_extendable_to = fsize - cache->submit_off; -- part = netfs_advance_write(creq, cache, fpos + cache->submit_off, -+ part = netfs_advance_write(wreq, cache, fpos + cache->submit_off, - cache->submit_len, to_eof); - cache->submit_off += part; - if (part > cache->submit_len) -@@ -83,95 +123,98 @@ static void netfs_pgpriv2_copy_folio(struct netfs_io_request *creq, struct folio - cache->submit_len -= part; - } while (cache->submit_len > 0); - -- creq->buffer.iter.iov_offset = 0; -- rolling_buffer_advance(&creq->buffer, fsize); -- atomic64_set(&creq->issued_to, fpos + fsize); -+ wreq->buffer.iter.iov_offset = 0; -+ rolling_buffer_advance(&wreq->buffer, fsize); -+ atomic64_set(&wreq->issued_to, fpos + fsize); - - if (flen < fsize) -- netfs_issue_write(creq, cache); -+ netfs_issue_write(wreq, cache); -+ -+ _leave(" = 0"); -+ return 0; - } - - /* -- * [DEPRECATED] Set up copying to the cache. -+ * [DEPRECATED] Go through the buffer and write any folios that are marked with -+ * the third mark to the cache. 
- */ --static struct netfs_io_request *netfs_pgpriv2_begin_copy_to_cache( -- struct netfs_io_request *rreq, struct folio *folio) -+void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq) - { -- struct netfs_io_request *creq; -+ struct netfs_io_request *wreq; -+ struct folio_queue *folioq; -+ struct folio *folio; -+ int error = 0; -+ int slot = 0; -+ -+ _enter(""); - - if (!fscache_resources_valid(&rreq->cache_resources)) -- goto cancel; -+ goto couldnt_start; - -- creq = netfs_create_write_req(rreq->mapping, NULL, folio_pos(folio), -- NETFS_PGPRIV2_COPY_TO_CACHE); -- if (IS_ERR(creq)) -- goto cancel; -+ /* Need the first folio to be able to set up the op. */ -+ for (folioq = rreq->buffer.tail; folioq; folioq = folioq->next) { -+ if (folioq->marks3) { -+ slot = __ffs(folioq->marks3); -+ break; -+ } -+ } -+ if (!folioq) -+ return; -+ folio = folioq_folio(folioq, slot); - -- if (!creq->io_streams[1].avail) -- goto cancel_put; -+ wreq = netfs_create_write_req(rreq->mapping, NULL, folio_pos(folio), -+ NETFS_PGPRIV2_COPY_TO_CACHE); -+ if (IS_ERR(wreq)) { -+ kleave(" [create %ld]", PTR_ERR(wreq)); -+ goto couldnt_start; -+ } - -- trace_netfs_write(creq, netfs_write_trace_copy_to_cache); -+ trace_netfs_write(wreq, netfs_write_trace_copy_to_cache); - netfs_stat(&netfs_n_wh_copy_to_cache); -- rreq->copy_to_cache = creq; -- return creq; -- --cancel_put: -- netfs_put_request(creq, false, netfs_rreq_trace_put_return); --cancel: -- rreq->copy_to_cache = ERR_PTR(-ENOBUFS); -- clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags); -- return ERR_PTR(-ENOBUFS); --} -- --/* -- * [DEPRECATED] Mark page as requiring copy-to-cache using PG_private_2 and add -- * it to the copy write request. -- */ --void netfs_pgpriv2_copy_to_cache(struct netfs_io_request *rreq, struct folio *folio) --{ -- struct netfs_io_request *creq = rreq->copy_to_cache; -- -- if (!creq) -- creq = netfs_pgpriv2_begin_copy_to_cache(rreq, folio); -- if (IS_ERR(creq)) -- return; -+ if (!wreq->io_streams[1].avail) { -+ netfs_put_request(wreq, false, netfs_rreq_trace_put_return); -+ goto couldnt_start; -+ } - -- trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); -- folio_start_private_2(folio); -- netfs_pgpriv2_copy_folio(creq, folio); --} -+ for (;;) { -+ error = netfs_pgpriv2_copy_folio(wreq, folio); -+ if (error < 0) -+ break; - --/* -- * [DEPRECATED] End writing to the cache, flushing out any outstanding writes. -- */ --void netfs_pgpriv2_end_copy_to_cache(struct netfs_io_request *rreq) --{ -- struct netfs_io_request *creq = rreq->copy_to_cache; -+ folioq_unmark3(folioq, slot); -+ if (!folioq->marks3) { -+ folioq = folioq->next; -+ if (!folioq) -+ break; -+ } - -- if (IS_ERR_OR_NULL(creq)) -- return; -+ slot = __ffs(folioq->marks3); -+ folio = folioq_folio(folioq, slot); -+ } - -- netfs_issue_write(creq, &creq->io_streams[1]); -+ netfs_issue_write(wreq, &wreq->io_streams[1]); - smp_wmb(); /* Write lists before ALL_QUEUED. */ -- set_bit(NETFS_RREQ_ALL_QUEUED, &creq->flags); -+ set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); - -- netfs_put_request(creq, false, netfs_rreq_trace_put_return); -- creq->copy_to_cache = NULL; -+ netfs_put_request(wreq, false, netfs_rreq_trace_put_return); -+ _leave(" = %d", error); -+couldnt_start: -+ netfs_pgpriv2_cancel(&rreq->buffer); - } - - /* - * [DEPRECATED] Remove the PG_private_2 mark from any folios we've finished - * copying. 
- */ --bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *creq) -+bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq) - { -- struct folio_queue *folioq = creq->buffer.tail; -- unsigned long long collected_to = creq->collected_to; -- unsigned int slot = creq->buffer.first_tail_slot; -+ struct folio_queue *folioq = wreq->buffer.tail; -+ unsigned long long collected_to = wreq->collected_to; -+ unsigned int slot = wreq->buffer.first_tail_slot; - bool made_progress = false; - - if (slot >= folioq_nr_slots(folioq)) { -- folioq = rolling_buffer_delete_spent(&creq->buffer); -+ folioq = rolling_buffer_delete_spent(&wreq->buffer); - slot = 0; - } - -@@ -183,16 +226,16 @@ bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *creq) - folio = folioq_folio(folioq, slot); - if (WARN_ONCE(!folio_test_private_2(folio), - "R=%08x: folio %lx is not marked private_2\n", -- creq->debug_id, folio->index)) -+ wreq->debug_id, folio->index)) - trace_netfs_folio(folio, netfs_folio_trace_not_under_wback); - - fpos = folio_pos(folio); - fsize = folio_size(folio); - flen = fsize; - -- fend = min_t(unsigned long long, fpos + flen, creq->i_size); -+ fend = min_t(unsigned long long, fpos + flen, wreq->i_size); - -- trace_netfs_collect_folio(creq, folio, fend, collected_to); -+ trace_netfs_collect_folio(wreq, folio, fend, collected_to); - - /* Unlock any folio we've transferred all of. */ - if (collected_to < fend) -@@ -200,7 +243,7 @@ bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *creq) - - trace_netfs_folio(folio, netfs_folio_trace_end_copy); - folio_end_private_2(folio); -- creq->cleaned_to = fpos + fsize; -+ wreq->cleaned_to = fpos + fsize; - made_progress = true; - - /* Clean up the head folioq. If we clear an entire folioq, then -@@ -210,7 +253,7 @@ bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *creq) - folioq_clear(folioq, slot); - slot++; - if (slot >= folioq_nr_slots(folioq)) { -- folioq = rolling_buffer_delete_spent(&creq->buffer); -+ folioq = rolling_buffer_delete_spent(&wreq->buffer); - if (!folioq) - goto done; - slot = 0; -@@ -220,8 +263,8 @@ bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *creq) - break; - } - -- creq->buffer.tail = folioq; -+ wreq->buffer.tail = folioq; - done: -- creq->buffer.first_tail_slot = slot; -+ wreq->buffer.first_tail_slot = slot; - return made_progress; - } -diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c -index 2290af0d51ac..9a312a21fc15 100644 ---- a/fs/netfs/read_retry.c -+++ b/fs/netfs/read_retry.c -@@ -12,7 +12,15 @@ - static void netfs_reissue_read(struct netfs_io_request *rreq, - struct netfs_io_subrequest *subreq) - { -- __clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); -+ struct iov_iter *io_iter = &subreq->io_iter; -+ -+ if (iov_iter_is_folioq(io_iter)) { -+ subreq->curr_folioq = (struct folio_queue *)io_iter->folioq; -+ subreq->curr_folioq_slot = io_iter->folioq_slot; -+ subreq->curr_folio_order = subreq->curr_folioq->orders[subreq->curr_folioq_slot]; -+ } -+ -+ atomic_inc(&rreq->nr_outstanding); - __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); - netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); - subreq->rreq->netfs_ops->issue_read(subreq); -@@ -25,12 +33,13 @@ static void netfs_reissue_read(struct netfs_io_request *rreq, - static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) - { - struct netfs_io_subrequest *subreq; -- struct netfs_io_stream *stream = &rreq->io_streams[0]; -- struct list_head *next; -+ struct 
netfs_io_stream *stream0 = &rreq->io_streams[0]; -+ LIST_HEAD(sublist); -+ LIST_HEAD(queue); - - _enter("R=%x", rreq->debug_id); - -- if (list_empty(&stream->subrequests)) -+ if (list_empty(&rreq->subrequests)) - return; - - if (rreq->netfs_ops->retry_request) -@@ -41,7 +50,9 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) - */ - if (!rreq->netfs_ops->prepare_read && - !rreq->cache_resources.ops) { -- list_for_each_entry(subreq, &stream->subrequests, rreq_link) { -+ struct netfs_io_subrequest *subreq; -+ -+ list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { - if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) - break; - if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) { -@@ -64,44 +75,48 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) - * populating with smaller subrequests. In the event that the subreq - * we just launched finishes before we insert the next subreq, it'll - * fill in rreq->prev_donated instead. -- * -+ - * Note: Alternatively, we could split the tail subrequest right before - * we reissue it and fix up the donations under lock. - */ -- next = stream->subrequests.next; -+ list_splice_init(&rreq->subrequests, &queue); - - do { -- struct netfs_io_subrequest *from, *to, *tmp; -+ struct netfs_io_subrequest *from; - struct iov_iter source; - unsigned long long start, len; -- size_t part; -+ size_t part, deferred_next_donated = 0; - bool boundary = false; - - /* Go through the subreqs and find the next span of contiguous - * buffer that we then rejig (cifs, for example, needs the - * rsize renegotiating) and reissue. - */ -- from = list_entry(next, struct netfs_io_subrequest, rreq_link); -- to = from; -+ from = list_first_entry(&queue, struct netfs_io_subrequest, rreq_link); -+ list_move_tail(&from->rreq_link, &sublist); - start = from->start + from->transferred; - len = from->len - from->transferred; - -- _debug("from R=%08x[%x] s=%llx ctl=%zx/%zx", -+ _debug("from R=%08x[%x] s=%llx ctl=%zx/%zx/%zx", - rreq->debug_id, from->debug_index, -- from->start, from->transferred, from->len); -+ from->start, from->consumed, from->transferred, from->len); - - if (test_bit(NETFS_SREQ_FAILED, &from->flags) || - !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags)) - goto abandon; - -- list_for_each_continue(next, &stream->subrequests) { -- subreq = list_entry(next, struct netfs_io_subrequest, rreq_link); -- if (subreq->start + subreq->transferred != start + len || -- test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) || -+ deferred_next_donated = from->next_donated; -+ while ((subreq = list_first_entry_or_null( -+ &queue, struct netfs_io_subrequest, rreq_link))) { -+ if (subreq->start != start + len || -+ subreq->transferred > 0 || - !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) - break; -- to = subreq; -- len += to->len; -+ list_move_tail(&subreq->rreq_link, &sublist); -+ len += subreq->len; -+ deferred_next_donated = subreq->next_donated; -+ if (test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags)) -+ break; - } - - _debug(" - range: %llx-%llx %llx", start, start + len - 1, len); -@@ -114,31 +129,38 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) - source.count = len; - - /* Work through the sublist. 
*/ -- subreq = from; -- list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) { -- if (!len) -- break; -+ while ((subreq = list_first_entry_or_null( -+ &sublist, struct netfs_io_subrequest, rreq_link))) { -+ list_del(&subreq->rreq_link); -+ - subreq->source = NETFS_DOWNLOAD_FROM_SERVER; - subreq->start = start - subreq->transferred; - subreq->len = len + subreq->transferred; -+ stream0->sreq_max_len = subreq->len; -+ - __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); - __clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); - subreq->retry_count++; - -+ spin_lock(&rreq->lock); -+ list_add_tail(&subreq->rreq_link, &rreq->subrequests); -+ subreq->prev_donated += rreq->prev_donated; -+ rreq->prev_donated = 0; - trace_netfs_sreq(subreq, netfs_sreq_trace_retry); -+ spin_unlock(&rreq->lock); -+ -+ BUG_ON(!len); - - /* Renegotiate max_len (rsize) */ -- stream->sreq_max_len = subreq->len; - if (rreq->netfs_ops->prepare_read && - rreq->netfs_ops->prepare_read(subreq) < 0) { - trace_netfs_sreq(subreq, netfs_sreq_trace_reprep_failed); - __set_bit(NETFS_SREQ_FAILED, &subreq->flags); -- goto abandon; - } - -- part = umin(len, stream->sreq_max_len); -- if (unlikely(stream->sreq_max_segs)) -- part = netfs_limit_iter(&source, 0, part, stream->sreq_max_segs); -+ part = umin(len, stream0->sreq_max_len); -+ if (unlikely(rreq->io_streams[0].sreq_max_segs)) -+ part = netfs_limit_iter(&source, 0, part, stream0->sreq_max_segs); - subreq->len = subreq->transferred + part; - subreq->io_iter = source; - iov_iter_truncate(&subreq->io_iter, part); -@@ -148,105 +170,57 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) - if (!len) { - if (boundary) - __set_bit(NETFS_SREQ_BOUNDARY, &subreq->flags); -+ subreq->next_donated = deferred_next_donated; - } else { - __clear_bit(NETFS_SREQ_BOUNDARY, &subreq->flags); -+ subreq->next_donated = 0; - } - -- netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); - netfs_reissue_read(rreq, subreq); -- if (subreq == to) -+ if (!len) - break; -- } - -- /* If we managed to use fewer subreqs, we can discard the -- * excess; if we used the same number, then we're done. -- */ -- if (!len) { -- if (subreq == to) -- continue; -- list_for_each_entry_safe_from(subreq, tmp, -- &stream->subrequests, rreq_link) { -- trace_netfs_sreq(subreq, netfs_sreq_trace_discard); -- list_del(&subreq->rreq_link); -- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done); -- if (subreq == to) -- break; -+ /* If we ran out of subrequests, allocate another. */ -+ if (list_empty(&sublist)) { -+ subreq = netfs_alloc_subrequest(rreq); -+ if (!subreq) -+ goto abandon; -+ subreq->source = NETFS_DOWNLOAD_FROM_SERVER; -+ subreq->start = start; -+ -+ /* We get two refs, but need just one. */ -+ netfs_put_subrequest(subreq, false, netfs_sreq_trace_new); -+ trace_netfs_sreq(subreq, netfs_sreq_trace_split); -+ list_add_tail(&subreq->rreq_link, &sublist); - } -- continue; - } - -- /* We ran out of subrequests, so we need to allocate some more -- * and insert them after. -+ /* If we managed to use fewer subreqs, we can discard the -+ * excess. 
- */ -- do { -- subreq = netfs_alloc_subrequest(rreq); -- if (!subreq) { -- subreq = to; -- goto abandon_after; -- } -- subreq->source = NETFS_DOWNLOAD_FROM_SERVER; -- subreq->start = start; -- subreq->len = len; -- subreq->debug_index = atomic_inc_return(&rreq->subreq_counter); -- subreq->stream_nr = stream->stream_nr; -- subreq->retry_count = 1; -- -- trace_netfs_sreq_ref(rreq->debug_id, subreq->debug_index, -- refcount_read(&subreq->ref), -- netfs_sreq_trace_new); -- netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); -- -- list_add(&subreq->rreq_link, &to->rreq_link); -- to = list_next_entry(to, rreq_link); -- trace_netfs_sreq(subreq, netfs_sreq_trace_retry); -- -- stream->sreq_max_len = umin(len, rreq->rsize); -- stream->sreq_max_segs = 0; -- if (unlikely(stream->sreq_max_segs)) -- part = netfs_limit_iter(&source, 0, part, stream->sreq_max_segs); -- -- netfs_stat(&netfs_n_rh_download); -- if (rreq->netfs_ops->prepare_read(subreq) < 0) { -- trace_netfs_sreq(subreq, netfs_sreq_trace_reprep_failed); -- __set_bit(NETFS_SREQ_FAILED, &subreq->flags); -- goto abandon; -- } -- -- part = umin(len, stream->sreq_max_len); -- subreq->len = subreq->transferred + part; -- subreq->io_iter = source; -- iov_iter_truncate(&subreq->io_iter, part); -- iov_iter_advance(&source, part); -- -- len -= part; -- start += part; -- if (!len && boundary) { -- __set_bit(NETFS_SREQ_BOUNDARY, &to->flags); -- boundary = false; -- } -- -- netfs_reissue_read(rreq, subreq); -- } while (len); -+ while ((subreq = list_first_entry_or_null( -+ &sublist, struct netfs_io_subrequest, rreq_link))) { -+ trace_netfs_sreq(subreq, netfs_sreq_trace_discard); -+ list_del(&subreq->rreq_link); -+ netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done); -+ } - -- } while (!list_is_head(next, &stream->subrequests)); -+ } while (!list_empty(&queue)); - - return; - -- /* If we hit an error, fail all remaining incomplete subrequests */ --abandon_after: -- if (list_is_last(&subreq->rreq_link, &stream->subrequests)) -- return; -- subreq = list_next_entry(subreq, rreq_link); -+ /* If we hit ENOMEM, fail all remaining subrequests */ - abandon: -- list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) { -- if (!subreq->error && -- !test_bit(NETFS_SREQ_FAILED, &subreq->flags) && -- !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) -- continue; -- subreq->error = -ENOMEM; -- __set_bit(NETFS_SREQ_FAILED, &subreq->flags); -+ list_splice_init(&sublist, &queue); -+ list_for_each_entry(subreq, &queue, rreq_link) { -+ if (!subreq->error) -+ subreq->error = -ENOMEM; -+ __clear_bit(NETFS_SREQ_FAILED, &subreq->flags); - __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); - } -+ spin_lock(&rreq->lock); -+ list_splice_tail_init(&queue, &rreq->subrequests); -+ spin_unlock(&rreq->lock); - } - - /* -@@ -254,19 +228,14 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) - */ - void netfs_retry_reads(struct netfs_io_request *rreq) - { -- struct netfs_io_subrequest *subreq; -- struct netfs_io_stream *stream = &rreq->io_streams[0]; -+ trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit); - -- /* Wait for all outstanding I/O to quiesce before performing retries as -- * we may need to renegotiate the I/O sizes. 
-- */ -- list_for_each_entry(subreq, &stream->subrequests, rreq_link) { -- wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS, -- TASK_UNINTERRUPTIBLE); -- } -+ atomic_inc(&rreq->nr_outstanding); - -- trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit); - netfs_retry_read_subrequests(rreq); -+ -+ if (atomic_dec_and_test(&rreq->nr_outstanding)) -+ netfs_rreq_terminated(rreq); - } - - /* -diff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c -index fea0ecdecc53..2443b6885fd1 100644 ---- a/fs/netfs/read_single.c -+++ b/fs/netfs/read_single.c -@@ -77,7 +77,6 @@ static void netfs_single_read_cache(struct netfs_io_request *rreq, - { - struct netfs_cache_resources *cres = &rreq->cache_resources; - -- _enter("R=%08x[%x]", rreq->debug_id, subreq->debug_index); - netfs_stat(&netfs_n_rh_read); - cres->ops->read(cres, subreq->start, &subreq->io_iter, NETFS_READ_HOLE_FAIL, - netfs_cache_read_terminated, subreq); -@@ -89,28 +88,28 @@ static void netfs_single_read_cache(struct netfs_io_request *rreq, - */ - static int netfs_single_dispatch_read(struct netfs_io_request *rreq) - { -- struct netfs_io_stream *stream = &rreq->io_streams[0]; - struct netfs_io_subrequest *subreq; - int ret = 0; - -+ atomic_set(&rreq->nr_outstanding, 1); -+ - subreq = netfs_alloc_subrequest(rreq); -- if (!subreq) -- return -ENOMEM; -+ if (!subreq) { -+ ret = -ENOMEM; -+ goto out; -+ } - - subreq->source = NETFS_SOURCE_UNKNOWN; - subreq->start = 0; - subreq->len = rreq->len; - subreq->io_iter = rreq->buffer.iter; - -- __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); -+ atomic_inc(&rreq->nr_outstanding); - -- spin_lock(&rreq->lock); -- list_add_tail(&subreq->rreq_link, &stream->subrequests); -+ spin_lock_bh(&rreq->lock); -+ list_add_tail(&subreq->rreq_link, &rreq->subrequests); - trace_netfs_sreq(subreq, netfs_sreq_trace_added); -- stream->front = subreq; -- /* Store list pointers before active flag */ -- smp_store_release(&stream->active, true); -- spin_unlock(&rreq->lock); -+ spin_unlock_bh(&rreq->lock); - - netfs_single_cache_prepare_read(rreq, subreq); - switch (subreq->source) { -@@ -138,12 +137,14 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq) - break; - } - -- smp_wmb(); /* Write lists before ALL_QUEUED. 
*/ -- set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); -+out: -+ if (atomic_dec_and_test(&rreq->nr_outstanding)) -+ netfs_rreq_terminated(rreq); - return ret; - cancel: -+ atomic_dec(&rreq->nr_outstanding); - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel); -- return ret; -+ goto out; - } - - /** -@@ -184,7 +185,13 @@ ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_ite - rreq->buffer.iter = *iter; - netfs_single_dispatch_read(rreq); - -- ret = netfs_wait_for_read(rreq); -+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip); -+ wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS, -+ TASK_UNINTERRUPTIBLE); -+ -+ ret = rreq->error; -+ if (ret == 0) -+ ret = rreq->transferred; - netfs_put_request(rreq, true, netfs_rreq_trace_put_return); - return ret; - -diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c -index 294f67795f79..04d1ca292cf5 100644 ---- a/fs/netfs/write_collect.c -+++ b/fs/netfs/write_collect.c -@@ -323,9 +323,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) - goto need_retry; - if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) { - trace_netfs_rreq(wreq, netfs_rreq_trace_unpause); -- clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags); -- smp_mb__after_atomic(); /* Set PAUSE before task state */ -- wake_up(&wreq->waitq); -+ clear_and_wake_up_bit(NETFS_RREQ_PAUSE, &wreq->flags); - } - - if (notes & NEED_REASSESS) { -diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c -index 69727411683e..6f14a7c2f040 100644 ---- a/fs/netfs/write_issue.c -+++ b/fs/netfs/write_issue.c -@@ -723,7 +723,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t - rolling_buffer_advance(&wreq->buffer, part); - if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) { - trace_netfs_rreq(wreq, netfs_rreq_trace_wait_pause); -- wait_event(wreq->waitq, !test_bit(NETFS_RREQ_PAUSE, &wreq->flags)); -+ wait_on_bit(&wreq->flags, NETFS_RREQ_PAUSE, TASK_UNINTERRUPTIBLE); - } - if (test_bit(NETFS_RREQ_FAILED, &wreq->flags)) - break; -diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c -index c841a851dd73..f3d5e37d4698 100644 ---- a/fs/netfs/write_retry.c -+++ b/fs/netfs/write_retry.c -@@ -93,21 +93,15 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, - list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) { - if (!len) - break; -- -- subreq->start = start; -- subreq->len = len; -+ /* Renegotiate max_len (wsize) */ -+ trace_netfs_sreq(subreq, netfs_sreq_trace_retry); - __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); - subreq->retry_count++; -- trace_netfs_sreq(subreq, netfs_sreq_trace_retry); -- -- /* Renegotiate max_len (wsize) */ -- stream->sreq_max_len = len; - stream->prepare_write(subreq); - -- part = umin(len, stream->sreq_max_len); -- if (unlikely(stream->sreq_max_segs)) -- part = netfs_limit_iter(&source, 0, part, stream->sreq_max_segs); -+ part = min(len, stream->sreq_max_len); - subreq->len = part; -+ subreq->start = start; - subreq->transferred = 0; - len -= part; - start += part; -diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c -index 7f1cacc89dbb..f42fdb26954e 100644 ---- a/fs/smb/client/cifssmb.c -+++ b/fs/smb/client/cifssmb.c -@@ -1331,8 +1331,6 @@ cifs_readv_callback(struct mid_q_entry *mid) - } else if (rdata->got_bytes > 0) { - __set_bit(NETFS_SREQ_MADE_PROGRESS, &rdata->subreq.flags); - } -- if (rdata->got_bytes) -- __set_bit(NETFS_SREQ_MADE_PROGRESS, &rdata->subreq.flags); - } - - rdata->credits.value = 0; -diff 
--git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c -index 9f54596a6866..84ab3138e39c 100644 ---- a/fs/smb/client/smb2pdu.c -+++ b/fs/smb/client/smb2pdu.c -@@ -4607,8 +4607,7 @@ smb2_readv_callback(struct mid_q_entry *mid) - __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags); - rdata->result = 0; - } -- if (rdata->got_bytes) -- __set_bit(NETFS_SREQ_MADE_PROGRESS, &rdata->subreq.flags); -+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &rdata->subreq.flags); - } - trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, rdata->credits.value, - server->credits, server->in_flight, -@@ -4617,7 +4616,7 @@ smb2_readv_callback(struct mid_q_entry *mid) - rdata->subreq.error = rdata->result; - rdata->subreq.transferred += rdata->got_bytes; - trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_progress); -- netfs_read_subreq_terminated(&rdata->subreq); -+ queue_work(cifsiod_wq, &rdata->subreq.work); - release_mid(mid); - trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0, - server->credits, server->in_flight, -diff --git a/include/linux/netfs.h b/include/linux/netfs.h -index 071d05d81d38..27e62f7d2940 100644 ---- a/include/linux/netfs.h -+++ b/include/linux/netfs.h -@@ -181,6 +181,9 @@ struct netfs_io_subrequest { - unsigned long long start; /* Where to start the I/O */ - size_t len; /* Size of the I/O */ - size_t transferred; /* Amount of data transferred */ -+ size_t consumed; /* Amount of read data consumed */ -+ size_t prev_donated; /* Amount of data donated from previous subreq */ -+ size_t next_donated; /* Amount of data donated from next subreq */ - refcount_t ref; - short error; /* 0 or error that occurred */ - unsigned short debug_index; /* Index in list (for debugging output) */ -@@ -188,6 +191,9 @@ struct netfs_io_subrequest { - u8 retry_count; /* The number of retries (0 on initial pass) */ - enum netfs_io_source source; /* Where to read from/write to */ - unsigned char stream_nr; /* I/O stream this belongs to */ -+ unsigned char curr_folioq_slot; /* Folio currently being read */ -+ unsigned char curr_folio_order; /* Order of folio */ -+ struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */ - unsigned long flags; - #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ - #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ -@@ -230,16 +236,15 @@ struct netfs_io_request { - struct address_space *mapping; /* The mapping being accessed */ - struct kiocb *iocb; /* AIO completion vector */ - struct netfs_cache_resources cache_resources; -- struct netfs_io_request *copy_to_cache; /* Request to write just-read data to the cache */ - struct readahead_control *ractl; /* Readahead descriptor */ - struct list_head proc_link; /* Link in netfs_iorequests */ -+ struct list_head subrequests; /* Contributory I/O operations */ - struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ - #define NR_IO_STREAMS 2 //wreq->nr_io_streams - struct netfs_group *group; /* Writeback group being written back */ - struct rolling_buffer buffer; /* Unencrypted buffer */ - #define NETFS_ROLLBUF_PUT_MARK ROLLBUF_MARK_1 - #define NETFS_ROLLBUF_PAGECACHE_MARK ROLLBUF_MARK_2 -- wait_queue_head_t waitq; /* Processor waiter */ - void *netfs_priv; /* Private data for the netfs */ - void *netfs_priv2; /* Private data for the netfs */ - struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */ -@@ -250,6 +255,7 @@ struct netfs_io_request { - atomic_t subreq_counter; /* Next subreq->debug_index */ 
- unsigned int nr_group_rel; /* Number of refs to release on ->group */
- spinlock_t lock; /* Lock for queuing subreqs */
-+ atomic_t nr_outstanding; /* Number of ops in progress */
- unsigned long long submitted; /* Amount submitted for I/O so far */
- unsigned long long len; /* Length of the request */
- size_t transferred; /* Amount to be indicated as transferred */
-@@ -261,17 +267,14 @@ struct netfs_io_request {
- atomic64_t issued_to; /* Write issuer folio cursor */
- unsigned long long collected_to; /* Point we've collected to */
- unsigned long long cleaned_to; /* Position we've cleaned folios to */
-- unsigned long long abandon_to; /* Position to abandon folios to */
- pgoff_t no_unlock_folio; /* Don't unlock this folio after read */
-- unsigned char front_folio_order; /* Order (size) of front folio */
-+ size_t prev_donated; /* Fallback for subreq->prev_donated */
- refcount_t ref;
- unsigned long flags;
--#define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */
- #define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */
- #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */
- #define NETFS_RREQ_FAILED 4 /* The request failed */
- #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */
--#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */
- #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */
- #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */
- #define NETFS_RREQ_BLOCKED 10 /* We blocked */
-@@ -436,6 +439,7 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
- /* (Sub)request management API. */
- void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq);
- void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq);
-+void netfs_read_subreq_termination_worker(struct work_struct *work);
- void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
- enum netfs_sreq_ref_trace what);
- void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
-diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
-index 6e699cadcb29..6df2e7313371 100644
---- a/include/trace/events/netfs.h
-+++ b/include/trace/events/netfs.h
-@@ -50,23 +50,18 @@
- EM(netfs_rreq_trace_assess, "ASSESS ") \
- EM(netfs_rreq_trace_copy, "COPY ") \
- EM(netfs_rreq_trace_collect, "COLLECT") \
-- EM(netfs_rreq_trace_complete, "COMPLET") \
- EM(netfs_rreq_trace_dirty, "DIRTY ") \
- EM(netfs_rreq_trace_done, "DONE ") \
- EM(netfs_rreq_trace_free, "FREE ") \
- EM(netfs_rreq_trace_redirty, "REDIRTY") \
- EM(netfs_rreq_trace_resubmit, "RESUBMT") \
-- EM(netfs_rreq_trace_set_abandon, "S-ABNDN") \
- EM(netfs_rreq_trace_set_pause, "PAUSE ") \
- EM(netfs_rreq_trace_unlock, "UNLOCK ") \
- EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \
- EM(netfs_rreq_trace_unmark, "UNMARK ") \
- EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \
- EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \
-- EM(netfs_rreq_trace_wait_queue, "WAIT-Q ") \
- EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \
-- EM(netfs_rreq_trace_wake_queue, "WAKE-Q ") \
-- EM(netfs_rreq_trace_woke_queue, "WOKE-Q ") \
- EM(netfs_rreq_trace_unpause, "UNPAUSE") \
- E_(netfs_rreq_trace_write_done, "WR-DONE")
-
-@@ -86,7 +81,6 @@
- EM(netfs_sreq_trace_cache_nowrite, "CA-NW") \
- EM(netfs_sreq_trace_cache_prepare, "CA-PR") \
- EM(netfs_sreq_trace_cache_write, "CA-WR") \
-- EM(netfs_sreq_trace_cancel, "CANCL") \
- EM(netfs_sreq_trace_clear, "CLEAR") \
- EM(netfs_sreq_trace_discard, "DSCRD") \
- EM(netfs_sreq_trace_donate_to_prev, "DON-P") \
-@@ -97,9 +91,6 @@
- EM(netfs_sreq_trace_hit_eof, "EOF ") \
- EM(netfs_sreq_trace_io_progress, "IO ") \
- EM(netfs_sreq_trace_limited, "LIMIT") \
-- EM(netfs_sreq_trace_need_clear, "N-CLR") \
-- EM(netfs_sreq_trace_partial_read, "PARTR") \
-- EM(netfs_sreq_trace_need_retry, "NRTRY") \
- EM(netfs_sreq_trace_prepare, "PREP ") \
- EM(netfs_sreq_trace_prep_failed, "PRPFL") \
- EM(netfs_sreq_trace_progress, "PRGRS") \
-@@ -145,7 +136,6 @@
- EM(netfs_sreq_trace_get_submit, "GET SUBMIT") \
- EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \
- EM(netfs_sreq_trace_new, "NEW ") \
-- EM(netfs_sreq_trace_put_abandon, "PUT ABANDON") \
- EM(netfs_sreq_trace_put_cancel, "PUT CANCEL ") \
- EM(netfs_sreq_trace_put_clear, "PUT CLEAR ") \
- EM(netfs_sreq_trace_put_consumed, "PUT CONSUME") \
-@@ -186,7 +176,6 @@
- EM(netfs_folio_trace_mkwrite, "mkwrite") \
- EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \
- EM(netfs_folio_trace_not_under_wback, "!wback") \
-- EM(netfs_folio_trace_not_locked, "!locked") \
- EM(netfs_folio_trace_put, "put") \
- EM(netfs_folio_trace_read, "read") \
- EM(netfs_folio_trace_read_done, "read-done") \
-@@ -215,6 +204,7 @@
- EM(netfs_trace_folioq_clear, "clear") \
- EM(netfs_trace_folioq_delete, "delete") \
- EM(netfs_trace_folioq_make_space, "make-space") \
-+ EM(netfs_trace_folioq_prep_write, "prep-wr") \
- EM(netfs_trace_folioq_rollbuf_init, "roll-init") \
- E_(netfs_trace_folioq_read_progress, "r-progress")
-
-@@ -362,7 +352,7 @@ TRACE_EVENT(netfs_sreq,
- __entry->len = sreq->len;
- __entry->transferred = sreq->transferred;
- __entry->start = sreq->start;
-- __entry->slot = sreq->io_iter.folioq_slot;
-+ __entry->slot = sreq->curr_folioq_slot;
- ),
-
- TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx s=%u e=%d",
-@@ -711,6 +701,71 @@ TRACE_EVENT(netfs_collect_stream,
- __entry->collected_to, __entry->front)
- );
-
-+TRACE_EVENT(netfs_progress,
-+ TP_PROTO(const struct netfs_io_subrequest *subreq,
-+ unsigned long long start, size_t avail, size_t part),
-+
-+ TP_ARGS(subreq, start, avail, part),
-+
-+ TP_STRUCT__entry(
-+ __field(unsigned int, rreq)
-+ __field(unsigned int, subreq)
-+ __field(unsigned int, consumed)
-+ __field(unsigned int, transferred)
-+ __field(unsigned long long, f_start)
-+ __field(unsigned int, f_avail)
-+ __field(unsigned int, f_part)
-+ __field(unsigned char, slot)
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->rreq = subreq->rreq->debug_id;
-+ __entry->subreq = subreq->debug_index;
-+ __entry->consumed = subreq->consumed;
-+ __entry->transferred = subreq->transferred;
-+ __entry->f_start = start;
-+ __entry->f_avail = avail;
-+ __entry->f_part = part;
-+ __entry->slot = subreq->curr_folioq_slot;
-+ ),
-+
-+ TP_printk("R=%08x[%02x] s=%llx ct=%x/%x pa=%x/%x sl=%x",
-+ __entry->rreq, __entry->subreq, __entry->f_start,
-+ __entry->consumed, __entry->transferred,
-+ __entry->f_part, __entry->f_avail, __entry->slot)
-+ );
-+
-+TRACE_EVENT(netfs_donate,
-+ TP_PROTO(const struct netfs_io_request *rreq,
-+ const struct netfs_io_subrequest *from,
-+ const struct netfs_io_subrequest *to,
-+ size_t amount,
-+ enum netfs_donate_trace trace),
-+
-+ TP_ARGS(rreq, from, to, amount, trace),
-+
-+ TP_STRUCT__entry(
-+ __field(unsigned int, rreq)
-+ __field(unsigned int, from)
-+ __field(unsigned int, to)
-+ __field(unsigned int, amount)
-+ __field(enum netfs_donate_trace, trace)
-+ ),
-+
-+ TP_fast_assign(
-+ __entry->rreq = rreq->debug_id;
-+ __entry->from = from->debug_index;
-+ __entry->to = to ? to->debug_index : -1;
-+ __entry->amount = amount;
-+ __entry->trace = trace;
-+ ),
-+
-+ TP_printk("R=%08x[%02x] -> [%02x] %s am=%x",
-+ __entry->rreq, __entry->from, __entry->to,
-+ __print_symbolic(__entry->trace, netfs_donate_traces),
-+ __entry->amount)
-+ );
-+
- TRACE_EVENT(netfs_folioq,
- TP_PROTO(const struct folio_queue *fq,
- enum netfs_folioq_trace trace),
---
-2.48.1
-
diff --git a/ci/diffs/9998-sched_ext-Fix-invalid-irq-restore-in-scx_ops_bypass.patch b/ci/diffs/9998-sched_ext-Fix-invalid-irq-restore-in-scx_ops_bypass.patch
deleted file mode 100644
index 0f65cb4..0000000
--- a/ci/diffs/9998-sched_ext-Fix-invalid-irq-restore-in-scx_ops_bypass.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 10e1d78546b3dd4ea9d773c0b0257064a99211e9 Mon Sep 17 00:00:00 2001
-From: Tejun Heo
-Date: Wed, 11 Dec 2024 11:01:51 -1000
-Subject: [PATCH] sched_ext: Fix invalid irq restore in scx_ops_bypass()
-
-While adding outer irqsave/restore locking, 0e7ffff1b811 ("scx: Fix raciness
-in scx_ops_bypass()") forgot to convert an inner rq_unlock_irqrestore() to
-rq_unlock() which could re-enable IRQ prematurely leading to the following
-warning:
-
- raw_local_irq_restore() called with IRQs enabled
- WARNING: CPU: 1 PID: 96 at kernel/locking/irqflag-debug.c:10 warn_bogus_irq_restore+0x30/0x40
- ...
- Sched_ext: create_dsq (enabling)
- pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
- pc : warn_bogus_irq_restore+0x30/0x40
- lr : warn_bogus_irq_restore+0x30/0x40
- ...
- Call trace:
- warn_bogus_irq_restore+0x30/0x40 (P)
- warn_bogus_irq_restore+0x30/0x40 (L)
- scx_ops_bypass+0x224/0x3b8
- scx_ops_enable.isra.0+0x2c8/0xaa8
- bpf_scx_reg+0x18/0x30
- ...
- irq event stamp: 33739
- hardirqs last enabled at (33739): [] scx_ops_bypass+0x174/0x3b8
- hardirqs last disabled at (33738): [] _raw_spin_lock_irqsave+0xb4/0xd8
-
-Drop the stray _irqrestore().
-
-Signed-off-by: Tejun Heo
-Reported-by: Ihor Solodrai
-Link: http://lkml.kernel.org/r/qC39k3UsonrBYD_SmuxHnZIQLsuuccoCrkiqb_BT7DvH945A1_LZwE4g-5Pu9FcCtqZt4lY1HhIPi0homRuNWxkgo1rgP3bkxa0donw8kV4=@pm.me
-Fixes: 0e7ffff1b811 ("scx: Fix raciness in scx_ops_bypass()")
-Cc: stable@vger.kernel.org # v6.12
----
- kernel/sched/ext.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
-index 7fff1d045477..98519e6d0dcd 100644
---- a/kernel/sched/ext.c
-+++ b/kernel/sched/ext.c
-@@ -4763,7 +4763,7 @@ static void scx_ops_bypass(bool bypass)
- * sees scx_rq_bypassing() before moving tasks to SCX.
- */
- if (!scx_enabled()) {
-- rq_unlock_irqrestore(rq, &rf);
-+ rq_unlock(rq, &rf);
- continue;
- }
-
---
-2.47.1
-
diff --git a/ci/diffs/9999-scx-Fix-maximal-BPF-selftest-prog.patch b/ci/diffs/9999-scx-Fix-maximal-BPF-selftest-prog.patch
deleted file mode 100644
index 9b5e6d5..0000000
--- a/ci/diffs/9999-scx-Fix-maximal-BPF-selftest-prog.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 70414cacbe536a197d56f58a42f9563e5a01b8ec Mon Sep 17 00:00:00 2001
-From: David Vernet
-Date: Mon, 9 Dec 2024 09:29:24 -0600
-Subject: [PATCH] scx: Fix maximal BPF selftest prog
-
-maximal.bpf.c is still dispatching to and consuming from SCX_DSQ_GLOBAL.
-Let's have it use its own DSQ to avoid any runtime errors.
- -Signed-off-by: David Vernet ---- - tools/testing/selftests/sched_ext/maximal.bpf.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/tools/testing/selftests/sched_ext/maximal.bpf.c b/tools/testing/selftests/sched_ext/maximal.bpf.c -index 4c005fa71810..430f5e13bf55 100644 ---- a/tools/testing/selftests/sched_ext/maximal.bpf.c -+++ b/tools/testing/selftests/sched_ext/maximal.bpf.c -@@ -12,6 +12,8 @@ - - char _license[] SEC("license") = "GPL"; - -+#define DSQ_ID 0 -+ - s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu, - u64 wake_flags) - { -@@ -20,7 +22,7 @@ s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu, - - void BPF_STRUCT_OPS(maximal_enqueue, struct task_struct *p, u64 enq_flags) - { -- scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); -+ scx_bpf_dsq_insert(p, DSQ_ID, SCX_SLICE_DFL, enq_flags); - } - - void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags) -@@ -28,7 +30,7 @@ void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags) - - void BPF_STRUCT_OPS(maximal_dispatch, s32 cpu, struct task_struct *prev) - { -- scx_bpf_dsq_move_to_local(SCX_DSQ_GLOBAL); -+ scx_bpf_dsq_move_to_local(DSQ_ID); - } - - void BPF_STRUCT_OPS(maximal_runnable, struct task_struct *p, u64 enq_flags) -@@ -123,7 +125,7 @@ void BPF_STRUCT_OPS(maximal_cgroup_set_weight, struct cgroup *cgrp, u32 weight) - - s32 BPF_STRUCT_OPS_SLEEPABLE(maximal_init) - { -- return 0; -+ return scx_bpf_create_dsq(DSQ_ID, -1); - } - - void BPF_STRUCT_OPS(maximal_exit, struct scx_exit_info *info) --- -2.47.1 -