Skip to content

Commit d70c937

Browse files
author
Rafael Aquini
committed
userfaultfd: move userfaultfd_ctx struct to header file
JIRA: https://issues.redhat.com/browse/RHEL-84184
JIRA: https://issues.redhat.com/browse/RHEL-80529

This patch is a backport of the following upstream commit:

commit f91e6b4
Author: Lokesh Gidra <[email protected]>
Date: Thu Feb 15 10:27:53 2024 -0800

userfaultfd: move userfaultfd_ctx struct to header file

Patch series "per-vma locks in userfaultfd", v7.

Performing userfaultfd operations (like copy/move etc.) in critical section of mmap_lock (read-mode) causes significant contention on the lock when operations requiring the lock in write-mode are taking place concurrently. We can use per-vma locks instead to significantly reduce the contention issue.

Android runtime's Garbage Collector uses userfaultfd for concurrent compaction. mmap-lock contention during compaction potentially causes jittery experience for the user. During one such reproducible scenario, we observed the following improvements with this patch-set:

- Wall clock time of compaction phase came down from ~3s to <500ms
- Uninterruptible sleep time (across all threads in the process) was ~10ms (none in mmap_lock) during compaction, instead of >20s

This patch (of 4):

Move the struct to userfaultfd_k.h to be accessible from mm/userfaultfd.c. There are no other changes in the struct.

This is required to prepare for using per-vma locks in userfaultfd operations.

Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Lokesh Gidra <[email protected]>
Reviewed-by: Mike Rapoport (IBM) <[email protected]>
Reviewed-by: Liam R. Howlett <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Axel Rasmussen <[email protected]>
Cc: Brian Geffon <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Jann Horn <[email protected]>
Cc: Kalesh Singh <[email protected]>
Cc: Matthew Wilcox (Oracle) <[email protected]>
Cc: Nicolas Geoffray <[email protected]>
Cc: Peter Xu <[email protected]>
Cc: Ryan Roberts <[email protected]>
Cc: Suren Baghdasaryan <[email protected]>
Cc: Tim Murray <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Rafael Aquini <[email protected]>
1 parent 4480132 commit d70c937

File tree

2 files changed

+39
-39
lines changed

2 files changed

+39
-39
lines changed

fs/userfaultfd.c

Lines changed: 0 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -37,45 +37,6 @@ int sysctl_unprivileged_userfaultfd __read_mostly;
3737

3838
static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
3939

40-
/*
41-
* Start with fault_pending_wqh and fault_wqh so they're more likely
42-
* to be in the same cacheline.
43-
*
44-
* Locking order:
45-
* fd_wqh.lock
46-
* fault_pending_wqh.lock
47-
* fault_wqh.lock
48-
* event_wqh.lock
49-
*
50-
* To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
51-
* since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
52-
* also taken in IRQ context.
53-
*/
54-
struct userfaultfd_ctx {
55-
/* waitqueue head for the pending (i.e. not read) userfaults */
56-
wait_queue_head_t fault_pending_wqh;
57-
/* waitqueue head for the userfaults */
58-
wait_queue_head_t fault_wqh;
59-
/* waitqueue head for the pseudo fd to wakeup poll/read */
60-
wait_queue_head_t fd_wqh;
61-
/* waitqueue head for events */
62-
wait_queue_head_t event_wqh;
63-
/* a refile sequence protected by fault_pending_wqh lock */
64-
seqcount_spinlock_t refile_seq;
65-
/* pseudo fd refcounting */
66-
refcount_t refcount;
67-
/* userfaultfd syscall flags */
68-
unsigned int flags;
69-
/* features requested from the userspace */
70-
unsigned int features;
71-
/* released */
72-
bool released;
73-
/* memory mappings are changing because of non-cooperative event */
74-
atomic_t mmap_changing;
75-
/* mm with one or more vmas attached to this userfaultfd_ctx */
76-
struct mm_struct *mm;
77-
};
78-
7940
struct userfaultfd_fork_ctx {
8041
struct userfaultfd_ctx *orig;
8142
struct userfaultfd_ctx *new;

include/linux/userfaultfd_k.h

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,45 @@
3636
#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
3737
#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
3838

39+
/*
40+
* Start with fault_pending_wqh and fault_wqh so they're more likely
41+
* to be in the same cacheline.
42+
*
43+
* Locking order:
44+
* fd_wqh.lock
45+
* fault_pending_wqh.lock
46+
* fault_wqh.lock
47+
* event_wqh.lock
48+
*
49+
* To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
50+
* since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
51+
* also taken in IRQ context.
52+
*/
53+
struct userfaultfd_ctx {
54+
/* waitqueue head for the pending (i.e. not read) userfaults */
55+
wait_queue_head_t fault_pending_wqh;
56+
/* waitqueue head for the userfaults */
57+
wait_queue_head_t fault_wqh;
58+
/* waitqueue head for the pseudo fd to wakeup poll/read */
59+
wait_queue_head_t fd_wqh;
60+
/* waitqueue head for events */
61+
wait_queue_head_t event_wqh;
62+
/* a refile sequence protected by fault_pending_wqh lock */
63+
seqcount_spinlock_t refile_seq;
64+
/* pseudo fd refcounting */
65+
refcount_t refcount;
66+
/* userfaultfd syscall flags */
67+
unsigned int flags;
68+
/* features requested from the userspace */
69+
unsigned int features;
70+
/* released */
71+
bool released;
72+
/* memory mappings are changing because of non-cooperative event */
73+
atomic_t mmap_changing;
74+
/* mm with one or more vmas attached to this userfaultfd_ctx */
75+
struct mm_struct *mm;
76+
};
77+
3978
extern int sysctl_unprivileged_userfaultfd;
4079

4180
extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);

0 commit comments

Comments
 (0)