2 changes: 1 addition & 1 deletion fs/exec.c
@@ -994,7 +994,7 @@ static int exec_mmap(struct mm_struct *mm)
active_mm = tsk->active_mm;
tsk->active_mm = mm;
tsk->mm = mm;
mm_init_cid(mm);
mm_init_cid(mm, tsk);
/*
* This prevents preemption while active_mm is being loaded and
* it and mm are being updated, which could cause problems for
72 changes: 63 additions & 9 deletions include/linux/mm_types.h
@@ -812,6 +812,7 @@ struct vm_area_struct {
struct mm_cid {
u64 time;
int cid;
int recent_cid;
};
#endif

@@ -882,6 +883,27 @@ struct mm_struct {
* When the next mm_cid scan is due (in jiffies).
*/
unsigned long mm_cid_next_scan;
/**
* @nr_cpus_allowed: Number of CPUs allowed for mm.
*
* Number of CPUs in the union of all the mm's
* threads' allowed CPU masks.
*/
unsigned int nr_cpus_allowed;
/**
* @max_nr_cid: Maximum number of concurrency IDs allocated.
*
* Track the highest number of concurrency IDs allocated for the
* mm.
*/
atomic_t max_nr_cid;
/**
* @cpus_allowed_lock: Lock protecting mm cpus_allowed.
*
* Provide mutual exclusion for mm cpus_allowed and
* mm nr_cpus_allowed updates.
*/
raw_spinlock_t cpus_allowed_lock;
#endif
#ifdef CONFIG_MMU
atomic_long_t pgtables_bytes; /* size of all page tables */
@@ -1200,36 +1222,53 @@ static inline int mm_cid_clear_lazy_put(int cid)
return cid & ~MM_CID_LAZY_PUT;
}

/*
* mm_cpus_allowed: Union of all the mm's threads' allowed CPUs.
*/
static inline cpumask_t *mm_cpus_allowed(struct mm_struct *mm)
{
unsigned long bitmap = (unsigned long)mm;

bitmap += offsetof(struct mm_struct, cpu_bitmap);
/* Skip cpu_bitmap */
bitmap += cpumask_size();
return (struct cpumask *)bitmap;
}

/* Accessor for struct mm_struct's cidmask. */
static inline cpumask_t *mm_cidmask(struct mm_struct *mm)
{
unsigned long cid_bitmap = (unsigned long)mm;
unsigned long cid_bitmap = (unsigned long)mm_cpus_allowed(mm);

cid_bitmap += offsetof(struct mm_struct, cpu_bitmap);
/* Skip cpu_bitmap */
/* Skip mm_cpus_allowed */
cid_bitmap += cpumask_size();
return (struct cpumask *)cid_bitmap;
}

static inline void mm_init_cid(struct mm_struct *mm)
static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p)
{
int i;

for_each_possible_cpu(i) {
struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, i);

pcpu_cid->cid = MM_CID_UNSET;
pcpu_cid->recent_cid = MM_CID_UNSET;
pcpu_cid->time = 0;
}
mm->nr_cpus_allowed = p->nr_cpus_allowed;
atomic_set(&mm->max_nr_cid, 0);
raw_spin_lock_init(&mm->cpus_allowed_lock);
cpumask_copy(mm_cpus_allowed(mm), &p->cpus_mask);
cpumask_clear(mm_cidmask(mm));
}

static inline int mm_alloc_cid_noprof(struct mm_struct *mm)
static inline int mm_alloc_cid_noprof(struct mm_struct *mm, struct task_struct *p)
{
mm->pcpu_cid = alloc_percpu_noprof(struct mm_cid);
if (!mm->pcpu_cid)
return -ENOMEM;
mm_init_cid(mm);
mm_init_cid(mm, p);
return 0;
}
#define mm_alloc_cid(...) alloc_hooks(mm_alloc_cid_noprof(__VA_ARGS__))
@@ -1242,16 +1281,31 @@ static inline void mm_destroy_cid(struct mm_struct *mm)

static inline unsigned int mm_cid_size(void)
{
return cpumask_size();
return 2 * cpumask_size(); /* mm_cpus_allowed(), mm_cidmask(). */
Copilot AI, Oct 21, 2025:
Corrected spelling of 'cidmask' to match the actual function name 'mm_cidmask()'.
Suggested change:
return 2 * cpumask_size(); /* mm_cpus_allowed(), mm_cidmask(). */
return 2 * cpumask_size(); /* mm_cpus_allowed(), mm_cidmask. */
}

static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask)
{
struct cpumask *mm_allowed = mm_cpus_allowed(mm);

if (!mm)
return;
/* mm_cpus_allowed is the union of each thread's allowed CPUs mask. */
raw_spin_lock(&mm->cpus_allowed_lock);
cpumask_or(mm_allowed, mm_allowed, cpumask);
WRITE_ONCE(mm->nr_cpus_allowed, cpumask_weight(mm_allowed));
raw_spin_unlock(&mm->cpus_allowed_lock);
}
#else /* CONFIG_SCHED_MM_CID */
static inline void mm_init_cid(struct mm_struct *mm) { }
static inline int mm_alloc_cid(struct mm_struct *mm) { return 0; }
static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p) { }
static inline int mm_alloc_cid(struct mm_struct *mm, struct task_struct *p) { return 0; }
static inline void mm_destroy_cid(struct mm_struct *mm) { }

static inline unsigned int mm_cid_size(void)
{
return 0;
}
static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask) { }
#endif /* CONFIG_SCHED_MM_CID */

struct mmu_gather;
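The two accessors above depend on mm_struct's trailing storage growing by one extra cpumask. Below is a minimal user-space sketch of the assumed layout [ cpu_bitmap | mm_cpus_allowed | mm_cidmask ] and of why mm_cid_size() now reports 2 * cpumask_size(). It is illustrative only: struct fake_mm, FAKE_CPUMASK_SIZE and the helper names are stand-ins, not kernel API.

/* Hypothetical model of the storage appended after struct mm_struct. */
#include <stdio.h>
#include <stddef.h>

#define FAKE_CPUMASK_SIZE 16UL		/* stand-in for cpumask_size() */

struct fake_mm {
	/* ...other mm_struct fields elided... */
	unsigned long cpu_bitmap[FAKE_CPUMASK_SIZE / sizeof(unsigned long)];
	/* mm_cpus_allowed and mm_cidmask live in the bytes allocated past here. */
};

static char *fake_mm_cpus_allowed(struct fake_mm *mm)
{
	/* Skip cpu_bitmap, mirroring mm_cpus_allowed() in the patch. */
	return (char *)mm + offsetof(struct fake_mm, cpu_bitmap) + FAKE_CPUMASK_SIZE;
}

static char *fake_mm_cidmask(struct fake_mm *mm)
{
	/* Skip mm_cpus_allowed, mirroring mm_cidmask() in the patch. */
	return fake_mm_cpus_allowed(mm) + FAKE_CPUMASK_SIZE;
}

int main(void)
{
	/* Allocation mirrors mm_struct plus mm_cid_size() == 2 * cpumask_size(). */
	static struct {
		struct fake_mm mm;
		char extra[2 * FAKE_CPUMASK_SIZE];
	} storage;
	struct fake_mm *mm = &storage.mm;

	printf("mm_cpus_allowed offset: %td\n", fake_mm_cpus_allowed(mm) - (char *)mm);
	printf("mm_cidmask offset:      %td\n", fake_mm_cidmask(mm) - (char *)mm);
	return 0;
}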
2 changes: 1 addition & 1 deletion kernel/fork.c
@@ -1305,7 +1305,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
if (init_new_context(p, mm))
goto fail_nocontext;

if (mm_alloc_cid(mm))
if (mm_alloc_cid(mm, p))
goto fail_cid;

if (percpu_counter_init_many(mm->rss_stat, 0, GFP_KERNEL_ACCOUNT,
22 changes: 13 additions & 9 deletions kernel/sched/core.c
@@ -2705,6 +2705,7 @@ __do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx)
put_prev_task(rq, p);

p->sched_class->set_cpus_allowed(p, ctx);
mm_set_cpus_allowed(p->mm, ctx->new_mask);

if (queued)
enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
@@ -10266,6 +10267,7 @@ int __sched_mm_cid_migrate_from_try_steal_cid(struct rq *src_rq,
*/
if (!try_cmpxchg(&src_pcpu_cid->cid, &lazy_cid, MM_CID_UNSET))
return -1;
WRITE_ONCE(src_pcpu_cid->recent_cid, MM_CID_UNSET);
return src_cid;
}

@@ -10278,7 +10280,8 @@ void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t)
{
struct mm_cid *src_pcpu_cid, *dst_pcpu_cid;
struct mm_struct *mm = t->mm;
int src_cid, dst_cid, src_cpu;
int src_cid, src_cpu;
bool dst_cid_is_set;
struct rq *src_rq;

lockdep_assert_rq_held(dst_rq);
@@ -10295,19 +10298,19 @@ void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t)
* allocation closest to 0 in cases where few threads migrate around
* many CPUs.
*
* If destination cid is already set, we may have to just clear
* the src cid to ensure compactness in frequent migrations
* scenarios.
* If destination cid or recent cid is already set, we may have
* to just clear the src cid to ensure compactness in frequent
* migrations scenarios.
*
* It is not useful to clear the src cid when the number of threads is
* greater or equal to the number of allowed CPUs, because user-space
* can expect that the number of allowed cids can reach the number of
* allowed CPUs.
*/
dst_pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu_of(dst_rq));
dst_cid = READ_ONCE(dst_pcpu_cid->cid);
if (!mm_cid_is_unset(dst_cid) &&
atomic_read(&mm->mm_users) >= t->nr_cpus_allowed)
dst_cid_is_set = !mm_cid_is_unset(READ_ONCE(dst_pcpu_cid->cid)) ||
!mm_cid_is_unset(READ_ONCE(dst_pcpu_cid->recent_cid));
if (dst_cid_is_set && atomic_read(&mm->mm_users) >= READ_ONCE(mm->nr_cpus_allowed))
return;
src_pcpu_cid = per_cpu_ptr(mm->pcpu_cid, src_cpu);
src_rq = cpu_rq(src_cpu);
@@ -10318,13 +10321,14 @@ void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t)
src_cid);
if (src_cid == -1)
return;
if (!mm_cid_is_unset(dst_cid)) {
if (dst_cid_is_set) {
__mm_cid_put(mm, src_cid);
return;
}
/* Move src_cid to dst cpu. */
mm_cid_snapshot_time(dst_rq, mm);
WRITE_ONCE(dst_pcpu_cid->cid, src_cid);
WRITE_ONCE(dst_pcpu_cid->recent_cid, src_cid);
}

static void sched_mm_cid_remote_clear(struct mm_struct *mm, struct mm_cid *pcpu_cid,
@@ -10563,7 +10567,7 @@ void sched_mm_cid_after_execve(struct task_struct *t)
* Matches barrier in sched_mm_cid_remote_clear_old().
*/
smp_mb();
t->last_mm_cid = t->mm_cid = mm_cid_get(rq, mm);
t->last_mm_cid = t->mm_cid = mm_cid_get(rq, t, mm);
}
rseq_set_notify_resume(t);
}
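To make the behavioral change in sched_mm_cid_migrate_to() explicit, here is a hedged plain-C restatement of the early-return test. The function and parameter names are illustrative, -1 stands in for MM_CID_UNSET, and this is not kernel code:

/*
 * Sketch of the compaction check after this patch: a destination CPU that
 * either still holds a CID or recently held one counts as "set", and the
 * thread count is compared against the mm-wide nr_cpus_allowed (the union
 * of all threads' affinities) rather than the migrating task's own mask.
 */
static int should_skip_src_cid_steal(int dst_cid, int dst_recent_cid,
				     int mm_users, int mm_nr_cpus_allowed)
{
	int dst_cid_is_set = (dst_cid != -1) || (dst_recent_cid != -1);

	return dst_cid_is_set && mm_users >= mm_nr_cpus_allowed;
}

When this test is true the migration path leaves the source CID alone, since user space can already expect as many CIDs as there are allowed CPUs.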
48 changes: 34 additions & 14 deletions kernel/sched/sched.h
@@ -3635,24 +3635,41 @@ static inline void mm_cid_put(struct mm_struct *mm)
__mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
}

static inline int __mm_cid_try_get(struct mm_struct *mm)
static inline int __mm_cid_try_get(struct task_struct *t, struct mm_struct *mm)
{
struct cpumask *cpumask;
int cid;
struct cpumask *cidmask = mm_cidmask(mm);
struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
int cid = __this_cpu_read(pcpu_cid->recent_cid);

cpumask = mm_cidmask(mm);
/* Try to re-use recent cid. This improves cache locality. */
if (!mm_cid_is_unset(cid) && !cpumask_test_and_set_cpu(cid, cidmask))
return cid;
/*
* Expand cid allocation if the maximum number of concurrency
* IDs allocated (max_nr_cid) is below the number of allowed CPUs
* and the number of threads. Expanding cid allocation as much as
* possible improves cache locality.
*/
cid = atomic_read(&mm->max_nr_cid);
while (cid < READ_ONCE(mm->nr_cpus_allowed) && cid < atomic_read(&mm->mm_users)) {
if (!atomic_try_cmpxchg(&mm->max_nr_cid, &cid, cid + 1))
continue;
if (!cpumask_test_and_set_cpu(cid, cidmask))
return cid;
}
/*
* Find the first available concurrency id.
* Retry finding first zero bit if the mask is temporarily
* filled. This only happens during concurrent remote-clear
* which owns a cid without holding a rq lock.
*/
for (;;) {
cid = cpumask_first_zero(cpumask);
if (cid < nr_cpu_ids)
cid = cpumask_first_zero(cidmask);
if (cid < READ_ONCE(mm->nr_cpus_allowed))
break;
cpu_relax();
}
if (cpumask_test_and_set_cpu(cid, cpumask))
if (cpumask_test_and_set_cpu(cid, cidmask))
return -1;

return cid;
@@ -3670,7 +3687,8 @@ static inline void mm_cid_snapshot_time(struct rq *rq, struct mm_struct *mm)
WRITE_ONCE(pcpu_cid->time, rq->clock);
}

static inline int __mm_cid_get(struct rq *rq, struct mm_struct *mm)
static inline int __mm_cid_get(struct rq *rq, struct task_struct *t,
struct mm_struct *mm)
{
int cid;

@@ -3680,13 +3698,13 @@ static inline int __mm_cid_get(struct rq *rq, struct mm_struct *mm)
* guarantee forward progress.
*/
if (!READ_ONCE(use_cid_lock)) {
cid = __mm_cid_try_get(mm);
cid = __mm_cid_try_get(t, mm);
if (cid >= 0)
goto end;
raw_spin_lock(&cid_lock);
} else {
raw_spin_lock(&cid_lock);
cid = __mm_cid_try_get(mm);
cid = __mm_cid_try_get(t, mm);
if (cid >= 0)
goto unlock;
}
@@ -3706,7 +3724,7 @@ static inline int __mm_cid_get(struct rq *rq, struct mm_struct *mm)
* all newcoming allocations observe the use_cid_lock flag set.
*/
do {
cid = __mm_cid_try_get(mm);
cid = __mm_cid_try_get(t, mm);
cpu_relax();
} while (cid < 0);
/*
@@ -3723,7 +3741,8 @@ static inline int __mm_cid_get(struct rq *rq, struct mm_struct *mm)
return cid;
}

static inline int mm_cid_get(struct rq *rq, struct mm_struct *mm)
static inline int mm_cid_get(struct rq *rq, struct task_struct *t,
struct mm_struct *mm)
{
struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
struct cpumask *cpumask;
@@ -3740,8 +3759,9 @@ static inline int mm_cid_get(struct rq *rq, struct mm_struct *mm)
if (try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET))
__mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
}
cid = __mm_cid_get(rq, mm);
cid = __mm_cid_get(rq, t, mm);
__this_cpu_write(pcpu_cid->cid, cid);
__this_cpu_write(pcpu_cid->recent_cid, cid);

return cid;
}
@@ -3794,7 +3814,7 @@ static inline void switch_mm_cid(struct rq *rq,
prev->mm_cid = -1;
}
if (next->mm_cid_active)
next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next->mm);
next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next, next->mm);
}

#else /* !CONFIG_SCHED_MM_CID: */
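Taken together, the sched.h changes give __mm_cid_try_get() a three-step allocation order: reuse the CPU's recent CID, then widen max_nr_cid while it is below both the allowed-CPU count and the thread count, then fall back to the first free bit below nr_cpus_allowed. The stand-alone sketch below shows that order; a single unsigned long plays the role of the cidmask, try_alloc_bit() stands in for cpumask_test_and_set_cpu(), max_nr_cid is bumped non-atomically because the sketch is single-threaded, and none of these names are kernel API.

#include <stdbool.h>
#include <stdio.h>

#define CID_UNSET -1

static bool try_alloc_bit(unsigned long *mask, int bit)
{
	if (*mask & (1UL << bit))
		return false;		/* cid already owned elsewhere */
	*mask |= 1UL << bit;
	return true;
}

/* Single-threaded model of __mm_cid_try_get()'s allocation order. */
static int try_get_cid(unsigned long *cidmask, int recent_cid,
		       int *max_nr_cid, int nr_cpus_allowed, int mm_users)
{
	int cid;

	/* 1) Re-use the CID this CPU held recently: best cache locality. */
	if (recent_cid != CID_UNSET && try_alloc_bit(cidmask, recent_cid))
		return recent_cid;

	/* 2) Expand the allocated-CID ceiling while it stays below both the
	 *    number of allowed CPUs and the number of threads (the kernel
	 *    does this step with atomic_try_cmpxchg on mm->max_nr_cid). */
	while (*max_nr_cid < nr_cpus_allowed && *max_nr_cid < mm_users) {
		cid = (*max_nr_cid)++;
		if (try_alloc_bit(cidmask, cid))
			return cid;
	}

	/* 3) Fall back to the first free bit, bounded by nr_cpus_allowed. */
	for (cid = 0; cid < nr_cpus_allowed; cid++)
		if (try_alloc_bit(cidmask, cid))
			return cid;

	return CID_UNSET;	/* caller retries or takes the locked path */
}

int main(void)
{
	unsigned long cidmask = 0;
	int max_nr_cid = 0, nr_cpus_allowed = 4, mm_users = 2;

	/* Two threads of one mm grab compact CIDs 0 and 1. */
	printf("cid A: %d\n", try_get_cid(&cidmask, CID_UNSET, &max_nr_cid,
					  nr_cpus_allowed, mm_users));
	printf("cid B: %d\n", try_get_cid(&cidmask, CID_UNSET, &max_nr_cid,
					  nr_cpus_allowed, mm_users));
	return 0;
}

Bounding the ceiling by both the thread count and the mm-wide allowed-CPU count is what keeps concurrency IDs packed near zero for processes with few threads or a narrow affinity mask.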