
Commit 88c0702
layered: get cpuset support working with masks anded
1 parent: 3e3bf57

5 files changed: 77 additions & 7 deletions


scheds/rust/scx_layered/src/bpf/intf.h

Lines changed: 1 addition & 0 deletions

@@ -30,6 +30,7 @@ enum consts {
         MAX_TASKS = 131072,
         MAX_PATH = 4096,
         MAX_NUMA_NODES = 64,
+        MAX_CONTAINERS = 64,
         MAX_LLCS = 64,
         MAX_COMM = 16,
         MAX_LAYER_MATCH_ORS = 32,

scheds/rust/scx_layered/src/bpf/main.bpf.c

Lines changed: 44 additions & 2 deletions

@@ -34,9 +34,11 @@ const volatile u64 numa_cpumasks[MAX_NUMA_NODES][MAX_CPUS / 64];
 const volatile u32 llc_numa_id_map[MAX_LLCS];
 const volatile u32 cpu_llc_id_map[MAX_CPUS];
 const volatile u32 nr_layers = 1;
+const volatile u32 nr_containers = 1;
 const volatile u32 nr_nodes = 32; /* !0 for veristat, set during init */
 const volatile u32 nr_llcs = 32; /* !0 for veristat, set during init */
 const volatile bool smt_enabled = true;
+const volatile bool enable_container = true;
 const volatile bool has_little_cores = true;
 const volatile bool xnuma_preemption = false;
 const volatile s32 __sibling_cpu[MAX_CPUS];
@@ -53,6 +55,7 @@ const volatile u64 lo_fb_wait_ns = 5000000; /* !0 for veristat */
 const volatile u32 lo_fb_share_ppk = 128; /* !0 for veristat */
 const volatile bool percpu_kthread_preempt = true;
 volatile u64 layer_refresh_seq_avgruntime;
+const volatile u64 cpuset_fakemasks[MAX_CONTAINERS][MAX_CPUS / 64];

 /* Flag to enable or disable antistall feature */
 const volatile bool enable_antistall = true;
@@ -66,6 +69,10 @@ u64 unprotected_seq = 0;

 private(all_cpumask) struct bpf_cpumask __kptr *all_cpumask;
 private(big_cpumask) struct bpf_cpumask __kptr *big_cpumask;
+// XXXLIKEWHATEVS -- this should be a map of kptrs.
+// for now use one cpumask consisting of all cpuset cpumasks
+// anded.
+private(cpuset_cpumask) struct bpf_cpumask __kptr *cpuset_cpumask;
 struct layer layers[MAX_LAYERS];
 u32 fallback_cpu;
 u32 layered_root_tgid = 0;
@@ -3313,8 +3320,8 @@ static s32 init_cpu(s32 cpu, int *nr_online_cpus,

 s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
 {
-        struct bpf_cpumask *cpumask, *tmp_big_cpumask, *tmp_unprotected_cpumask;
-        int i, nr_online_cpus, ret;
+        struct bpf_cpumask *cpumask, *tmp_big_cpumask, *tmp_unprotected_cpumask, tmptmp;
+        int i, nr_online_cpus, ret, x;

         cpumask = bpf_cpumask_create();
         if (!cpumask)
@@ -3356,6 +3363,41 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
         if (tmp_unprotected_cpumask)
                 bpf_cpumask_release(tmp_unprotected_cpumask);

+
+
+        if (enable_container) {
+                bpf_for(i, 0, nr_containers) {
+                        cpumask = bpf_cpumask_create();
+
+                        if (!cpumask)
+                                return -ENOMEM;
+
+                        bpf_for(x, 0, MAX_CPUS/64) {
+                                // container then cpu bit
+                                if (cpuset_fakemasks[i][x] == 1) {
+                                        bpf_cpumask_set_cpu(x, cpumask);
+                                }
+                        }
+
+                        if (cpuset_cpumask) {
+                                struct bpf_cpumask *tmp_cpuset_cpumask = bpf_kptr_xchg(&cpuset_cpumask, NULL);
+                                if (!tmp_cpuset_cpumask) {
+                                        bpf_cpumask_release(cpumask);
+                                        return -1;
+                                }
+                                bpf_cpumask_and(cpumask, cast_mask(tmp_cpuset_cpumask), cast_mask(cpumask));
+                                bpf_cpumask_release(tmp_cpuset_cpumask);
+                        }
+
+                        struct bpf_cpumask *old_cpumask = bpf_kptr_xchg(&cpuset_cpumask, cpumask);
+
+                        if (old_cpumask) {
+                                bpf_cpumask_release(old_cpumask);
+                        }
+
+                }
+        }
+
 bpf_for(i, 0, nr_nodes) {
         ret = create_node(i);
         if (ret)
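Because the BPF side cannot yet keep one kptr per container (the XXXLIKEWHATEVS note above), layered_init folds everything into the single cpuset_cpumask: each iteration decodes one row of cpuset_fakemasks into a fresh bpf_cpumask, swaps the accumulator out with bpf_kptr_xchg, ANDs the two, and swaps the result back in; the first iteration finds the accumulator NULL and simply installs its mask. A minimal userspace sketch of that fold, using plain u64 bitset words in place of bpf_cpumask (the function and test values are illustrative, not from the commit):

```rust
// Simulates layered_init's accumulation: each container contributes a
// cpumask, and the surviving cpuset_cpumask is the AND (intersection)
// of all of them. Plain u64 words stand in for struct bpf_cpumask.
fn and_container_masks(container_masks: &[Vec<u64>]) -> Option<Vec<u64>> {
    let mut acc: Option<Vec<u64>> = None; // mirrors the initially-NULL cpuset_cpumask kptr
    for mask in container_masks {
        acc = Some(match acc {
            // bpf_cpumask_and(cpumask, tmp_cpuset_cpumask, cpumask)
            Some(prev) => prev.iter().zip(mask).map(|(a, b)| a & b).collect(),
            // first container: bpf_kptr_xchg installs the mask as-is
            None => mask.clone(),
        });
    }
    acc
}

fn main() {
    // Two containers: one allows cpus {0,1,2}, the other {1,2,3}.
    let masks = vec![vec![0b0111u64], vec![0b1110u64]];
    // The anded mask keeps only cpus every container may use: {1,2}.
    assert_eq!(and_container_masks(&masks), Some(vec![0b0110u64]));
}
```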

scheds/rust/scx_layered/src/layer_core_growth.rs

Lines changed: 3 additions & 3 deletions

@@ -89,8 +89,8 @@ use std::collections::BTreeSet;

 #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
 pub struct CpuSet {
-    cpus: BTreeSet<usize>,
-    cores: BTreeSet<usize>,
+    pub cpus: BTreeSet<usize>,
+    pub cores: BTreeSet<usize>,
 }

 fn parse_cpu_ranges(s: &str) -> Result<BTreeSet<usize>> {
@@ -126,7 +126,7 @@ fn collect_cpuset_effective() -> Result<BTreeSet<BTreeSet<usize>>> {
 }

 // return cpuset layout.
-fn get_cpusets(topo: &Topology) -> Result<BTreeSet<CpuSet>> {
+pub fn get_cpusets(topo: &Topology) -> Result<BTreeSet<CpuSet>> {
     let mut cpusets: BTreeSet<CpuSet> = BTreeSet::new();
     let cpuset_cpus = collect_cpuset_effective()?;
     for x in cpuset_cpus {
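With CpuSet's fields and get_cpusets() made pub here (and the module exported from lib.rs below), callers outside this file can enumerate the cpuset layout, which is what the new init_cpusets in main.rs does. A rough usage sketch, assuming the scx_layered crate as a dependency and scx_utils::Topology as the topology source:

```rust
use anyhow::Result;
use scx_layered::layer_core_growth::get_cpusets;
use scx_utils::Topology;

fn print_cpuset_layout() -> Result<()> {
    let topo = Topology::new()?;
    // BTreeSet iteration gives a stable order, so indices are reproducible.
    for (i, cpuset) in get_cpusets(&topo)?.iter().enumerate() {
        // The now-pub fields expose each cpuset's CPU and core ids.
        println!("cpuset {}: cpus={:?} cores={:?}", i, cpuset.cpus, cpuset.cores);
    }
    Ok(())
}
```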

scheds/rust/scx_layered/src/lib.rs

Lines changed: 2 additions & 2 deletions

@@ -3,7 +3,7 @@
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2.
 mod config;
-mod layer_core_growth;
+pub mod layer_core_growth;

 pub mod bpf_intf;

@@ -189,7 +189,7 @@ impl CpuPool {
         cpus
     }

-    fn get_core_topological_id(&self, core: &Core) -> usize {
+    pub fn get_core_topological_id(&self, core: &Core) -> usize {
         *self
             .core_topology_to_id
             .get(&(core.node_id, core.llc_id, core.id))

scheds/rust/scx_layered/src/main.rs

Lines changed: 27 additions & 0 deletions

@@ -59,6 +59,7 @@ use stats::LayerStats;
 use stats::StatsReq;
 use stats::StatsRes;
 use stats::SysStats;
+use layer_core_growth::get_cpusets;

 const MAX_PATH: usize = bpf_intf::consts_MAX_PATH as usize;
 const MAX_COMM: usize = bpf_intf::consts_MAX_COMM as usize;
@@ -67,6 +68,7 @@ const MIN_LAYER_WEIGHT: u32 = bpf_intf::consts_MIN_LAYER_WEIGHT;
 const MAX_LAYER_MATCH_ORS: usize = bpf_intf::consts_MAX_LAYER_MATCH_ORS as usize;
 const MAX_LAYER_NAME: usize = bpf_intf::consts_MAX_LAYER_NAME as usize;
 const MAX_LAYERS: usize = bpf_intf::consts_MAX_LAYERS as usize;
+const MAX_CPUS: usize = bpf_intf::consts_MAX_CPUS as usize;
 const DEFAULT_LAYER_WEIGHT: u32 = bpf_intf::consts_DEFAULT_LAYER_WEIGHT;
 const USAGE_HALF_LIFE: u32 = bpf_intf::consts_USAGE_HALF_LIFE;
 const USAGE_HALF_LIFE_F64: f64 = USAGE_HALF_LIFE as f64 / 1_000_000_000.0;
@@ -589,6 +591,10 @@ struct Opts {
     #[clap(long, default_value = "false")]
     disable_antistall: bool,

+    /// Enable container support
+    #[clap(long, default_value = "false")]
+    enable_container: bool,
+
     /// Maximum task runnable_at delay (in seconds) before antistall turns on
     #[clap(long, default_value = "3")]
     antistall_sec: u64,
@@ -1415,6 +1421,22 @@ impl<'a> Scheduler<'a> {
         Ok(())
     }

+    fn init_cpusets(skel: &mut OpenBpfSkel, topo: &Topology) -> Result<()> {
+        let cpusets = get_cpusets(topo)?;
+        for (i, cpuset) in cpusets.iter().enumerate() {
+            let mut cpumask_bitvec: [u64; MAX_CPUS/64] = [0; MAX_CPUS/64];
+            for j in 0..MAX_CPUS/64 {
+                if cpuset.cpus.contains(&j) {
+                    cpumask_bitvec[j] = 1;
+                }
+            }
+            let cpuset_cpumask_slice = &mut skel.maps.rodata_data.cpuset_fakemasks[i];
+            cpuset_cpumask_slice.copy_from_slice(&cpumask_bitvec);
+        }
+        skel.maps.rodata_data.nr_containers = cpusets.len() as u32;
+        Ok(())
+    }
+
     fn init_nodes(skel: &mut OpenBpfSkel, _opts: &Opts, topo: &Topology) {
         skel.maps.rodata_data.nr_nodes = topo.nodes.len() as u32;
         skel.maps.rodata_data.nr_llcs = 0;
@@ -1855,6 +1877,7 @@ impl<'a> Scheduler<'a> {
         skel.maps.rodata_data.lo_fb_wait_ns = opts.lo_fb_wait_us * 1000;
         skel.maps.rodata_data.lo_fb_share_ppk = ((opts.lo_fb_share * 1024.0) as u32).clamp(1, 1024);
         skel.maps.rodata_data.enable_antistall = !opts.disable_antistall;
+        skel.maps.rodata_data.enable_container = opts.enable_container;
         skel.maps.rodata_data.enable_gpu_support = opts.enable_gpu_support;

         for (cpu, sib) in topo.sibling_cpus().iter().enumerate() {
@@ -1922,6 +1945,10 @@ impl<'a> Scheduler<'a> {

         Self::init_layers(&mut skel, &layer_specs, &topo)?;
         Self::init_nodes(&mut skel, opts, &topo);
+
+        if opts.enable_container {
+            Self::init_cpusets(&mut skel, &topo)?;
+        }

         // We set the pin path before loading the skeleton. This will ensure
         // libbpf creates and pins the map, or reuses the pinned map fd for us,
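init_cpusets writes the convention the BPF decoder expects: one u64 word per CPU, with word j set to 1 when CPU j belongs to the cpuset (the `cpuset_fakemasks[i][x] == 1` test above). As written, only CPU indices below MAX_CPUS/64 are representable, which fits the "fakemask" naming and the map-of-kptrs TODO. A standalone sketch of the encoding, with MAX_CPUS assumed to be 512 purely for illustration (the real value comes from bpf_intf::consts_MAX_CPUS):

```rust
use std::collections::BTreeSet;

// Illustrative only: the real constant is bpf_intf::consts_MAX_CPUS.
const MAX_CPUS: usize = 512;

// Mirror of init_cpusets' inner loop: one u64 word flags one CPU.
fn encode_fakemask(cpus: &BTreeSet<usize>) -> [u64; MAX_CPUS / 64] {
    let mut words = [0u64; MAX_CPUS / 64];
    for j in 0..MAX_CPUS / 64 {
        if cpus.contains(&j) {
            words[j] = 1; // whole word marks CPU j as present
        }
    }
    words
}

fn main() {
    let cpus: BTreeSet<usize> = [0, 2, 5].into_iter().collect();
    let words = encode_fakemask(&cpus);
    assert_eq!((words[0], words[1], words[2], words[5]), (1, 0, 1, 1));
}
```

None of this runs unless the scheduler is started with --enable-container, which flips enable_container in BPF rodata and calls init_cpusets before the skeleton is loaded.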
