Skip to content

Commit a705c27

Browse files
committed
veristat: memory accounting for bpf programs
This commit adds a new field, mem_peak ("Peak memory"), to the set of gathered statistics. The field is intended as an estimate of the peak verifier memory consumption while processing a given program.

Mechanically, the stat is collected as follows:
- At the beginning of handle_verif_mode() a new cgroup namespace is created and cgroup fs is mounted in this namespace; the memory controller is enabled for the root cgroup.
- At each program load:
  - bpf_object__load() is split into bpf_object__prepare() and bpf_object__load() to avoid accounting for memory allocated for maps;
  - before bpf_object__load() a new cgroup is created and the veristat process enters this cgroup; "memory.peak" of the new cgroup is stashed;
  - after bpf_object__load() the difference between the current "memory.peak" and the stashed "memory.peak" is used as the metric; veristat exits the cgroup and the cgroup is discarded.

If any of the above steps fails, veristat proceeds without collecting mem_peak information for that program.

The change has an impact on veristat running time, e.g. for all test_progs object files there is an increase from 82s to 102s.

I take the correlation between the "Peak states" and "Peak memory" fields as a sanity check for the gathered statistics, e.g. here is a sample of data for sched_ext programs:

File       Program           Peak states  Peak memory (KiB)
---------  ----------------  -----------  -----------------
bpf.bpf.o  lavd_select_cpu          1311              26256
bpf.bpf.o  lavd_enqueue             1140              22720
bpf.bpf.o  layered_enqueue           777              11504
bpf.bpf.o  layered_dispatch          578               7976
bpf.bpf.o  lavd_dispatch             634               6204
bpf.bpf.o  rusty_init                343               5352
bpf.bpf.o  lavd_init                 361               5092
...
bpf.bpf.o  rusty_exit_task            36                256
bpf.bpf.o  rusty_running              19                256
bpf.bpf.o  bpfland_dispatch            3                  0
bpf.bpf.o  bpfland_enable              1                  0

Signed-off-by: Eduard Zingerman <[email protected]>
1 parent 744742c commit a705c27

File tree

1 file changed

+242
-7
lines changed

1 file changed

+242
-7
lines changed

tools/testing/selftests/bpf/veristat.c

Lines changed: 242 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <signal.h>
1313
#include <fcntl.h>
1414
#include <unistd.h>
15+
#include <sys/mount.h>
1516
#include <sys/time.h>
1617
#include <sys/sysinfo.h>
1718
#include <sys/stat.h>
@@ -49,6 +50,7 @@ enum stat_id {
4950
STACK,
5051
PROG_TYPE,
5152
ATTACH_TYPE,
53+
MEMORY_PEAK,
5254

5355
FILE_NAME,
5456
PROG_NAME,
@@ -208,6 +210,9 @@ static struct env {
208210
int top_src_lines;
209211
struct var_preset *presets;
210212
int npresets;
213+
char cgroup_fs_mount[PATH_MAX + 1];
214+
char stat_cgroup[PATH_MAX + 1];
215+
int memory_peak_fd;
211216
} env;
212217

213218
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
@@ -219,6 +224,22 @@ static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va
219224
return vfprintf(stderr, format, args);
220225
}
221226

227+
/* Reports a failed call together with the source location of the call site. */
#define log_errno(fmt, ...) log_errno_aux(__FILE__, __LINE__, fmt, ##__VA_ARGS__)

/*
 * Prints "<file>:<line>: <formatted message> failed with error '<strerror>'"
 * to stderr and returns -errno as it was on entry.
 *
 * errno is captured before the first stdio call because fprintf() and
 * vfprintf() are allowed to modify it; both the returned code and the
 * printed description come from that single snapshot, and errno is put
 * back before returning.
 */
__printf(3, 4)
static int log_errno_aux(const char *file, int line, const char *fmt, ...)
{
	int saved_errno = errno;
	va_list ap;

	va_start(ap, fmt);
	fprintf(stderr, "%s:%d: ", file, line);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	fprintf(stderr, " failed with error '%s'\n", strerror(saved_errno));

	/* leave errno the way the caller had it */
	errno = saved_errno;
	return -saved_errno;
}
242+
222243
#ifndef VERISTAT_VERSION
223244
#define VERISTAT_VERSION "<kernel>"
224245
#endif
@@ -734,13 +755,13 @@ static int append_file_from_file(const char *path)
734755
}
735756

736757
static const struct stat_specs default_csv_output_spec = {
737-
.spec_cnt = 14,
758+
.spec_cnt = 15,
738759
.ids = {
739760
FILE_NAME, PROG_NAME, VERDICT, DURATION,
740761
TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
741762
MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
742763
SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE,
743-
STACK,
764+
STACK, MEMORY_PEAK,
744765
},
745766
};
746767

@@ -781,6 +802,7 @@ static struct stat_def {
781802
[STACK] = {"Stack depth", {"stack_depth", "stack"}, },
782803
[PROG_TYPE] = { "Program type", {"prog_type"}, },
783804
[ATTACH_TYPE] = { "Attach type", {"attach_type", }, },
805+
[MEMORY_PEAK] = { "Peak memory (KiB)", {"mem_peak", }, },
784806
};
785807

786808
static bool parse_stat_id_var(const char *name, size_t len, int *id,
@@ -1278,16 +1300,213 @@ static int max_verifier_log_size(void)
12781300
return log_size;
12791301
}
12801302

1303+
/*
 * Opens @file for writing, writes one printf-style formatted string to it
 * and closes the file.
 *
 * Returns 0 on success and -1 on failure, with errno describing the first
 * failure that was observed.
 *
 * stdio buffers the formatted output, so for a short line the underlying
 * write usually happens only when fclose() flushes the stream. A write that
 * is rejected at that point would go unnoticed if only the vfprintf() result
 * were checked, hence the fclose() result is part of the status as well.
 */
__printf(2, 3)
static int write_one_line(const char *file, const char *fmt, ...)
{
	int saved_errno = 0;
	va_list ap;
	FILE *f;

	f = fopen(file, "w");
	if (!f)
		return -1;

	va_start(ap, fmt);
	errno = 0;
	if (vfprintf(f, fmt, ap) < 0)
		saved_errno = errno ? errno : EIO;
	va_end(ap);

	/* the stream is closed on every path; the first error seen wins */
	errno = 0;
	if (fclose(f) != 0 && !saved_errno)
		saved_errno = errno ? errno : EIO;

	if (saved_errno) {
		errno = saved_errno;
		return -1;
	}
	return 0;
}
1323+
1324+
/*
 * vsnprintf() wrapper used where truncation of the result is acceptable.
 * It exists to avoid the GCC -Wformat-truncation warning that plain
 * snprintf() triggers on code like:
 *
 *   char a[PATH_MAX], b[PATH_MAX];
 *   snprintf(a, sizeof(a), "%s/foo", b);
 *
 * NOTE(review): the volatile qualifier on @size presumably keeps the
 * compiler from reasoning about the buffer bound - do not drop it.
 *
 * Returns whatever vsnprintf() returns.
 */
__printf(3, 4)
static int snprintf_trunc(char *str, volatile size_t size, const char *fmt, ...)
{
	va_list args;
	int written;

	va_start(args, fmt);
	written = vsnprintf(str, size, fmt, args);
	va_end(args);

	return written;
}
1341+
1342+
static void destroy_stat_cgroup(void);
1343+
static void umount_cgroupfs(void);
1344+
1345+
/*
1346+
* Enters new cgroup namespace and mounts cgroupfs at /tmp/veristat-cgroup-mount-XXXXXX,
1347+
* enables "memory" controller for the root cgroup.
1348+
*/
1349+
static int mount_cgroupfs(void)
1350+
{
1351+
char buf[PATH_MAX + 1];
1352+
int err;
1353+
1354+
env.memory_peak_fd = -1;
1355+
1356+
err = unshare(CLONE_NEWCGROUP);
1357+
if (err < 0) {
1358+
err = log_errno("unshare(CLONE_NEWCGROUP)");
1359+
goto err_out;
1360+
}
1361+
1362+
snprintf_trunc(buf, sizeof(buf), "%s/veristat-cgroup-mount-XXXXXX", P_tmpdir);
1363+
if (mkdtemp(buf) == NULL) {
1364+
err = log_errno("mkdtemp(%s)", buf);
1365+
goto err_out;
1366+
}
1367+
strcpy(env.cgroup_fs_mount, buf);
1368+
1369+
err = mount("none", env.cgroup_fs_mount, "cgroup2", 0, NULL);
1370+
if (err < 0) {
1371+
err = log_errno("mount none %s -t cgroup2", env.cgroup_fs_mount);
1372+
goto err_out;
1373+
}
1374+
1375+
snprintf_trunc(buf, sizeof(buf), "%s/cgroup.subtree_control", env.cgroup_fs_mount);
1376+
err = write_one_line(buf, "+memory\n");
1377+
if (err < 0) {
1378+
err = log_errno("echo '+memory' > %s", buf);
1379+
goto err_out;
1380+
}
1381+
1382+
return 0;
1383+
1384+
err_out:
1385+
umount_cgroupfs();
1386+
return err;
1387+
}
1388+
1389+
static void umount_cgroupfs(void)
1390+
{
1391+
int err;
1392+
1393+
if (!env.cgroup_fs_mount[0])
1394+
return;
1395+
1396+
err = umount(env.cgroup_fs_mount);
1397+
if (err < 0)
1398+
log_errno("umount %s", env.cgroup_fs_mount);
1399+
1400+
err = rmdir(env.cgroup_fs_mount);
1401+
if (err < 0)
1402+
log_errno("rmdir %s", env.cgroup_fs_mount);
1403+
1404+
env.cgroup_fs_mount[0] = 0;
1405+
}
1406+
1407+
/*
1408+
* Creates a cgroup at /tmp/veristat-cgroup-mount-XXXXXX/accounting-<pid>,
1409+
* moves current process to this cgroup.
1410+
*/
1411+
static int create_stat_cgroup(void)
1412+
{
1413+
char buf[PATH_MAX + 1];
1414+
int err;
1415+
1416+
if (!env.cgroup_fs_mount[0])
1417+
return -1;
1418+
1419+
env.memory_peak_fd = -1;
1420+
1421+
snprintf_trunc(buf, sizeof(buf), "%s/accounting-%d", env.cgroup_fs_mount, getpid());
1422+
err = mkdir(buf, 0777);
1423+
if (err < 0) {
1424+
err = log_errno("mkdir(%s)", buf);
1425+
goto err_out;
1426+
}
1427+
strcpy(env.stat_cgroup, buf);
1428+
1429+
snprintf_trunc(buf, sizeof(buf), "%s/cgroup.procs", env.stat_cgroup);
1430+
err = write_one_line(buf, "%d\n", getpid());
1431+
if (err < 0) {
1432+
err = log_errno("echo %d > %s", getpid(), buf);
1433+
goto err_out;
1434+
}
1435+
1436+
snprintf_trunc(buf, sizeof(buf), "%s/memory.peak", env.stat_cgroup);
1437+
env.memory_peak_fd = open(buf, O_RDWR | O_APPEND);
1438+
if (env.memory_peak_fd < 0) {
1439+
err = log_errno("open(%s)", buf);
1440+
goto err_out;
1441+
}
1442+
1443+
return 0;
1444+
1445+
err_out:
1446+
destroy_stat_cgroup();
1447+
return err;
1448+
}
1449+
1450+
static void destroy_stat_cgroup(void)
1451+
{
1452+
char buf[PATH_MAX];
1453+
int err;
1454+
1455+
close(env.memory_peak_fd);
1456+
1457+
if (env.cgroup_fs_mount[0]) {
1458+
snprintf_trunc(buf, sizeof(buf), "%s/cgroup.procs", env.cgroup_fs_mount);
1459+
err = write_one_line(buf, "%d\n", getpid());
1460+
if (err < 0)
1461+
log_errno("echo %d > %s", getpid(), buf);
1462+
}
1463+
1464+
if (env.stat_cgroup[0]) {
1465+
err = rmdir(env.stat_cgroup);
1466+
if (err < 0)
1467+
log_errno("rmdir %s", env.stat_cgroup);
1468+
}
1469+
1470+
env.stat_cgroup[0] = 0;
1471+
}
1472+
1473+
/* Current value of /tmp/veristat-cgroup-mount-XXXXXX/accounting-<pid>/memory.peak */
1474+
static long cgroup_memory_peak(void)
1475+
{
1476+
long err, memory_peak;
1477+
char buf[32];
1478+
1479+
if (env.memory_peak_fd < 0)
1480+
return -1;
1481+
1482+
err = pread(env.memory_peak_fd, buf, sizeof(buf) - 1, 0);
1483+
if (err <= 0) {
1484+
log_errno("read(%s/memory.peak)", env.stat_cgroup);
1485+
return -1;
1486+
}
1487+
1488+
buf[err] = 0;
1489+
errno = 0;
1490+
memory_peak = strtoll(buf, NULL, 10);
1491+
if (errno) {
1492+
log_errno("unrecognized %s/memory.peak format: %s", env.stat_cgroup, buf);
1493+
return -1;
1494+
}
1495+
1496+
return memory_peak;
1497+
}
1498+
12811499
static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
12821500
{
12831501
const char *base_filename = basename(strdupa(filename));
12841502
const char *prog_name = bpf_program__name(prog);
1503+
long mem_peak_a, mem_peak_b, mem_peak = -1;
12851504
char *buf;
12861505
int buf_sz, log_level;
12871506
struct verif_stats *stats;
12881507
struct bpf_prog_info info;
12891508
__u32 info_len = sizeof(info);
1290-
int err = 0;
1509+
int err = 0, cgroup_err;
12911510
void *tmp;
12921511
int fd;
12931512

@@ -1332,7 +1551,16 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
13321551
if (env.force_reg_invariants)
13331552
bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
13341553

1335-
err = bpf_object__load(obj);
1554+
err = bpf_object__prepare(obj);
1555+
if (!err) {
1556+
cgroup_err = create_stat_cgroup();
1557+
mem_peak_a = cgroup_memory_peak();
1558+
err = bpf_object__load(obj);
1559+
mem_peak_b = cgroup_memory_peak();
1560+
destroy_stat_cgroup();
1561+
if (!cgroup_err && mem_peak_a >= 0 && mem_peak_b >= 0)
1562+
mem_peak = mem_peak_b - mem_peak_a;
1563+
}
13361564
env.progs_processed++;
13371565

13381566
stats->file_name = strdup(base_filename);
@@ -1341,6 +1569,7 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
13411569
stats->stats[SIZE] = bpf_program__insn_cnt(prog);
13421570
stats->stats[PROG_TYPE] = bpf_program__type(prog);
13431571
stats->stats[ATTACH_TYPE] = bpf_program__expected_attach_type(prog);
1572+
stats->stats[MEMORY_PEAK] = mem_peak < 0 ? -1 : mem_peak / 1024;
13441573

13451574
memset(&info, 0, info_len);
13461575
fd = bpf_program__fd(prog);
@@ -1824,6 +2053,7 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
18242053
case TOTAL_STATES:
18252054
case PEAK_STATES:
18262055
case MAX_STATES_PER_INSN:
2056+
case MEMORY_PEAK:
18272057
case MARK_READ_MAX_LEN: {
18282058
long v1 = s1->stats[id];
18292059
long v2 = s2->stats[id];
@@ -2053,6 +2283,7 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id,
20532283
case STACK:
20542284
case SIZE:
20552285
case JITED_SIZE:
2286+
case MEMORY_PEAK:
20562287
*val = s ? s->stats[id] : 0;
20572288
break;
20582289
default:
@@ -2139,6 +2370,7 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats
21392370
case MARK_READ_MAX_LEN:
21402371
case SIZE:
21412372
case JITED_SIZE:
2373+
case MEMORY_PEAK:
21422374
case STACK: {
21432375
long val;
21442376
int err, n;
@@ -2776,27 +3008,30 @@ static void output_prog_stats(void)
27763008

27773009
static int handle_verif_mode(void)
27783010
{
2779-
int i, err;
3011+
int i, err = 0;
27803012

27813013
if (env.filename_cnt == 0) {
27823014
fprintf(stderr, "Please provide path to BPF object file!\n\n");
27833015
argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
27843016
return -EINVAL;
27853017
}
27863018

3019+
mount_cgroupfs();
27873020
for (i = 0; i < env.filename_cnt; i++) {
27883021
err = process_obj(env.filenames[i]);
27893022
if (err) {
27903023
fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
2791-
return err;
3024+
goto out;
27923025
}
27933026
}
27943027

27953028
qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
27963029

27973030
output_prog_stats();
27983031

2799-
return 0;
3032+
out:
3033+
umount_cgroupfs();
3034+
return err;
28003035
}
28013036

28023037
static int handle_replay_mode(void)

0 commit comments

Comments
 (0)