diff --git a/qa/1788 b/qa/1788 index c7820b2587..edccbe1d2b 100755 --- a/qa/1788 +++ b/qa/1788 @@ -2,7 +2,7 @@ # PCP QA Test No. 1788 # Exercise Linux kernel hugepage sysfs metrics. # -# Copyright (c) 2024 Red Hat. All Rights Reserved. +# Copyright (c) 2024-2025 Red Hat. All Rights Reserved. # seq=`basename $0` @@ -18,16 +18,18 @@ echo "QA output created by $seq" _cleanup() { cd $here - $sudo rm -f $PCP_VAR_DIR/config/pmda/60.43 + $sudo rm -f $PCP_VAR_DIR/config/pmda/60.{43,44} _restore_config $PCP_VAR_DIR/config/pmda/60.43 + _restore_config $PCP_VAR_DIR/config/pmda/60.44 $sudo rm -rf $tmp $tmp.* } status=1 # failure is the default! trap "_cleanup; exit \$status" 0 1 2 3 15 -# deterministic output - move aside any existing indom cache +# deterministic output - move aside any existing indom caches _save_config $PCP_VAR_DIR/config/pmda/60.43 +_save_config $PCP_VAR_DIR/config/pmda/60.44 _filter_instances() { @@ -40,7 +42,7 @@ export LINUX_STATSPATH=$root pmda=$PCP_PMDAS_DIR/linux/pmda_linux.so,linux_init local="-L -K clear -K add,60,$pmda" -metrics=`pminfo $local mem.hugepages | LC_COLLATE=POSIX sort` +metrics=`pminfo $local mem.hugepages mem.numa.hugepages | LC_COLLATE=POSIX sort` for tgz in $here/linux/sysfs-hugepages-*.tgz do rm -fr $root diff --git a/qa/1788.out b/qa/1788.out index d14a866200..a813e7c83e 100644 --- a/qa/1788.out +++ b/qa/1788.out @@ -37,6 +37,18 @@ mem.hugepages.totalsize inst [N or "32768kB"] value 327680 inst [N or "64kB"] value 6400 +mem.numa.hugepages.free +No value(s) available! + +mem.numa.hugepages.pagesize +No value(s) available! + +mem.numa.hugepages.surplus +No value(s) available! + +mem.numa.hugepages.totalsize +No value(s) available! + == done == Checking hugepages metric values - sysfs-hugepages-002.tgz @@ -65,5 +77,81 @@ mem.hugepages.totalsize inst [N or "1048576kB"] value 104857600 inst [N or "2048kB"] value 4096000 +mem.numa.hugepages.free +No value(s) available! + +mem.numa.hugepages.pagesize +No value(s) available! 
+ +mem.numa.hugepages.surplus +No value(s) available! + +mem.numa.hugepages.totalsize +No value(s) available! + +== done + +== Checking hugepages metric values - sysfs-hugepages-003.tgz + +mem.hugepages.free + inst [N or "1048576kB"] value 104857600 + inst [N or "2048kB"] value 4096000 + inst [N or "32768kB"] value 0 + inst [N or "64kB"] value 0 + +mem.hugepages.overcommit + inst [N or "1048576kB"] value 0 + inst [N or "2048kB"] value 0 + inst [N or "32768kB"] value 0 + inst [N or "64kB"] value 0 + +mem.hugepages.pagesize + inst [N or "1048576kB"] value 1048576 + inst [N or "2048kB"] value 2048 + inst [N or "32768kB"] value 32768 + inst [N or "64kB"] value 64 + +mem.hugepages.reserved + inst [N or "1048576kB"] value 0 + inst [N or "2048kB"] value 16384 + inst [N or "32768kB"] value 0 + inst [N or "64kB"] value 0 + +mem.hugepages.surplus + inst [N or "1048576kB"] value 0 + inst [N or "2048kB"] value 8192 + inst [N or "32768kB"] value 0 + inst [N or "64kB"] value 0 + +mem.hugepages.totalsize + inst [N or "1048576kB"] value 104857600 + inst [N or "2048kB"] value 4096000 + inst [N or "32768kB"] value 0 + inst [N or "64kB"] value 0 + +mem.numa.hugepages.free + inst [N or "node0::1048576kB"] value 104857600 + inst [N or "node0::2048kB"] value 4096000 + inst [N or "node0::32768kB"] value 0 + inst [N or "node0::64kB"] value 0 + +mem.numa.hugepages.pagesize + inst [N or "node0::1048576kB"] value 1048576 + inst [N or "node0::2048kB"] value 2048 + inst [N or "node0::32768kB"] value 32768 + inst [N or "node0::64kB"] value 64 + +mem.numa.hugepages.surplus + inst [N or "node0::1048576kB"] value 0 + inst [N or "node0::2048kB"] value 8192 + inst [N or "node0::32768kB"] value 0 + inst [N or "node0::64kB"] value 0 + +mem.numa.hugepages.totalsize + inst [N or "node0::1048576kB"] value 104857600 + inst [N or "node0::2048kB"] value 4096000 + inst [N or "node0::32768kB"] value 0 + inst [N or "node0::64kB"] value 0 + == done diff --git a/qa/821.out b/qa/821.out index 
b285b7236e..6880e60523 100644 --- a/qa/821.out +++ b/qa/821.out @@ -137,6 +137,18 @@ mem.numa.alloc.miss mem.numa.alloc.other_node inst [0 or "node0"] value 0 +mem.numa.hugepages.free +No value(s) available! + +mem.numa.hugepages.pagesize +No value(s) available! + +mem.numa.hugepages.surplus +No value(s) available! + +mem.numa.hugepages.totalsize +No value(s) available! + mem.numa.max_bandwidth No value(s) available! @@ -2933,6 +2945,18 @@ mem.numa.alloc.other_node inst [0 or "node0"] value 11838 inst [1 or "node1"] value 53630 +mem.numa.hugepages.free +No value(s) available! + +mem.numa.hugepages.pagesize +No value(s) available! + +mem.numa.hugepages.surplus +No value(s) available! + +mem.numa.hugepages.totalsize +No value(s) available! + mem.numa.max_bandwidth No value(s) available! @@ -5738,6 +5762,18 @@ mem.numa.alloc.other_node inst [0 or "node0"] value 11838 inst [1 or "node1"] value 53630 +mem.numa.hugepages.free +No value(s) available! + +mem.numa.hugepages.pagesize +No value(s) available! + +mem.numa.hugepages.surplus +No value(s) available! + +mem.numa.hugepages.totalsize +No value(s) available! + mem.numa.max_bandwidth No value(s) available! @@ -8537,6 +8573,18 @@ mem.numa.alloc.miss mem.numa.alloc.other_node inst [0 or "node0"] value 0 +mem.numa.hugepages.free +No value(s) available! + +mem.numa.hugepages.pagesize +No value(s) available! + +mem.numa.hugepages.surplus +No value(s) available! + +mem.numa.hugepages.totalsize +No value(s) available! + mem.numa.max_bandwidth No value(s) available! @@ -11073,6 +11121,18 @@ mem.numa.alloc.miss mem.numa.alloc.other_node inst [0 or "node0"] value 0 +mem.numa.hugepages.free +No value(s) available! + +mem.numa.hugepages.pagesize +No value(s) available! + +mem.numa.hugepages.surplus +No value(s) available! + +mem.numa.hugepages.totalsize +No value(s) available! + mem.numa.max_bandwidth No value(s) available! 
@@ -13673,6 +13733,18 @@ mem.numa.alloc.miss mem.numa.alloc.other_node inst [0 or "node0"] value 0 +mem.numa.hugepages.free +No value(s) available! + +mem.numa.hugepages.pagesize +No value(s) available! + +mem.numa.hugepages.surplus +No value(s) available! + +mem.numa.hugepages.totalsize +No value(s) available! + mem.numa.max_bandwidth No value(s) available! @@ -15551,6 +15623,18 @@ mem.numa.alloc.other_node inst [0 or "node0"] value 847 inst [1 or "node1"] value 492232 +mem.numa.hugepages.free +No value(s) available! + +mem.numa.hugepages.pagesize +No value(s) available! + +mem.numa.hugepages.surplus +No value(s) available! + +mem.numa.hugepages.totalsize +No value(s) available! + mem.numa.max_bandwidth No value(s) available! diff --git a/qa/linux/sysfs-hugepages-003.tgz b/qa/linux/sysfs-hugepages-003.tgz new file mode 100644 index 0000000000..08265e44c2 Binary files /dev/null and b/qa/linux/sysfs-hugepages-003.tgz differ diff --git a/src/pmdas/linux/help b/src/pmdas/linux/help index 71ee023856..d6e4750056 100644 --- a/src/pmdas/linux/help +++ b/src/pmdas/linux/help @@ -3,7 +3,7 @@ # Portions Copyright (c) International Business Machines Corp., 2002 # Portions Copyright (c) 2007-2009 Aconex. All Rights Reserved. # Portions Copyright (c) 2016-2017 Fujitsu. -# Portions Copyright (c) 2013-2021,2023-2024 Red Hat. +# Portions Copyright (c) 2013-2021,2023-2025 Red Hat. # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the @@ -1041,6 +1041,11 @@ filled up manually after running some bandwidth saturation benchmark tools. 
@ mem.numa.util.hugepagesTotalBytes per-node total amount of hugepages memory @ mem.numa.util.hugepagesFreeBytes per-node amount of free hugepages memory @ mem.numa.util.hugepagesSurpBytes per-node amount of surplus hugepages memory +@ mem.numa.hugepages.pagesize per-node available huge page sizes +@ mem.numa.hugepages.free per-node huge page space not yet allocated +@ mem.numa.hugepages.surplus per-node huge page space in the pool above total size +@ mem.numa.hugepages.totalsize per-node memory consumed by each huge page size + @ mem.vmstat.nr_dirty number of pages in dirty state Instantaneous number of pages in dirty state, from /proc/vmstat @ mem.vmstat.nr_dirty_background_threshold background writeback threshold diff --git a/src/pmdas/linux/linux.h b/src/pmdas/linux/linux.h index dd2b5e61a8..db7f2bb4f2 100644 --- a/src/pmdas/linux/linux.h +++ b/src/pmdas/linux/linux.h @@ -116,6 +116,7 @@ enum { CLUSTER_WWID, /* 92 multipath aggregated stats */ CLUSTER_PRESSURE_IRQ, /* 93 /proc/pressure/irq metrics */ CLUSTER_HUGEPAGES, /* 94 /sys/kernel/mm/hugepages metrics */ + CLUSTER_NUMA_HUGEPAGES, /* 95 /sys/devices/system/node/nodeN/hugepages metrics */ NUM_CLUSTERS /* one more than highest numbered cluster */ }; @@ -197,6 +198,7 @@ enum { SOFTIRQ_CPU_INDOM, /* 41 - per-CPU soft IRQs */ WWID_INDOM, /* 42 - per-WWID multipath device */ HUGEPAGES_INDOM, /* 43 - hugepages (fixed sizes) */ + NUMA_HUGEPAGES_INDOM, /* 44 - NUMA hugepages (fixed sizes) */ NUM_INDOMS /* one more than highest numbered cluster */ }; diff --git a/src/pmdas/linux/linux_table.c b/src/pmdas/linux/linux_table.c index 59af70b2a0..09007676a1 100644 --- a/src/pmdas/linux/linux_table.c +++ b/src/pmdas/linux/linux_table.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Red Hat. + * Copyright (c) 2012,2025 Red Hat. * Copyright (c) 2004 Silicon Graphics, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it @@ -46,16 +46,17 @@ linux_table_clone(struct linux_table *table) { struct linux_table *ret; struct linux_table *t; - int len; + size_t len, bytes; if (!table) return NULL; for (len=1, t=table; t->field; t++) len++; - ret = (struct linux_table *)malloc(len * sizeof(struct linux_table)); + bytes = len * sizeof(struct linux_table); + ret = (struct linux_table *)malloc(bytes); if (!ret) return NULL; - memcpy(ret, table, len * sizeof(struct linux_table)); + memcpy(ret, table, bytes); /* Initialize the table */ for (t=ret; t && t->field; t++) { @@ -75,7 +76,7 @@ linux_table_scan(FILE *fp, struct linux_table *table) char buf[1024]; int ret = 0; - while(fgets(buf, sizeof(buf), fp) != NULL) { + while (fgets(buf, sizeof(buf), fp) != NULL) { for (t=table; t && t->field; t++) { if ((p = strstr(buf, t->field)) != NULL) { /* first digit after the matched field */ @@ -95,7 +96,7 @@ linux_table_scan(FILE *fp, struct linux_table *table) /* calculate current value, accounting for counter wrap */ for (t=table; t && t->field; t++) { - if (t->maxval == 0) + if (t->maxval == 0) /* instantaneous value */ t->val = t->this; else { diff --git a/src/pmdas/linux/pmda.c b/src/pmdas/linux/pmda.c index 15fa4a385a..d7c05c28f4 100644 --- a/src/pmdas/linux/pmda.c +++ b/src/pmdas/linux/pmda.c @@ -1,7 +1,7 @@ /* * Linux PMDA * - * Copyright (c) 2012-2024 Red Hat. + * Copyright (c) 2012-2025 Red Hat. * Copyright (c) 2016-2017 Fujitsu. * Copyright (c) 2007-2011 Aconex. All Rights Reserved. * Copyright (c) 2002 International Business Machines Corp. 
@@ -378,6 +378,7 @@ static pmdaIndom indomtab[] = { { SOFTIRQ_CPU_INDOM, 0, NULL }, { WWID_INDOM, 0, NULL }, { HUGEPAGES_INDOM, 0, NULL }, + { NUMA_HUGEPAGES_INDOM, 0, NULL }, }; @@ -7467,6 +7468,22 @@ static pmdaMetric metrictab[] = { /* mem.hugepages.overcommit */ { NULL, { PMDA_PMID(CLUSTER_HUGEPAGES, OVERCOMMIT_HUGEPAGES), PM_TYPE_U64, HUGEPAGES_INDOM, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, }, + +/* + * NUMA hugepages (fixed sizes) metrics cluster + */ + /* mem.numa.hugepages.pagesize */ + { NULL, { PMDA_PMID(CLUSTER_NUMA_HUGEPAGES, PAGESIZE_NUMA_HUGEPAGES), PM_TYPE_U64, + NUMA_HUGEPAGES_INDOM, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, }, + /* mem.numa.hugepages.free */ + { NULL, { PMDA_PMID(CLUSTER_NUMA_HUGEPAGES, FREE_NUMA_HUGEPAGES), PM_TYPE_U64, + NUMA_HUGEPAGES_INDOM, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, }, + /* mem.numa.hugepages.surplus */ + { NULL, { PMDA_PMID(CLUSTER_NUMA_HUGEPAGES, SURPLUS_NUMA_HUGEPAGES), PM_TYPE_U64, + NUMA_HUGEPAGES_INDOM, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, }, + /* mem.numa.hugepages.totalsize */ + { NULL, { PMDA_PMID(CLUSTER_NUMA_HUGEPAGES, TOTALSIZE_NUMA_HUGEPAGES), PM_TYPE_U64, + NUMA_HUGEPAGES_INDOM, PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0) }, }, }; typedef struct { @@ -7836,6 +7853,8 @@ linux_refresh(pmdaExt *pmda, int *need_refresh, int context) if (need_refresh[CLUSTER_HUGEPAGES]) refresh_sysfs_hugepages(INDOM(HUGEPAGES_INDOM)); + if (need_refresh[CLUSTER_NUMA_HUGEPAGES]) + refresh_sysfs_numa_hugepages(INDOM(NUMA_HUGEPAGES_INDOM)); done: container_close(cp, ns_fds); @@ -7965,7 +7984,7 @@ linux_fetchCallBack(pmdaMetric *mdesc, unsigned int inst, pmAtomValue *atom) net_addr_t *addrp; net_interface_t *netip; scsi_entry_t *scsi_entry; - hugepages_t *hugepages; + uint64_t *values; char *name; if (mdesc->m_user != NULL) { @@ -10197,15 +10216,32 @@ linux_fetchCallBack(pmdaMetric *mdesc, unsigned int inst, pmAtomValue *atom) */ if (item >= 
HUGEPAGES_METRIC_COUNT) return PM_ERR_PMID; - hugepages = NULL; - sts = pmdaCacheLookup(INDOM(HUGEPAGES_INDOM), inst, NULL, (void **)&hugepages); + values = NULL; + sts = pmdaCacheLookup(INDOM(HUGEPAGES_INDOM), inst, NULL, (void **)&values); if (sts < 0) return sts; - if (sts != PMDA_CACHE_ACTIVE || hugepages == NULL) + if (sts != PMDA_CACHE_ACTIVE || values == NULL) return PM_ERR_INST; - atom->ull = hugepages->values[item]; + atom->ull = values[item]; if (item != PAGESIZE_HUGEPAGES) /* convert to kB */ - atom->ull *= hugepages->values[PAGESIZE_HUGEPAGES]; + atom->ull *= values[PAGESIZE_HUGEPAGES]; + break; + + case CLUSTER_NUMA_HUGEPAGES: + /* + * mem.numa.hugepages.* metrics are direct indexed by item, see sysfs_hugepages.h + */ + if (item >= NUMA_HUGEPAGES_METRIC_COUNT) + return PM_ERR_PMID; + values = NULL; + sts = pmdaCacheLookup(INDOM(NUMA_HUGEPAGES_INDOM), inst, NULL, (void **)&values); + if (sts < 0) + return sts; + if (sts != PMDA_CACHE_ACTIVE || values == NULL) + return PM_ERR_INST; + atom->ull = values[item]; + if (item != PAGESIZE_NUMA_HUGEPAGES) /* convert to kB */ + atom->ull *= values[PAGESIZE_NUMA_HUGEPAGES]; break; default: /* unknown cluster */ diff --git a/src/pmdas/linux/root_linux b/src/pmdas/linux/root_linux index 24dcf1d4e9..38e63c3ec6 100644 --- a/src/pmdas/linux/root_linux +++ b/src/pmdas/linux/root_linux @@ -2,7 +2,7 @@ * Copyright (c) 2000,2004,2007-2008 SGI. All Rights Reserved. * Copyright (c) 2002 International Business Machines Corp. * Copyright (c) 2007-2009 Aconex. All Rights Reserved. - * Copyright (c) 2013-2021,2023-2024 Red Hat. + * Copyright (c) 2013-2021,2023-2025 Red Hat. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -752,6 +752,7 @@ mem.numa { util alloc max_bandwidth 60:36:38 + hugepages } mem.numa.util { @@ -801,6 +802,13 @@ mem.numa.alloc { other_node 60:36:37 } +mem.numa.hugepages { + pagesize 60:95:0 + free 60:95:1 + surplus 60:95:2 + totalsize 60:95:3 +} + mem.zoneinfo { free 60:68:0 min 60:68:1 diff --git a/src/pmdas/linux/sysfs_hugepages.c b/src/pmdas/linux/sysfs_hugepages.c index 794813fd91..a92e9946b5 100644 --- a/src/pmdas/linux/sysfs_hugepages.c +++ b/src/pmdas/linux/sysfs_hugepages.c @@ -1,7 +1,7 @@ /* - * Linux /sys/kernel/mm/hugepages cluster + * Linux /sys/{kernel/mm,devices/system/node/nodeN}/hugepages clusters * - * Copyright (c) 2024, Red Hat. + * Copyright (c) 2024-2025, Red Hat. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -15,9 +15,10 @@ */ #include #include "linux.h" +#include "proc_stat.h" #include "sysfs_hugepages.h" -static char *hugepage_fields[] = { +static const char *hugepage_fields[] = { [PAGESIZE_HUGEPAGES] = "", /* directory name */ [FREE_HUGEPAGES] = "free_hugepages", [RESV_HUGEPAGES] = "resv_hugepages", @@ -27,30 +28,32 @@ static char *hugepage_fields[] = { [HUGEPAGES_METRIC_COUNT] = NULL }; -int -refresh_sysfs_hugepages(pmInDom indom) +static const char *numa_hugepage_fields[] = { + [PAGESIZE_NUMA_HUGEPAGES] = "", /* directory name */ + [FREE_NUMA_HUGEPAGES] = "free_hugepages", + [SURPLUS_NUMA_HUGEPAGES] = "surplus_hugepages", + [TOTALSIZE_NUMA_HUGEPAGES] = "nr_hugepages", + [NUMA_HUGEPAGES_METRIC_COUNT] = NULL +}; + +/* + * Scans a single directory for different hugepage size stats files. + * Returns negative code on error, else 0/1 indicating whether the + * instance domain has changed (and hence needs to be persisted). 
+ */ +static int +scan_sysfs_hugepages_dir(const char *sysname, DIR *sysdir, pmInDom indom, + const char *node, const char *fields[], unsigned int count) { - char sysname[MAXPATHLEN]; + struct dirent *sysentry, *hugepages; char statsname[MAXPATHLEN]; char statsfile[MAXPATHLEN]; char strvalue[64], *iname; - DIR *sysdir, *hugepagesdir; - struct hugepages *hugepage; - struct dirent *sysentry, *hugepages; + DIR *hugepagesdir; + uint64_t *hugepage; unsigned long long pagesize; - static int setup; int i, sts, fd, needsave = 0; - if (!setup) { - pmdaCacheOp(indom, PMDA_CACHE_LOAD); - setup = 1; - } - - pmdaCacheOp(indom, PMDA_CACHE_INACTIVE); - pmsprintf(sysname, sizeof(sysname), "%s/sys/kernel/mm/hugepages", linux_statspath); - if ((sysdir = opendir(sysname)) == NULL) - return -oserror(); - while ((sysentry = readdir(sysdir)) != NULL) { char *size = sysentry->d_name; @@ -60,12 +63,16 @@ refresh_sysfs_hugepages(pmInDom indom) continue; /* look up this size in the cache, add it if not already present. 
*/ - hugepage = NULL; iname = size + 10; + if (node) { /* add optional NUMA node prefix */ + pmsprintf(statsname, sizeof(statsname), "%s::%s", node, iname); + iname = statsname; + } + hugepage = NULL; sts = pmdaCacheLookupName(indom, iname, NULL, (void **)&hugepage); if (sts < 0 || hugepage == NULL) { /* new hugepage size */ - if ((hugepage = (hugepages_t *)calloc(1, sizeof(hugepages_t))) == NULL) { + if (!(hugepage = (uint64_t *)calloc(count, sizeof(uint64_t)))) { sts = -oserror(); - closedir(sysdir); + /* sysdir is owned by the caller, which closes it - no closedir here */ return sts; @@ -78,8 +85,7 @@ refresh_sysfs_hugepages(pmInDom indom) pmdaCacheStore(indom, PMDA_CACHE_ADD, iname, (void *)hugepage); /* now update the stats for the new or existing hugepage size */ - memset(hugepage->values, 0, sizeof(hugepage->values)); - hugepage->values[PAGESIZE_HUGEPAGES] = pagesize; + hugepage[0] = pagesize; /* pagesize is always the first array entry */ pmsprintf(statsname, sizeof(statsname), "%s/%s", sysname, size); if ((hugepagesdir = opendir(statsname)) == NULL) @@ -100,9 +106,9 @@ refresh_sysfs_hugepages(pmInDom indom) continue; } strvalue[n] = '\0'; - for (i=0; i < HUGEPAGES_METRIC_COUNT; i++) { - if (strncmp(hugepage_fields[i], h, hlen) == 0) { - hugepage->values[i] = strtoull(strvalue, NULL, 0); + for (i=0; i < count; i++) { + if (strncmp(fields[i], h, hlen) == 0) { + hugepage[i] = strtoull(strvalue, NULL, 0); break; } } @@ -110,9 +116,78 @@ refresh_sysfs_hugepages(pmInDom indom) } closedir(hugepagesdir); } + + return needsave; +} + +int +refresh_sysfs_hugepages(pmInDom indom) +{ + static int setup; + char sysname[MAXPATHLEN]; + DIR *sysdir; + int sts; + + if (!setup) { + pmdaCacheOp(indom, PMDA_CACHE_LOAD); + setup = 1; + } + + pmdaCacheOp(indom, PMDA_CACHE_INACTIVE); + + pmsprintf(sysname, sizeof(sysname), "%s/sys/kernel/mm/hugepages", linux_statspath); + if ((sysdir = opendir(sysname)) == NULL) + return -oserror(); + sts = scan_sysfs_hugepages_dir(sysname, sysdir, indom, NULL, + hugepage_fields, HUGEPAGES_METRIC_COUNT); 
closedir(sysdir); + if (sts < 0) + return sts; + if (sts > 0) + pmdaCacheOp(indom, PMDA_CACHE_SAVE); + + return 0; +} + +/* + * This refreshes a compound instance domain, having + * per-NUMA-node and per-hugepage-size components. + */ +int +refresh_sysfs_numa_hugepages(pmInDom indom) +{ + int i, sts, save = 0; + DIR *sysdir; + char prefix[128]; + char sysname[MAXPATHLEN]; + pmInDom nodes = INDOM(NODE_INDOM); + static int setup; + + if (!setup) { + pmdaCacheOp(indom, PMDA_CACHE_LOAD); + cpu_node_setup(); + setup = 1; + } + + pmdaCacheOp(indom, PMDA_CACHE_INACTIVE); + + for (pmdaCacheOp(nodes, PMDA_CACHE_WALK_REWIND);;) { + if ((i = pmdaCacheOp(nodes, PMDA_CACHE_WALK_NEXT)) < 0) + break; + pmsprintf(sysname, sizeof(sysname), + "%s/sys/devices/system/node/node%d/hugepages", + linux_statspath, i); + if ((sysdir = opendir(sysname)) == NULL) + continue; + pmsprintf(prefix, sizeof(prefix), "node%d", i); + sts = scan_sysfs_hugepages_dir(sysname, sysdir, indom, prefix, + numa_hugepage_fields, NUMA_HUGEPAGES_METRIC_COUNT); + closedir(sysdir); + if (sts > 0) + save = 1; + } - if (needsave) + if (save) pmdaCacheOp(indom, PMDA_CACHE_SAVE); return 0; diff --git a/src/pmdas/linux/sysfs_hugepages.h b/src/pmdas/linux/sysfs_hugepages.h index d0c6319a24..e9b7fcd6d1 100644 --- a/src/pmdas/linux/sysfs_hugepages.h +++ b/src/pmdas/linux/sysfs_hugepages.h @@ -1,7 +1,7 @@ /* - * Linux /sys/kernel/mm/hugepages cluster + * Linux /sys/{kernel/mm,devices/system/node/nodeN}/hugepages clusters * - * Copyright (c) 2024, Red Hat. + * Copyright (c) 2024-2025, Red Hat. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -31,10 +31,20 @@ enum { HUGEPAGES_METRIC_COUNT }; -typedef struct hugepages { - uint64_t values[HUGEPAGES_METRIC_COUNT]; -} hugepages_t; +enum { + /* direct indexed NUMA pagesize metric */ + PAGESIZE_NUMA_HUGEPAGES = 0, + + /* direct indexed NUMA counter metrics */ + FREE_NUMA_HUGEPAGES, + SURPLUS_NUMA_HUGEPAGES, + TOTALSIZE_NUMA_HUGEPAGES, + + /* number of direct indexed counters */ + NUMA_HUGEPAGES_METRIC_COUNT +}; extern int refresh_sysfs_hugepages(pmInDom); +extern int refresh_sysfs_numa_hugepages(pmInDom); #endif /* SYSFS_HUGEPAGES_H */ diff --git a/src/pmlogconf/zeroconf/localdefs b/src/pmlogconf/zeroconf/localdefs index 8b6e698062..f948afc69e 100644 --- a/src/pmlogconf/zeroconf/localdefs +++ b/src/pmlogconf/zeroconf/localdefs @@ -8,6 +8,7 @@ FILES = \ interrupts \ nfsclient \ numa \ + numahugepages \ numastat \ pidstat \ pidstat-summary \ diff --git a/src/pmlogconf/zeroconf/numahugepages b/src/pmlogconf/zeroconf/numahugepages new file mode 100644 index 0000000000..ae1b8f064b --- /dev/null +++ b/src/pmlogconf/zeroconf/numahugepages @@ -0,0 +1,4 @@ +#pmlogconf-setup 2.0 +ident metrics relating to NUMA hugepages +probe mem.numa.hugepages.totalsize > 0 ? include : exclude + mem.numa.hugepages