diff --git a/headers/vmlinux/vmlinux_net.h b/headers/vmlinux/vmlinux_net.h index 5f16c57b..64b26212 100644 --- a/headers/vmlinux/vmlinux_net.h +++ b/headers/vmlinux/vmlinux_net.h @@ -67,6 +67,7 @@ struct sk_buff { __u8 nf_trace: 1; __u8 ip_summed: 2; __u8 ooo_okay: 1; + __u8 tstamp_type: 2; __u8 l4_hash: 1; __u8 sw_hash: 1; __u8 wifi_acked_valid: 1; @@ -145,4 +146,8 @@ enum ip_conntrack_status { IPS_CONFIRMED = (1 << IPS_CONFIRMED_BIT), }; +struct scm_timestamping_internal { + struct timespec64 ts[3]; +}; + #endif /* __VMLINUX_NET_H__ */ diff --git a/headers/vmlinux/vmlinux_types.h b/headers/vmlinux/vmlinux_types.h index d7b3bed0..96411e12 100644 --- a/headers/vmlinux/vmlinux_types.h +++ b/headers/vmlinux/vmlinux_types.h @@ -11,4 +11,11 @@ typedef __u64 u64; typedef s64 ktime_t; +typedef __s64 time64_t; + +struct timespec64 { + time64_t tv_sec; + long int tv_nsec; +}; + #endif /* __VMLINUX_TYPES_H__ */ diff --git a/netstacklat/.gitignore b/netstacklat/.gitignore new file mode 100644 index 00000000..1d232888 --- /dev/null +++ b/netstacklat/.gitignore @@ -0,0 +1 @@ +netstacklat diff --git a/netstacklat/Makefile b/netstacklat/Makefile new file mode 100644 index 00000000..8efef417 --- /dev/null +++ b/netstacklat/Makefile @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) + +USER_TARGETS := netstacklat +BPF_TARGETS := netstacklat.bpf +BPF_SKEL_OBJ := netstacklat.bpf.o + +EXTRA_DEPS += netstacklat.h bits.bpf.h +LDLIBS += -lm + +LIB_DIR = ../lib + +include $(LIB_DIR)/common.mk + diff --git a/netstacklat/README.md b/netstacklat/README.md new file mode 100644 index 00000000..403a9df5 --- /dev/null +++ b/netstacklat/README.md @@ -0,0 +1,58 @@ +# Netstacklat - Monitor latency within the network stack +Netstacklat is a simple tool for monitoring latency within the Linux +network stack for ingress traffic. The tool relies on the kernel time +stamping received packets (`SOF_TIMESTAMPING_RX_SOFTWARE`), +specifically setting `sk_buff->tstamp`. 
It then reports when packets +arrive at various hooks relative to this timestamp, i.e. the time +between the packet being timestamped by the kernel and reaching a +specific hook. + +The tool is based on the following bpftrace script from Jesper +Dangaard Brouer: +```console +sudo bpftrace -e ' + kfunc:tcp_v4_do_rcv, + kfunc:tcp_data_queue, + kfunc:udp_queue_rcv_one_skb + { + $tai_offset=37000000000; + $now=nsecs(tai)-$tai_offset; @cnt[probe]=count(); @total[probe]=count(); + $ts=args->skb->tstamp; $delta=$now-(uint64)$ts; + @hist_ns[probe]=hist($delta); + @stats[probe]=stats($delta); + //printf("now:%llu - ts:%llu = delta:%llu\n", $now, $ts, $delta); + } + interval:s:10 {time("\n%H:%M:%S\n"); + print(@cnt); clear(@cnt); + print(@total); + print(@stats); + print(@hist_ns); + }' +``` + +The eBPF part of the tool (`netstacklat.bpf.c`) is designed to be +compatible with +[ebpf_exporter](https://github.com/cloudflare/ebpf_exporter), so that +the data can easily be exported to Prometheus. The easiest way to use +netstacklat together with ebpf-exporter is simply to point it to this +directory, i.e. +```console +$ ebpf_exporter --config.dir=/bpf-examples/netstacklat --config.names=netstacklat +``` + +Alternatively, you can copy over the files to ebpf-exporter's example +repository. +```console +$ cp netstacklat.{bpf.c,h,yaml} -t /ebpf_exporter/examples/ +# Fix up some header includes (e.g. "vmlinux_local.h" -> +$ make -C /ebpf_exporter/examples build +$ ebpf_exporter --config.dir=/ebpf_exporter/examples --config.names +``` + +Note that when using together with ebpf-exporter, some of the +functionality handled by netstacklat's userspace program will not be +available. This includes setting the `TAI_OFFSET` constant in +`netstacklat.bpf.c` to match your system's TAI offset (you can do this +manually instead), and enabling RX timestamping by the kernel (see the +`enable_sw_rx_tstamps()` function in `netstacklat.c` for an example of +how to do this). 
diff --git a/netstacklat/bits.bpf.h b/netstacklat/bits.bpf.h new file mode 100644 index 00000000..2b7e825d --- /dev/null +++ b/netstacklat/bits.bpf.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* From https://github.com/iovisor/bcc/blob/v0.25.0/libbpf-tools/bits.bpf.h*/ + +#ifndef __BITS_BPF_H +#define __BITS_BPF_H + +static __always_inline u64 log2(u32 v) +{ + u32 shift, r; + + r = (v > 0xFFFF) << 4; v >>= r; + shift = (v > 0xFF) << 3; v >>= shift; r |= shift; + shift = (v > 0xF) << 2; v >>= shift; r |= shift; + shift = (v > 0x3) << 1; v >>= shift; r |= shift; + r |= (v >> 1); + + return r; +} + +static __always_inline u64 log2l(u64 v) +{ + u32 hi = v >> 32; + if (hi) + return log2(hi) + 32; + else + return log2(v); +} + +#endif /* __BITS_BPF_H */ diff --git a/netstacklat/netstacklat.bpf.c b/netstacklat/netstacklat.bpf.c new file mode 100644 index 00000000..899a9638 --- /dev/null +++ b/netstacklat/netstacklat.bpf.c @@ -0,0 +1,338 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "vmlinux_local.h" +#include + +#include +#include +#include + +#include "netstacklat.h" +#include "bits.bpf.h" + +char LICENSE[] SEC("license") = "GPL"; + + +volatile const __s64 TAI_OFFSET = (37LL * NS_PER_S); +volatile const struct netstacklat_bpf_config user_config = { + .filter_pid = false, +}; + +/* + * Alternative definition of sk_buff to handle renaming of the field + * mono_delivery_time to tstamp_type. 
See + * https://nakryiko.com/posts/bpf-core-reference-guide/#handling-incompatible-field-and-type-changes + */ +struct sk_buff___old { + union { + ktime_t tstamp; + u64 skb_mstamp_ns; + }; + __u8 mono_delivery_time: 1; +} __attribute__((preserve_access_index)); + +/* + * To be compatible with ebpf-exporter, all histograms need a key whose final + * member is the histogram bucket index + */ +struct hist_key { + u32 bucket; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, HIST_NBUCKETS); + __type(key, u32); + __type(value, u64); +} netstack_latency_ip_start_seconds SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, HIST_NBUCKETS); + __type(key, u32); + __type(value, u64); +} netstack_latency_tcp_start_seconds SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, HIST_NBUCKETS); + __type(key, u32); + __type(value, u64); +} netstack_latency_udp_start_seconds SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, HIST_NBUCKETS); + __type(key, u32); + __type(value, u64); +} netstack_latency_tcp_sock_enqueued_seconds SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, HIST_NBUCKETS); + __type(key, u32); + __type(value, u64); +} netstack_latency_udp_sock_enqueued_seconds SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, HIST_NBUCKETS); + __type(key, u32); + __type(value, u64); +} netstack_latency_tcp_sock_read_seconds SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, HIST_NBUCKETS); + __type(key, u32); + __type(value, u64); +} netstack_latency_udp_sock_read_seconds SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, PID_MAX_LIMIT); + __type(key, u32); + __type(value, u8); +} netstack_pidfilter SEC(".maps"); + +static u32 get_exp2_histogram_bucket_idx(u64 value, u32 max_bucket) +{ 
+ u32 bucket = log2l(value); + + // Right-inclusive histogram, so "round up" the log value + if (bucket > 0 && 1ULL << bucket < value) + bucket++; + + if (bucket > max_bucket) + bucket = max_bucket; + + return bucket; +} + +/* + * Same call signature as the increment_exp2_histogram_nosync macro from + * https://github.com/cloudflare/ebpf_exporter/blob/master/examples/maps.bpf.h + * but provided as a function. + * + * Unlike the macro, only works with keys of type struct hist_key. The hist_key + * struct must be provided by value (rather than as a pointer) to keep the same + * call signature as the ebpf-exporter macro, although this will get inefficent + * if struct hist_key grows large. + */ +static void increment_exp2_histogram_nosync(void *map, struct hist_key key, + u64 value, u32 max_bucket) +{ + u64 *bucket_count; + + // Increment histogram + key.bucket = get_exp2_histogram_bucket_idx(value, max_bucket); + bucket_count = bpf_map_lookup_elem(map, &key); + if (bucket_count) + (*bucket_count)++; + + // Increment sum at end of histogram + if (value == 0) + return; + + key.bucket = max_bucket + 1; + bucket_count = bpf_map_lookup_elem(map, &key); + if (bucket_count) + *bucket_count += value; +} + +static void *hook_to_histmap(enum netstacklat_hook hook) +{ + switch (hook) { + case NETSTACKLAT_HOOK_IP_RCV: + return &netstack_latency_ip_start_seconds; + case NETSTACKLAT_HOOK_TCP_START: + return &netstack_latency_tcp_start_seconds; + case NETSTACKLAT_HOOK_UDP_START: + return &netstack_latency_udp_start_seconds; + case NETSTACKLAT_HOOK_TCP_SOCK_ENQUEUED: + return &netstack_latency_tcp_sock_enqueued_seconds; + case NETSTACKLAT_HOOK_UDP_SOCK_ENQUEUED: + return &netstack_latency_udp_sock_enqueued_seconds; + case NETSTACKLAT_HOOK_TCP_SOCK_READ: + return &netstack_latency_tcp_sock_read_seconds; + case NETSTACKLAT_HOOK_UDP_SOCK_READ: + return &netstack_latency_udp_sock_read_seconds; + default: + return NULL; + } +} + +static ktime_t time_since(ktime_t tstamp) +{ + ktime_t 
now; + + if (tstamp <= 0) + return -1; + + now = bpf_ktime_get_tai_ns() - TAI_OFFSET; + if (tstamp > now) + return -1; + + return now - tstamp; +} + +static void record_latency(ktime_t latency, enum netstacklat_hook hook) +{ + struct hist_key key = { 0 }; + increment_exp2_histogram_nosync(hook_to_histmap(hook), key, latency, + HIST_MAX_LATENCY_SLOT); +} + +static void record_latency_since(ktime_t tstamp, enum netstacklat_hook hook) +{ + ktime_t latency = time_since(tstamp); + if (latency >= 0) + record_latency(latency, hook); +} + +static void record_skb_latency(struct sk_buff *skb, enum netstacklat_hook hook) +{ + if (bpf_core_field_exists(skb->tstamp_type)) { + /* + * For kernels >= v6.11 the tstamp_type being non-zero + * (SKB_CLOCK_REALTIME) implies that skb->tstamp holds a + * preserved TX timestamp rather than a RX timestamp. See + * https://lore.kernel.org/all/20240509211834.3235191-2-quic_abchauha@quicinc.com/ + */ + if (BPF_CORE_READ_BITFIELD(skb, tstamp_type) > 0) + return; + + } else { + /* + * For kernels < v6.11, the field was called mono_delivery_time + * instead, see https://lore.kernel.org/all/20220302195525.3480280-1-kafai@fb.com/ + * Kernels < v5.18 do not have the mono_delivery_field either, + * but we do not support those anyways (as they lack the + * bpf_ktime_get_tai_ns helper) + */ + struct sk_buff___old *skb_old = (void *)skb; + if (BPF_CORE_READ_BITFIELD(skb_old, mono_delivery_time) > 0) + return; + } + + record_latency_since(skb->tstamp, hook); +} + +static bool filter_pid(u32 pid) +{ + u8 *pid_ok; + + if (!user_config.filter_pid) + // No PID filter - all PIDs ok + return true; + + pid_ok = bpf_map_lookup_elem(&netstack_pidfilter, &pid); + if (!pid_ok) + return false; + + return *pid_ok > 0; +} + +static bool filter_current_task(void) +{ + __u32 tgid; + + if (!user_config.filter_pid) + return true; + + tgid = bpf_get_current_pid_tgid() >> 32; + return filter_pid(tgid); +} + +static void record_socket_latency(struct sock *sk, ktime_t 
tstamp, + enum netstacklat_hook hook) +{ + if (!filter_current_task()) + return; + + record_latency_since(tstamp, hook); +} + +SEC("fentry/ip_rcv_core") +int BPF_PROG(netstacklat_ip_rcv_core, struct sk_buff *skb, void *block, + void *tp, void *res, bool compat_mode) +{ + record_skb_latency(skb, NETSTACKLAT_HOOK_IP_RCV); + return 0; +} + +SEC("fentry/ip6_rcv_core") +int BPF_PROG(netstacklat_ip6_rcv_core, struct sk_buff *skb, void *block, + void *tp, void *res, bool compat_mode) +{ + record_skb_latency(skb, NETSTACKLAT_HOOK_IP_RCV); + return 0; +} + +SEC("fentry/tcp_v4_rcv") +int BPF_PROG(netstacklat_tcp_v4_rcv, struct sk_buff *skb) +{ + record_skb_latency(skb, NETSTACKLAT_HOOK_TCP_START); + return 0; +} + +SEC("fentry/tcp_v6_rcv") +int BPF_PROG(netstacklat_tcp_v6_rcv, struct sk_buff *skb) +{ + record_skb_latency(skb, NETSTACKLAT_HOOK_TCP_START); + return 0; +} + +SEC("fentry/udp_rcv") +int BPF_PROG(netstacklat_udp_rcv, struct sk_buff *skb) +{ + record_skb_latency(skb, NETSTACKLAT_HOOK_UDP_START); + return 0; +} + +SEC("fentry/udpv6_rcv") +int BPF_PROG(netstacklat_udpv6_rcv, struct sk_buff *skb) +{ + record_skb_latency(skb, NETSTACKLAT_HOOK_UDP_START); + return 0; +} + +SEC("fexit/tcp_data_queue") +int BPF_PROG(netstacklat_tcp_data_queue, struct sock *sk, struct sk_buff *skb) +{ + record_skb_latency(skb, NETSTACKLAT_HOOK_TCP_SOCK_ENQUEUED); + return 0; +} + +SEC("fexit/udp_queue_rcv_one_skb") +int BPF_PROG(netstacklat_udp_queue_rcv_one_skb, struct sock *sk, + struct sk_buff *skb) +{ + record_skb_latency(skb, NETSTACKLAT_HOOK_UDP_SOCK_ENQUEUED); + return 0; +} + +SEC("fexit/udpv6_queue_rcv_one_skb") +int BPF_PROG(netstacklat_udpv6_queue_rcv_one_skb, struct sock *sk, + struct sk_buff *skb) +{ + record_skb_latency(skb, NETSTACKLAT_HOOK_UDP_SOCK_ENQUEUED); + return 0; +} + +SEC("fentry/tcp_recv_timestamp") +int BPF_PROG(netstacklat_tcp_recv_timestamp, void *msg, struct sock *sk, + struct scm_timestamping_internal *tss) +{ + struct timespec64 *ts = &tss->ts[0]; + 
record_socket_latency(sk, (ktime_t)ts->tv_sec * NS_PER_S + ts->tv_nsec, + NETSTACKLAT_HOOK_TCP_SOCK_READ); + return 0; +} + +SEC("fentry/skb_consume_udp") +int BPF_PROG(netstacklat_skb_consume_udp, struct sock *sk, struct sk_buff *skb, + int len) +{ + record_socket_latency(sk, skb->tstamp, NETSTACKLAT_HOOK_UDP_SOCK_READ); + return 0; +} diff --git a/netstacklat/netstacklat.c b/netstacklat/netstacklat.c new file mode 100644 index 00000000..f70fa87f --- /dev/null +++ b/netstacklat/netstacklat.c @@ -0,0 +1,1049 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +static const char *__doc__ = + "Netstacklat - Monitor latency to various points in the ingress network stack"; + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "netstacklat.h" +#include "netstacklat.bpf.skel.h" + +#define MAX_EPOLL_EVENTS 8 + +/* + * Used to pack both a "type" and a value into the epoll_event.data.u64 member. + * The topmost bits indicates the type (SIG, TIMER, etc) while the remaining + * bits can be used for the value. The MASK can be used to filter out the + * type/value. 
+ */ +#define NETSTACKLAT_EPOLL_SIG (1ULL << 63) +#define NETSTACKLAT_EPOLL_TIMER (1ULL << 62) +#define NETSTACKLAT_EPOLL_MASK \ + (~(NETSTACKLAT_EPOLL_SIG | NETSTACKLAT_EPOLL_TIMER)) + +// Magical value used to indicate that the program should be aborted +#define NETSTACKLAT_ABORT 424242 + +#define MAX_BUCKETSPAN_STRLEN 16 +#define MAX_BUCKETCOUNT_STRLEN 10 +#define MAX_BAR_STRLEN (80 - 6 - MAX_BUCKETSPAN_STRLEN - MAX_BUCKETCOUNT_STRLEN) + +#define MAX_HOOK_PROGS 4 + +// Maximum number of different pids that can be filtered for +#define MAX_FILTER_PIDS 4096 + +struct hook_prog_collection { + struct bpf_program *progs[MAX_HOOK_PROGS]; + int nprogs; +}; + +struct netstacklat_config { + struct netstacklat_bpf_config bpf_conf; + double report_interval_s; + bool enabled_hooks[NETSTACKLAT_N_HOOKS]; + int npids; + __u32 pids[MAX_FILTER_PIDS]; +}; + +static const struct option long_options[] = { + { "help", no_argument, NULL, 'h' }, + { "report-interval", required_argument, NULL, 'r' }, + { "list-probes", no_argument, NULL, 'l' }, + { "enable-probe", required_argument, NULL, 'e' }, + { "disable-probe", required_argument, NULL, 'd' }, + { "filter-pid", required_argument, NULL, 'p' }, + { 0, 0, 0, 0 } +}; + +static const struct option *optval_to_longopt(int val) +{ + int i; + + for (i = 0; long_options[i].name != 0; i++) { + if (long_options[i].val == val) + return &long_options[i]; + } + + return NULL; +} + +static int generate_optstr(char *buf, size_t size) +{ + int i, optlen, strlen = 0; + char optstr[4]; + + for (i = 0; long_options[i].name != 0; i++) { + if (long_options[i].flag || !isalnum(long_options[i].val)) + continue; + + optlen = snprintf( + optstr, sizeof(optstr), "%c%s", long_options[i].val, + long_options[i].has_arg == optional_argument ? "::" : + long_options[i].has_arg == required_argument ? 
":" : + ""); + if (strlen + optlen < size) { + strncpy(buf + strlen, optstr, optlen + 1); + } + strlen += optlen; + } + + return strlen + 1; +} + +static void print_usage(FILE *stream, const char *prog_name) +{ + int i; + + fprintf(stream, "\nDOCUMENTATION:\n%s\n", __doc__); + fprintf(stream, "\n"); + fprintf(stream, " Usage: %s (options-see-below)\n", prog_name); + fprintf(stream, " Listing options:\n"); + for (i = 0; long_options[i].name != 0; i++) { + if (!long_options[i].flag && isalnum(long_options[i].val)) + fprintf(stream, " -%c, ", long_options[i].val); + else + fprintf(stream, " "); + + printf(" --%s", long_options[i].name); + + if (long_options[i].has_arg == required_argument) + fprintf(stream, " "); + else if (long_options[i].has_arg == optional_argument) + fprintf(stream, "[ARG]"); + + fprintf(stream, "\n"); + } + printf("\n"); +} + +static const char *hook_to_str(enum netstacklat_hook hook) +{ + switch (hook) { + case NETSTACKLAT_HOOK_IP_RCV: + return "ip-start"; + case NETSTACKLAT_HOOK_TCP_START: + return "tcp-start"; + case NETSTACKLAT_HOOK_UDP_START: + return "udp-start"; + case NETSTACKLAT_HOOK_TCP_SOCK_ENQUEUED: + return "tcp-socket-enqueued"; + case NETSTACKLAT_HOOK_UDP_SOCK_ENQUEUED: + return "udp-socket-enqueued"; + case NETSTACKLAT_HOOK_TCP_SOCK_READ: + return "tcp-socket-read"; + case NETSTACKLAT_HOOK_UDP_SOCK_READ: + return "udp-socket-read"; + default: + return "invalid"; + } +} + +static enum netstacklat_hook str_to_hook(const char *str) +{ + enum netstacklat_hook hook; + + for (hook = 1; hook < NETSTACKLAT_N_HOOKS; hook++) { + if (strcmp(str, hook_to_str(hook)) == 0) + return hook; + } + + return NETSTACKLAT_HOOK_INVALID; +} + +static const char *hook_to_description(enum netstacklat_hook hook) +{ + switch (hook) { + case NETSTACKLAT_HOOK_IP_RCV: + return "packet has reached the IP-stack, i.e. past the traffic control layer"; + case NETSTACKLAT_HOOK_TCP_START: + return "packet has reached the local TCP-stack, i.e. 
past the IP (and routing) stack"; + case NETSTACKLAT_HOOK_UDP_START: + return "packet has reached the local UDP-stack, i.e. past the IP (and routing) stack"; + case NETSTACKLAT_HOOK_TCP_SOCK_ENQUEUED: + return "packet has been enqueued to a TCP socket, i.e. end of the kernel receive stack"; + case NETSTACKLAT_HOOK_UDP_SOCK_ENQUEUED: + return "packed has been enqueued to a UDP socket, i.e. end of the kernel receive stack"; + case NETSTACKLAT_HOOK_TCP_SOCK_READ: + return "packet payload has been read from TCP socket, i.e. delivered to user space"; + case NETSTACKLAT_HOOK_UDP_SOCK_READ: + return "packet payload has been read from UDP socket, i.e. delivered to user space"; + default: + return "not a valid hook"; + } +} + +static int hook_to_histmap(enum netstacklat_hook hook, + const struct netstacklat_bpf *obj) +{ + switch (hook) { + case NETSTACKLAT_HOOK_IP_RCV: + return bpf_map__fd(obj->maps.netstack_latency_ip_start_seconds); + case NETSTACKLAT_HOOK_TCP_START: + return bpf_map__fd( + obj->maps.netstack_latency_tcp_start_seconds); + case NETSTACKLAT_HOOK_UDP_START: + return bpf_map__fd( + obj->maps.netstack_latency_udp_start_seconds); + case NETSTACKLAT_HOOK_TCP_SOCK_ENQUEUED: + return bpf_map__fd( + obj->maps.netstack_latency_tcp_sock_enqueued_seconds); + case NETSTACKLAT_HOOK_UDP_SOCK_ENQUEUED: + return bpf_map__fd( + obj->maps.netstack_latency_udp_sock_enqueued_seconds); + case NETSTACKLAT_HOOK_TCP_SOCK_READ: + return bpf_map__fd( + obj->maps.netstack_latency_tcp_sock_read_seconds); + case NETSTACKLAT_HOOK_UDP_SOCK_READ: + return bpf_map__fd( + obj->maps.netstack_latency_udp_sock_read_seconds); + default: + return -EINVAL; + } +} + +static void hook_to_progs(struct hook_prog_collection *progs, + enum netstacklat_hook hook, + const struct netstacklat_bpf *obj) +{ + switch (hook) { + case NETSTACKLAT_HOOK_IP_RCV: + progs->progs[0] = obj->progs.netstacklat_ip_rcv_core; + progs->progs[1] = obj->progs.netstacklat_ip6_rcv_core; + progs->nprogs = 2; + break; + case 
NETSTACKLAT_HOOK_TCP_START: + progs->progs[0] = obj->progs.netstacklat_tcp_v4_rcv; + progs->progs[1] = obj->progs.netstacklat_tcp_v6_rcv; + progs->nprogs = 2; + break; + case NETSTACKLAT_HOOK_UDP_START: + progs->progs[0] = obj->progs.netstacklat_udp_rcv; + progs->progs[1] = obj->progs.netstacklat_udpv6_rcv; + progs->nprogs = 2; + break; + case NETSTACKLAT_HOOK_TCP_SOCK_ENQUEUED: + progs->progs[0] = obj->progs.netstacklat_tcp_data_queue; + progs->nprogs = 1; + break; + case NETSTACKLAT_HOOK_UDP_SOCK_ENQUEUED: + progs->progs[0] = obj->progs.netstacklat_udp_queue_rcv_one_skb; + progs->progs[1] = + obj->progs.netstacklat_udpv6_queue_rcv_one_skb; + progs->nprogs = 2; + break; + case NETSTACKLAT_HOOK_TCP_SOCK_READ: + progs->progs[0] = obj->progs.netstacklat_tcp_recv_timestamp; + progs->nprogs = 1; + break; + case NETSTACKLAT_HOOK_UDP_SOCK_READ: + progs->progs[0] = obj->progs.netstacklat_skb_consume_udp; + progs->nprogs = 1; + break; + default: + progs->nprogs = 0; + break; + } +} + +static void list_hooks(FILE *stream) +{ + enum netstacklat_hook hook; + + fprintf(stream, "available hooks:\n"); + for (hook = 1; hook < NETSTACKLAT_N_HOOKS; hook++) + fprintf(stream, " %s: %s\n", hook_to_str(hook), + hook_to_description(hook)); +} + +static int parse_bounded_double(double *res, const char *str, double low, + double high, const char *name) +{ + char *endptr; + errno = 0; + + *res = strtod(str, &endptr); + if (endptr == str || strlen(str) != endptr - str) { + fprintf(stderr, "%s %s is not a valid number\n", name, str); + return -EINVAL; + } + + if (errno == ERANGE) { + fprintf(stderr, "%s %s overflowed\n", name, str); + return -ERANGE; + } + + if (*res < low || *res > high) { + fprintf(stderr, "%s must be in range [%g, %g]\n", name, low, high); + return -ERANGE; + } + + return 0; +} + +static int parse_bounded_long(long long *res, const char *str, long long low, + long long high, const char *name) +{ + char *endptr; + errno = 0; + + *res = strtoll(str, &endptr, 10); + if 
(endptr == str || strlen(str) != endptr - str) { + fprintf(stderr, "%s %s is not a valid integer\n", name, str); + return -EINVAL; + } + + if (errno == ERANGE) { + fprintf(stderr, "%s %s overflowed\n", name, str); + return -ERANGE; + } + + if (*res < low || *res > high) { + fprintf(stderr, "%s must be in range [%lld, %lld]\n", name, low, + high); + return -ERANGE; + } + + return 0; +} + +/* + * Parses a comma-delimited string of hook-names, and sets the positions for + * the hooks that appear in the string to true. + */ +static int parse_hooks(bool hooks[NETSTACKLAT_N_HOOKS], const char *_str) +{ + enum netstacklat_hook hook; + char *tokp = NULL; + char str[1024]; + char *hookstr; + int i; + + for (i = 0; i < NETSTACKLAT_N_HOOKS; i++) + hooks[i] = false; + + if (strlen(_str) >= sizeof(str)) + return -E2BIG; + strcpy(str, _str); + + hookstr = strtok_r(str, ",", &tokp); + while (hookstr) { + hook = str_to_hook(hookstr); + if (hook == NETSTACKLAT_HOOK_INVALID) { + fprintf(stderr, "%s is not a valid hook\n", hookstr); + return -EINVAL; + } + + hooks[hook] = true; + + hookstr = strtok_r(NULL, ",", &tokp); + } + + return 0; +} + +static int parse_pids(size_t size, __u32 arr[size], const char *_str, + const char *name) +{ + char *pidstr, *str; + char *tokp = NULL; + int err, i = 0; + long long val; + + str = malloc(strlen(_str) + 1); + if (!str) + return -ENOMEM; + strcpy(str, _str); + + pidstr = strtok_r(str, ",", &tokp); + while (pidstr && i < size) { + err = parse_bounded_long(&val, pidstr, 1, PID_MAX_LIMIT, name); + if (err) + goto exit; + arr[i] = val; + + pidstr = strtok_r(NULL, ",", &tokp); + i++; + } + + if (pidstr) + // Parsed size pids, but more still remain + err = -E2BIG; + +exit: + free(str); + return err ?: i; +} + +int parse_arguments(int argc, char *argv[], struct netstacklat_config *conf) +{ + bool hooks_on = false, hooks_off = false; + bool hooks[NETSTACKLAT_N_HOOKS]; + char optstr[64]; + int opt, err, i; + double fval; + + conf->npids = 0; + 
conf->bpf_conf.filter_pid = false; + + for (i = 0; i < NETSTACKLAT_N_HOOKS; i++) + // All probes enabled by default + conf->enabled_hooks[i] = true; + + if (generate_optstr(optstr, sizeof(optstr)) > sizeof(optstr)) { + fprintf(stderr, + "Internal error: optstr too short to fit all long_options\n"); + return -ENAMETOOLONG; + } + + while ((opt = getopt_long(argc, argv, optstr, long_options, + NULL)) != -1) { + switch (opt) { + case 'r': // report interval + err = parse_bounded_double( + &fval, optarg, 0.01, 3600 * 24, + optval_to_longopt(opt)->name); + if (err) + return err; + + conf->report_interval_s = fval; + break; + case 'l': // list-probes + list_hooks(stdout); + exit(EXIT_SUCCESS); + case 'e': // enable-probes + err = parse_hooks(hooks, optarg); + if (err) + return err; + + for (i = 1; i < NETSTACKLAT_N_HOOKS; i++) + conf->enabled_hooks[i] = hooks[i]; + hooks_on = true; + break; + case 'd': // disable-probes + err = parse_hooks(hooks, optarg); + if (err) + return err; + + for (i = 1; i < NETSTACKLAT_N_HOOKS; i++) + conf->enabled_hooks[i] = !hooks[i]; + hooks_off = true; + break; + case 'p': // filter-pids + err = parse_pids(ARRAY_SIZE(conf->pids) - conf->npids, + conf->pids + conf->npids, optarg, + optval_to_longopt(opt)->name); + if (err < 0) + return err; + + conf->npids += err; + conf->bpf_conf.filter_pid = true; + break; + case 'h': // help + print_usage(stdout, argv[0]); + exit(EXIT_SUCCESS); + default: + // unrecognized option reported by getopt, so just print usage + print_usage(stderr, argv[0]); + return -EINVAL; + } + } + + if (hooks_on && hooks_off) { + fprintf(stderr, + "%s and %s are mutually exclusive, only use one of them\n", + optval_to_longopt('e')->name, + optval_to_longopt('d')->name); + return -EINVAL; + } + + return 0; +} + +static int find_first_nonzero_bucket(size_t n, const __u64 hist[n]) +{ + int i; + + for (i = 0; i < n; i++) { + if (hist[i] > 0) + return i; + } + + return -1; +} + +static int find_last_nonzero_bucket(size_t n, const 
__u64 hist[n]) +{ + int i; + + for (i = n - 1; i >= 0; i--) { + if (hist[i] > 0) + return i; + } + + return -1; +} + +static int find_largest_bucket(size_t n, const __u64 hist[n]) +{ + __u64 max_val = 0; + int i; + + for (i = 0; i < n; i++) { + if (hist[i] > max_val) + max_val = hist[i]; + } + + return max_val; +} + +static double ns_to_siprefix(double ns, char **prefix) +{ + static char *prefixes[] = { "n", "u", "m", "" }; + int psteps = 0; + + while (ns >= 1000 && psteps < ARRAY_SIZE(prefixes) - 1) { + ns /= 1000; + psteps++; + } + + *prefix = prefixes[psteps]; + + return ns; +} + +static void print_nchars(FILE *stream, char c, int n) +{ + while (n-- > 0) + putc(c, stream); +} + +static int print_bucket_interval(FILE *stream, double low_bound_ns, + double high_bound_ns) +{ + char *lprefix, *hprefix; + double low_si, high_si; + + low_si = ns_to_siprefix(low_bound_ns, &lprefix); + + if (isinf(high_bound_ns)) { + high_si = INFINITY; + hprefix = " "; + } else { + high_si = ns_to_siprefix(high_bound_ns, &hprefix); + } + + return fprintf(stream, "%c%.3g%ss, %.3g%ss]", + low_bound_ns == 0 ? 
'[' : '(', low_si, lprefix, high_si, + hprefix); +} + +static void print_histbar(FILE *stream, __u64 count, __u64 max_count) +{ + int barlen = round((double)count / max_count * MAX_BAR_STRLEN); + + fprintf(stream, "|"); + print_nchars(stream, '@', barlen); + print_nchars(stream, ' ', MAX_BAR_STRLEN - barlen); + fprintf(stream, "|"); +} + +static void print_log2hist(FILE *stream, size_t n, const __u64 hist[n], + double multiplier) +{ + int bucket, start_bucket, end_bucket, max_bucket, len; + double low_bound, high_bound, avg; + __u64 count = 0; + char *prefix; + + start_bucket = find_first_nonzero_bucket(n - 1, hist); + end_bucket = find_last_nonzero_bucket(n - 1, hist); + max_bucket = find_largest_bucket(n - 1, hist); + + for (bucket = max(0, start_bucket); bucket <= end_bucket; bucket++) { + low_bound = pow(2, bucket - 1) * multiplier; + high_bound = pow(2, bucket) * multiplier; + + // First bucket includes 0 (i.e. [0, 1] rather than (0.5, 1]) + if (bucket == 0) + low_bound = 0; + // Last bucket includes all values too large for the second-last bucket + if (bucket == n - 2) + high_bound = INFINITY; + + len = print_bucket_interval(stream, low_bound, high_bound); + print_nchars(stream, ' ', + max(0, MAX_BUCKETSPAN_STRLEN - len) + 1); + fprintf(stream, "%*llu ", MAX_BUCKETCOUNT_STRLEN, hist[bucket]); + print_histbar(stream, hist[bucket], max_bucket); + fprintf(stream, "\n"); + + count += hist[bucket]; + } + + // Final "bucket" is the sum of all values in the histogram + if (count > 0) { + avg = ns_to_siprefix((double)hist[n - 1] / count, &prefix); + fprintf(stream, "count: %llu, average: %.2f%ss\n", count, avg, + prefix); + } else { + fprintf(stream, "count: %llu, average: -\n", count); + } +} + +static void merge_percpu_hist(size_t n, int ncpus, + const __u64 percpu_hist[n][ncpus], + __u64 merged_hist[n]) +{ + int idx, cpu; + + memset(merged_hist, 0, sizeof(__u64) * n); + + for (idx = 0; idx < n; idx++) { + for (cpu = 0; cpu < ncpus; cpu++) { + merged_hist[idx] += 
percpu_hist[idx][cpu]; + } + } +} + +static int fetch_hist_map(int map_fd, __u64 hist[HIST_NBUCKETS]) +{ + __u32 in_batch, out_batch, count = HIST_NBUCKETS; + int ncpus = libbpf_num_possible_cpus(); + __u32 idx, buckets_fetched = 0; + __u64 (*percpu_hist)[ncpus]; + __u32 *keys; + int err = 0; + + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, batch_opts, .flags = BPF_EXIST); + + percpu_hist = calloc(HIST_NBUCKETS, sizeof(*percpu_hist)); + keys = calloc(HIST_NBUCKETS, sizeof(*keys)); + if (!percpu_hist || !keys) { + err = -ENOMEM; + goto exit; + } + + while (buckets_fetched < HIST_NBUCKETS) { + err = bpf_map_lookup_batch(map_fd, + buckets_fetched > 0 ? &in_batch : NULL, + &out_batch, keys + buckets_fetched, + percpu_hist + buckets_fetched, &count, + &batch_opts); + if (err == -ENOENT) // All entries fetched + err = 0; + else if (err) + goto exit; + + // Verify keys match expected idx range + for (idx = buckets_fetched; idx < buckets_fetched + count; idx++) { + if (keys[idx] != idx) { + err = -EBADSLT; + goto exit; + } + } + + in_batch = out_batch; + buckets_fetched += count; + count = HIST_NBUCKETS - buckets_fetched; + } + + merge_percpu_hist(HIST_NBUCKETS, ncpus, percpu_hist, hist); + +exit: + free(percpu_hist); + free(keys); + return err; +} + +static int report_stats(const struct netstacklat_config *conf, + const struct netstacklat_bpf *obj) +{ + enum netstacklat_hook hook; + __u64 hist[HIST_NBUCKETS] = { 0 }; + time_t t; + int err; + + time(&t); + printf("%s", ctime(&t)); + + for (hook = 1; hook < NETSTACKLAT_N_HOOKS; hook++) { + if (!conf->enabled_hooks[hook]) + continue; + + printf("%s:\n", hook_to_str(hook)); + + err = fetch_hist_map(hook_to_histmap(hook, obj), hist); + if (err) + return err; + + print_log2hist(stdout, ARRAY_SIZE(hist), hist, 1); + printf("\n"); + } + fflush(stdout); + + return 0; +} + +static int enable_sw_rx_tstamps(void) +{ + int tstamp_opt = SOF_TIMESTAMPING_RX_SOFTWARE; + int sock_fd, err; + + sock_fd = socket(AF_INET, SOCK_DGRAM, 0); + if 
(sock_fd < 0) { + err = -errno; + fprintf(stderr, "Failed opening socket: %s\n", strerror(-err)); + return err; + } + + err = setsockopt(sock_fd, SOL_SOCKET, SO_TIMESTAMPING, &tstamp_opt, + sizeof(tstamp_opt)); + if (err) { + err = -errno; + fprintf(stderr, "Failed setting SO_TIMESTAMPING option: %s\n", + strerror(-err)); + goto err_socket; + } + + return 0; + +err_socket: + close(sock_fd); + return err; +} + +static __s64 get_tai_offset(void) +{ + struct ntptimeval ntpt; + + ntp_gettimex(&ntpt); + return ntpt.tai; +} + +static void set_programs_to_load(const struct netstacklat_config *conf, + struct netstacklat_bpf *obj) +{ + struct hook_prog_collection progs; + enum netstacklat_hook hook; + int i; + + for (hook = 1; hook < NETSTACKLAT_N_HOOKS; hook++) { + hook_to_progs(&progs, hook, obj); + + for (i = 0; i < progs.nprogs; i++) + bpf_program__set_autoload(progs.progs[i], + conf->enabled_hooks[hook]); + } +} + +static int init_signalfd(void) +{ + sigset_t mask; + int fd, err; + + sigemptyset(&mask); + sigaddset(&mask, SIGINT); + sigaddset(&mask, SIGTERM); + + fd = signalfd(-1, &mask, 0); + if (fd < 0) + return -errno; + + err = pthread_sigmask(SIG_BLOCK, &mask, NULL); + if (err) { + err = -errno; + close(fd); + return err; + } + + return fd; +} + +static int handle_signal(int sig_fd) +{ + struct signalfd_siginfo sig_info; + ssize_t size; + + size = read(sig_fd, &sig_info, sizeof(sig_info)); + if (size != sizeof(sig_info)) { + fprintf(stderr, "Failed reading signal fd\n"); + return -EBADFD; + } + + switch (sig_info.ssi_signo) { + case SIGINT: + case SIGTERM: + return NETSTACKLAT_ABORT; + default: + fprintf(stderr, "Unexpected signal: %d\n", sig_info.ssi_signo); + return -EBADR; + } +} + +static int setup_timer(__u64 interval_ns) +{ + struct itimerspec timercfg = { + .it_value = { .tv_sec = interval_ns / NS_PER_S, + .tv_nsec = interval_ns % NS_PER_S }, + .it_interval = { .tv_sec = interval_ns / NS_PER_S, + .tv_nsec = interval_ns % NS_PER_S } + }; + int fd, err; + + 
fd = timerfd_create(CLOCK_MONOTONIC, 0); + if (fd < 0) { + return -errno; + } + + err = timerfd_settime(fd, 0, &timercfg, NULL); + if (err) { + err = -errno; + close(fd); + return err; + } + + return fd; +} + +static int handle_timer(int timer_fd, const struct netstacklat_config *conf, + const struct netstacklat_bpf *obj) +{ + __u64 timer_exps; + ssize_t size; + + size = read(timer_fd, &timer_exps, sizeof(timer_exps)); + if (size != sizeof(timer_exps)) { + fprintf(stderr, "Failed reading timer fd\n"); + return -EBADFD; + } + + if (timer_exps == 0) + return 0; + if (timer_exps > 1) + fprintf(stderr, "Warning: Missed %llu reporting intervals\n", + timer_exps - 1); + + return report_stats(conf, obj); +} + +static int epoll_add_event(int epoll_fd, int fd, __u64 event_type, __u64 value) +{ + struct epoll_event ev = { + .events = EPOLLIN, + .data = { .u64 = event_type | value }, + }; + + if (value & ~NETSTACKLAT_EPOLL_MASK) + return -EINVAL; + + return epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &ev) ? 
-errno : 0; +} + +static int setup_epoll_instance(int sig_fd, int timer_fd) +{ + int epoll_fd, err = 0; + + epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (epoll_fd < 0) + return -errno; + + err = epoll_add_event(epoll_fd, sig_fd, NETSTACKLAT_EPOLL_SIG, sig_fd); + if (err) + goto err; + + err = epoll_add_event(epoll_fd, timer_fd, NETSTACKLAT_EPOLL_TIMER, + timer_fd); + if (err) + goto err; + + return epoll_fd; + +err: + close(epoll_fd); + return err; +} + +static int poll_events(int epoll_fd, const struct netstacklat_config *conf, + const struct netstacklat_bpf *obj) +{ + struct epoll_event events[MAX_EPOLL_EVENTS]; + int i, n, fd, err = 0; + __u64 epoll_type; + + n = epoll_wait(epoll_fd, events, MAX_EPOLL_EVENTS, 100); + if (n < 0) + return -errno; + + for (i = 0; i < n; i++) { + epoll_type = events[i].data.u64 & ~NETSTACKLAT_EPOLL_MASK; + fd = events[i].data.u64 & NETSTACKLAT_EPOLL_MASK; + + switch (epoll_type) { + case NETSTACKLAT_EPOLL_SIG: + err = handle_signal(fd); + break; + case NETSTACKLAT_EPOLL_TIMER: + err = handle_timer(fd, conf, obj); + break; + default: + fprintf(stderr, "Warning: unexpected epoll data: %lu\n", + events[i].data.u64); + break; + } + + if (err) + break; + } + + return err; +} + +static int init_pidfilter_map(const struct netstacklat_bpf *obj, + const struct netstacklat_config *conf) +{ + __u8 pid_ok_val = 1; + int map_fd, err; + __u32 i; + + map_fd = bpf_map__fd(obj->maps.netstack_pidfilter); + for (i = 0; i < conf->npids; i++) { + err = bpf_map_update_elem(map_fd, &conf->pids[i], &pid_ok_val, + 0); + if (err) + return err; + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + int sig_fd, timer_fd, epoll_fd, sock_fd, err; + struct netstacklat_config config = { + .report_interval_s = 5, + }; + struct netstacklat_bpf *obj; + char errmsg[128]; + + err = parse_arguments(argc, argv, &config); + if (err) { + fprintf(stderr, "Failed parsing arguments: %s\n", + strerror(-err)); + return EXIT_FAILURE; + } + + sock_fd = 
enable_sw_rx_tstamps(); + if (sock_fd < 0) { + err = sock_fd; + fprintf(stderr, + "Failed enabling software RX timestamping: %s\n", + strerror(-err)); + return EXIT_FAILURE; + } + + obj = netstacklat_bpf__open(); + if (!obj) { + err = libbpf_get_error(obj); + libbpf_strerror(err, errmsg, sizeof(errmsg)); + fprintf(stderr, "Failed opening eBPF object file: %s\n", errmsg); + goto exit_sockfd; + } + + obj->rodata->TAI_OFFSET = get_tai_offset() * NS_PER_S; + obj->rodata->user_config = config.bpf_conf; + + set_programs_to_load(&config, obj); + + err = netstacklat_bpf__load(obj); + if (err) { + libbpf_strerror(err, errmsg, sizeof(errmsg)); + fprintf(stderr, "Failed loading eBPF programs: %s\n", errmsg); + goto exit_destroy_bpf; + } + + err = init_pidfilter_map(obj, &config); + if (err) { + libbpf_strerror(err, errmsg, sizeof(errmsg)); + fprintf(stderr, "Failed filling the pid filter map: %s\n", + errmsg); + goto exit_destroy_bpf; + } + + err = netstacklat_bpf__attach(obj); + if (err) { + libbpf_strerror(err, errmsg, sizeof(errmsg)); + fprintf(stderr, "Failed to attach eBPF programs: %s\n", errmsg); + goto exit_destroy_bpf; + } + + sig_fd = init_signalfd(); + if (sig_fd < 0) { + err = sig_fd; + fprintf(stderr, "Failed setting up signal handling: %s\n", + strerror(-err)); + goto exit_detach_bpf; + } + + timer_fd = setup_timer(config.report_interval_s * NS_PER_S); + if (timer_fd < 0) { + err = timer_fd; + fprintf(stderr, "Failed creating timer: %s\n", strerror(-err)); + goto exit_sigfd; + } + + epoll_fd = setup_epoll_instance(sig_fd, timer_fd); + if (epoll_fd < 0) { + err = epoll_fd; + fprintf(stderr, "Failed setting up epoll: %s\n", + strerror(-err)); + goto exit_timerfd; + } + + // Report stats until user shuts down program + while (true) { + err = poll_events(epoll_fd, &config, obj); + + if (err) { + if (err == NETSTACKLAT_ABORT) { + // Report stats a final time before terminating + err = report_stats(&config, obj); + } else { + libbpf_strerror(err, errmsg, 
sizeof(errmsg)); + fprintf(stderr, "Failed polling fds: %s\n", + errmsg); + } + break; + } + } + + // Cleanup + close(epoll_fd); +exit_timerfd: + close(timer_fd); +exit_sigfd: + close(sig_fd); +exit_detach_bpf: + netstacklat_bpf__detach(obj); +exit_destroy_bpf: + netstacklat_bpf__destroy(obj); +exit_sockfd: + close(sock_fd); + return err ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/netstacklat/netstacklat.h b/netstacklat/netstacklat.h new file mode 100644 index 00000000..bb0162a1 --- /dev/null +++ b/netstacklat/netstacklat.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef NETSTACKLAT_H +#define NETSTACKLAT_H + +#define HIST_MAX_LATENCY_SLOT 34 // 2^34 ns -> ~17s +/* + * MAX_LATENCY_SLOT + 1 buckets for hist, + 1 "bucket" for the "sum key" + * (https://github.com/cloudflare/ebpf_exporter?tab=readme-ov-file#sum-keys) + * that ebpf_exporter expects for exp2 hists (see how it's used in the + * increment_exp2_histogram_nosync() function) + */ +#define HIST_NBUCKETS (HIST_MAX_LATENCY_SLOT + 2) + +#define NS_PER_S 1000000000 + +// The highest possible PID on a Linux system (from /include/linux/threads.h) +#define PID_MAX_LIMIT (4 * 1024 * 1024) + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) +#endif + +#ifndef max +#define max(a, b) \ + ({ \ + typeof(a) _a = (a); \ + typeof(b) _b = (b); \ + _a > _b ? 
_a : _b; \ + }) +#endif + +enum netstacklat_hook { + NETSTACKLAT_HOOK_INVALID = 0, + NETSTACKLAT_HOOK_IP_RCV, + NETSTACKLAT_HOOK_TCP_START, + NETSTACKLAT_HOOK_UDP_START, + NETSTACKLAT_HOOK_TCP_SOCK_ENQUEUED, + NETSTACKLAT_HOOK_UDP_SOCK_ENQUEUED, + NETSTACKLAT_HOOK_TCP_SOCK_READ, + NETSTACKLAT_HOOK_UDP_SOCK_READ, + NETSTACKLAT_N_HOOKS, +}; + +struct netstacklat_bpf_config +{ + bool filter_pid; +}; + +#endif + diff --git a/netstacklat/netstacklat.yaml b/netstacklat/netstacklat.yaml new file mode 100644 index 00000000..2fb99530 --- /dev/null +++ b/netstacklat/netstacklat.yaml @@ -0,0 +1,79 @@ +metrics: + histograms: + - name: netstack_latency_ip_start_seconds + help: Time for packet to reach the start of the IP-stack + bucket_type: exp2 + bucket_min: 0 + bucket_max: 34 + bucket_multiplier: 0.000000001 # nanoseconds to seconds + labels: + - name: bucket + size: 4 + decoders: + - name: uint + - name: netstack_latency_tcp_start_seconds + help: Time for packet to reach the start of the TCP stack + bucket_type: exp2 + bucket_min: 0 + bucket_max: 34 + bucket_multiplier: 0.000000001 # nanoseconds to seconds + labels: + - name: bucket + size: 4 + decoders: + - name: uint + - name: netstack_latency_udp_start_seconds + help: Time until packet to reach the start of the UDP stack + bucket_type: exp2 + bucket_min: 0 + bucket_max: 34 + bucket_multiplier: 0.000000001 # nanoseconds to seconds + labels: + - name: bucket + size: 4 + decoders: + - name: uint + - name: netstack_latency_tcp_sock_enqueued_seconds + help: Time until packet is queued to TCP socket + bucket_type: exp2 + bucket_min: 0 + bucket_max: 34 + bucket_multiplier: 0.000000001 # nanoseconds to seconds + labels: + - name: bucket + size: 4 + decoders: + - name: uint + - name: netstack_latency_udp_sock_enqueued_seconds + help: Time until packet is queued to UDP socket + bucket_type: exp2 + bucket_min: 0 + bucket_max: 34 + bucket_multiplier: 0.000000001 # nanoseconds to seconds + labels: + - name: bucket + size: 4 + 
          decoders:
            - name: uint
    - name: netstack_latency_tcp_sock_read_seconds
      help: Time until packet data is read from TCP socket
      bucket_type: exp2
      bucket_min: 0
      bucket_max: 34
      bucket_multiplier: 0.000000001 # nanoseconds to seconds
      labels:
        - name: bucket
          size: 4
          decoders:
            - name: uint
    - name: netstack_latency_udp_sock_read_seconds
      help: Time until packet data is read from UDP socket
      bucket_type: exp2
      bucket_min: 0
      bucket_max: 34
      bucket_multiplier: 0.000000001 # nanoseconds to seconds
      labels:
        - name: bucket
          size: 4
          decoders:
            - name: uint