diff --git a/xdq-tester/Makefile b/xdq-tester/Makefile new file mode 100644 index 00000000..01f8fa44 --- /dev/null +++ b/xdq-tester/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) + +USER_TARGETS := xdq-tester +BPF_TARGETS := $(patsubst %.c,%,$(wildcard *.bpf.c)) + +USER_LIBS = -llua -ldl -lm + +LIB_DIR = ../lib + +include $(LIB_DIR)/common.mk diff --git a/xdq-tester/bpf_local_helpers.h b/xdq-tester/bpf_local_helpers.h new file mode 100644 index 00000000..4f875e66 --- /dev/null +++ b/xdq-tester/bpf_local_helpers.h @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +#ifndef BPF_LOCAL_HELPERS_H_ +#define BPF_LOCAL_HELPERS_H_ + +#include "bpf_shared_data.h" + +#define EEXIST 17 /* File exists */ + +#define BPF_MAP_TYPE_PIFO_GENERIC 31 +#define BPF_MAP_TYPE_PIFO_XDP 32 + +/* + * bpf_packet_dequeue + * + * Dequeue the packet at the head of the PIFO in *map* and return a pointer + * to the packet (or NULL if the PIFO is empty). + * + * Returns + * On success, a pointer to the packet, or NULL if the PIFO is empty. The + * packet pointer must be freed using *bpf_packet_drop()* or returning + * the packet pointer. The *rank* pointer will be set to the rank of + * the dequeued packet on success, or a negative error code on error. + */ +static long (*bpf_packet_dequeue)(void *ctx, void *map, __u64 flags, __u64 *rank) = (void *) 208;; +static long (*bpf_packet_drop)(void *ctx, void *pkt) = (void *) 209; + +struct parsing_context { + void *data; // Start of eth hdr + void *data_end; // End of safe acessible area + void *meta; // Meta data + struct hdr_cursor nh; // Position to parse next + __u32 pkt_len; // Full packet length (headers+data) +}; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +static __always_inline void * +bpf_map_lookup_or_try_init(void *map, const void *key, const void *init) +{ + void *val; + long err; + + val = bpf_map_lookup_elem(map, key); + if (val) + return val; + + err = bpf_map_update_elem(map, key, init, BPF_NOEXIST); + if (err && err != -EEXIST) + return NULL; + + return bpf_map_lookup_elem(map, key); +} + +static __always_inline int bpf_max(__u64 left, __u64 right) +{ + return right > left ? right : left; +} + + +/* + * Maps an IPv4 address into an IPv6 address according to RFC 4291 sec 2.5.5.2 + */ +static void map_ipv4_to_ipv6(struct in6_addr *ipv6, __be32 ipv4) +{ + __builtin_memset(&ipv6->in6_u.u6_addr8[0], 0x00, 10); + __builtin_memset(&ipv6->in6_u.u6_addr8[10], 0xff, 2); + ipv6->in6_u.u6_addr32[3] = ipv4; +} + +/* + * Five-tuple helpers + */ + +/* This function currently only supports UDP packets */ +static __always_inline int parse_packet(struct parsing_context *pctx, struct packet_info *p_info) +{ + /* Parse Ethernet and IP/IPv6 headers */ + p_info->eth_type = parse_ethhdr(&pctx->nh, pctx->data_end, &p_info->eth); + if (p_info->eth_type == bpf_htons(ETH_P_IP)) { + p_info->ip_type = parse_iphdr(&pctx->nh, pctx->data_end, &p_info->iph); + if (p_info->ip_type < 0) + goto err; + p_info->nt.ipv = 4; + map_ipv4_to_ipv6(&p_info->nt.saddr.ip, p_info->iph->saddr); + map_ipv4_to_ipv6(&p_info->nt.daddr.ip, p_info->iph->daddr); + } else if (p_info->eth_type == bpf_htons(ETH_P_IPV6)) { + p_info->ip_type = parse_ip6hdr(&pctx->nh, pctx->data_end, &p_info->ip6h); + if (p_info->ip_type < 0) + goto err; + p_info->nt.ipv = 6; + p_info->nt.saddr.ip = p_info->ip6h->saddr; + p_info->nt.daddr.ip = p_info->ip6h->daddr; + } else { + goto err; + } + + /* Parse UDP header */ + if (p_info->ip_type != IPPROTO_UDP) + goto err; + if (parse_udphdr(&pctx->nh, pctx->data_end, &p_info->udph) < 0) + goto err; + + p_info->nt.proto = IPPROTO_UDP; + p_info->nt.saddr.port = p_info->udph->source; + p_info->nt.daddr.port = p_info->udph->dest; + + return 0; +err: + bpf_printk("Failed to parse UDP packet"); + return -1; +} + +#pragma GCC diagnostic pop + +#endif // BPF_LOCAL_HELPERS_H_ diff --git a/xdq-tester/bpf_shared_data.h b/xdq-tester/bpf_shared_data.h new file mode 100644 index 00000000..f0211384 --- /dev/null +++ b/xdq-tester/bpf_shared_data.h @@ -0,0 +1,52 @@ +#ifndef BPF_SHARED_DATA_H_ +#define BPF_SHARED_DATA_H_ + +#include "codel_impl.h" + +struct flow_address { + struct in6_addr ip; + __u16 port; + __u16 reserved; +}; + +struct network_tuple { + struct flow_address saddr; + struct flow_address daddr; + __u16 proto; + __u8 ipv; + __u8 reserved; +}; + +struct flow_state { + __u32 pkts; + __u32 root_finish_bytes; + __u32 finish_bytes; + __u16 root_weight; + __u16 weight; + __u32 persistent; + __u64 root_priority; +}; + +struct fq_codel_flow_state { + __u32 pkts; + __u32 finish_bytes; + __u32 total_bytes; + __u32 grace_period; + struct codel_state codel; +}; + +struct packet_info { + struct ethhdr *eth; + union { + struct iphdr *iph; + struct ipv6hdr *ip6h; + }; + union { + struct udphdr *udph; + }; + struct network_tuple nt; + int eth_type; + int ip_type; +}; + +#endif // BPF_SHARED_DATA_H_ diff --git a/xdq-tester/codel_impl.h b/xdq-tester/codel_impl.h new file mode 100644 index 00000000..ae6c70b0 --- /dev/null +++ b/xdq-tester/codel_impl.h @@ -0,0 +1,155 @@ +#include + +#ifndef __CODEL_IMPL_H +#define __CODEL_IMPL_H + +#ifndef CODEL_TARGET +#define CODEL_TARGET (10 * 1000 * 1000ULL) /* 10 ms in nanosec */ +#endif + +#ifndef CODEL_EXCEED_INTERVAL +#define CODEL_EXCEED_INTERVAL (100 * 1000 * 1000ULL) /* 100 ms in ns*/ +#endif + +/* Codel like dropping scheme, inspired by: + * - RFC: https://queue.acm.org/detail.cfm?id=2209336 + * - Code: https://queue.acm.org/appendices/codel.html + * - Kernel: include/net/codel_impl.h + */ +struct codel_state { + /* codel like dropping scheme */ + __u64 first_above_time; /* Time when above target (0 if below)*/ + __u64 drop_next; /* Time to drop next packet */ + __u32 count; /* Packets dropped since going into drop state */ + __u32 dropping; /* Equal to 1 if in drop state */ +}; + +/* Table lookup for square-root shifted 16 bit */ +static __always_inline __u32 get_sqrt_sh16(__u64 cnt) +{ + switch (cnt) { + case 1: return 65536; /* 65536 * sqrt(1) */ + case 2: return 92682; /* 65536 * sqrt(2) */ + case 3: return 113512; /* 65536 * sqrt(3) */ + case 4: return 131072; /* 65536 * sqrt(4) */ + case 5: return 146543; /* 65536 * sqrt(5) */ + case 6: return 160530; /* 65536 * sqrt(6) */ + case 7: return 173392; + case 8: return 185364; + case 9: return 196608; + case 10: return 207243; + case 11: return 217358; + case 12: return 227023; + case 13: return 236293; + case 14: return 245213; + case 15: return 253820; + case 16: return 262144; /* 100 ms / sqrt(16) = 25 ms */ + case 17: return 270212; + case 18: return 278046; + case 19: return 285664; + case 20: return 293086; + case 21: return 300324; + case 22: return 307391; + case 23: return 314300; + case 24: return 321060; + case 25: return 327680; /* 100 ms / sqrt(25) = 20 ms */ + case 26: return 334169; + case 27: return 340535; + case 28: return 346784; + case 29: return 352922; + case 30: return 358955; + case 31: return 364889; + case 32: return 370728; + case 33: return 376476; + case 34: return 382137; + case 35: return 387716; + case 36: return 393216; /* 100 / sqrt(36) = 16.66 ms */ + default: + return 463410; /* 65536*sqrt(50) => 100/sqrt(50) = 14.14 ms */ + } +} + +static __always_inline __u64 get_next_interval_sqrt(__u64 cnt) +{ + __u64 val = ((__u64)CODEL_EXCEED_INTERVAL << 16) / get_sqrt_sh16(cnt); + return val; +} + +static __always_inline __u64 +codel_control_law(__u64 t, __u64 cnt) +{ + return t + get_next_interval_sqrt(cnt); +} + +static __always_inline +bool codel_should_drop(struct codel_state *codel, __u64 t_queue_sz, __u64 now) +{ + __u64 interval = CODEL_EXCEED_INTERVAL; + + if (t_queue_sz < CODEL_TARGET) { + /* went below so we'll stay below for at least interval */ + codel->first_above_time = 0; + return false; + } + + if (codel->first_above_time == 0) { + /* just went above from below. If we stay above + * for at least interval we'll say it's ok to drop + */ + codel->first_above_time = now + interval; + return false; + } else if (now >= codel->first_above_time) { + return true; + } + return false; +} + +static __always_inline +bool codel_drop(struct codel_state *codel, __u64 t_queue_sz, __u64 now) +{ + __u64 interval = CODEL_EXCEED_INTERVAL; + + /* If horizon have been exceed for a while, inc drop intensity*/ + bool drop = codel_should_drop(codel, t_queue_sz, now); + + if (codel->dropping) { /* In dropping state */ + if (!drop) { + /* time below target - leave dropping state */ + codel->dropping = false; + return false; + } else if (now >= codel->drop_next) { + /* It's time for the next drop. Drop the current + * packet. Schedule the next drop + */ + codel->count += 1; + // schedule the next drop. + codel->drop_next = + codel_control_law(codel->drop_next, codel->count); + return true; + } + } else if (drop && + ((now - codel->drop_next < interval) || + (now - codel->first_above_time >= interval))) { + /* If we get here, then we're not in dropping state. + * Decide whether it's time to enter dropping state. + */ + __u32 count = codel->count; + + codel->dropping = true; + + /* If we're in a drop cycle, drop rate that controlled queue + * on the last cycle is a good starting point to control it now. + */ + if (now - codel->drop_next < interval) + count = count > 2 ? (count - 2) : 1; + else + count = 1; + + codel->count = count; + codel->drop_next = codel_control_law(now, count); + return true; + } + return false; +} + +#endif /* __CODEL_IMPL_H */ diff --git a/xdq-tester/config.lua b/xdq-tester/config.lua new file mode 100644 index 00000000..5dce82dd --- /dev/null +++ b/xdq-tester/config.lua @@ -0,0 +1,66 @@ +IPPROTO_UDP = 17 +ETH_P_IPV6 = 0x86dd + +XDP_ABORTED = 0 +XDP_DROP = 1 +XDP_PASS = 2 +XDP_TX = 3 +XDP_REDIRECT = 4 + +config = { + bpf = { + file = "./sched_fifo.bpf.o", + xdp_func = "enqueue_prog", + dequeue_func = "dequeue_prog", + }, + + defaultUdp = { + eth = { + proto = ETH_P_IPV6, + source = "01:00:00:00:00:01", + dest = "01:00:00:00:00:02", + next_sequence = nil + }, + + ip = { + priority = 0, + version = 6, + flow_lbl = { 0, 0, 0 }, + -- payload_len = , + nexthdr = IPPROTO_UDP, + hop_limit = 1, + saddr = "fe80::1", + daddr = "fe80::2", + next_sequence = nil, + }, + + udp = { + source = 1, + dest = 1, + -- len = , + -- check = , + payload = "", + next_sequence = udp_payload_enumerator, + } + } +} + +-- Monitor config.bpf for changes +local _config_bpf = config.bpf +config.bpf = {} -- create proxy table +local config_bpf_mt = { + __index = function (_, k) + if k == "file" then + load_xdq_file(_config_bpf[k]) + end + return _config_bpf[k] + end, + + __newindex = function (_, k, v) + if k == "file" then + load_xdq_file(v) + end + _config_bpf[k] = v + end +} +setmetatable(config.bpf, config_bpf_mt) diff --git a/xdq-tester/fifo.lua b/xdq-tester/fifo.lua new file mode 100644 index 00000000..4f36e469 --- /dev/null +++ b/xdq-tester/fifo.lua @@ -0,0 +1,25 @@ +-- SPDX-License-Identifier: GPL-2.0 +-- Copyright (c) 2022 Freysteinn Alfredsson + +-- FIFO scheduler +config.bpf.file = "./sched_fifo.bpf.o" + +-- Setup flows +flow1 = Udp:new() +flow1.udp.dest = 8080 + +flow2 = Udp:new() +flow2.udp.dest = 8081 + +flow3 = Udp:new() +flow3.udp.dest = 8082 + + +-- Test scheduler +enqueue(flow1, 1) +enqueue(flow2, 1) +enqueue(flow3, 1) + +dequeue_cmp(flow1, 1) +dequeue_cmp(flow2, 1) +dequeue_cmp(flow3, 1) diff --git a/xdq-tester/fq_codel.lua b/xdq-tester/fq_codel.lua new file mode 100644 index 00000000..6d553e19 --- /dev/null +++ b/xdq-tester/fq_codel.lua @@ -0,0 +1,181 @@ +-- SPDX-License-Identifier: GPL-2.0 +-- Copyright (c) 2022 Freysteinn Alfredsson + +-- Fair Queuing with Controlled Delay (FQ_CoDel) +config.bpf.file = "./sched_fq_codel.bpf.o" + + +-- Setup flows +-- We use this flow to test sparse flow handling +adjust_meta(8) + +flow = Udp:new() +flow.udp.dest = 8000 + +-- The background stream flow increments the time bytes +-- so that we can test our sparse flow tester when time has passed +bg_flow = Udp:new() +bg_flow.udp.dest = 8001 +-- Make the packet the size of full a quantom (1514 - 62) -- 1514 +bg_flow.udp.payload = create_payload(1452) + +set_time_ns(1000) + +-- Test scheduler + +-- +-- 1. Sparse flow tests +-- +-- In our implementation of FQ-CoDel, the time_bytes variable is the only thing +-- that connects sparse flows. Therefore, we can test all possible scenarios +-- using only two flows. One background flow that we only use to advance time. +-- And the flow that we use for testing. + +function make_sparse(flow) + -- The background flow needs two packets to be a stream: + -- * The first packet will be sparse. + -- * The second packet exceeds the sparse quantom. + flow.udp.payload = create_payload(1444) + enqueue(flow, 10001) -- Sparse + enqueue(flow, 10002) -- Stream + dequeue_cmp(flow, 10001) -- Dequeue sparse + dequeue_cmp(flow, 10002) -- Dequeue sparse + -- Note that the time_bytes has not advanced at this point but will after the + -- next dequeued packet. +end + +-- 1.1 Test when a sparse flow ends while sparse +function fq_codel_sparse_test1() + -- This test does the following: + -- 1. Creates a sparse flow with a couple of packets. + -- 2. Advance time_bytes and expire the sparse flow. + -- 3. Creates a new sparse flow with a couple of packets. + -- 4. Advance time_bytes and expire the new sparse flow. + -- In steps two and four the test confirms that the sparse flows + -- were still sparse. + make_sparse(bg_flow) + + -- Prime the background stream so it can update the time_bytes variable later. + enqueue(bg_flow, 11) -- Prime for updating time_bytes + enqueue(bg_flow, 12) -- Make sure the flow is not recycled after update + + -- Make the packet the size of half a quantom (1522/2 - 62) + -- The flow will cease being a sparse flow after two packets. + flow.udp.payload = create_payload(695) + + -- The sparse flow gets a full quantom of packets. + enqueue(flow, 11) -- Sparse 1 + enqueue(flow, 12) -- Sparse 2 + + -- Remove all sparse packets. + dequeue_cmp(flow, 11) -- Dequeue sparse + dequeue_cmp(flow, 12) -- Dequeue sparse + + -- Advance time_bytes + dequeue_cmp(bg_flow, 11) -- Advances time_bytes one quantom + -- Our FQ-CoDel algorithm should have expired the flow + -- flow at this point, but not the background stream. + + -- Test that the flow is indeed expired. + enqueue(flow, 13) -- Add sparse packet with a higher priority + dequeue_cmp(flow, 13) -- Dequeue the sparse packet + dequeue_cmp(bg_flow, 12) -- Advances time_bytes one quantom + -- Our FQ-CoDel algorithm should have expired both the flow + -- flow and the background stream at this point. +end + +-- 1.2 Test a sparse flow when the time_bytes advances while the flow is sparse +function fq_codel_sparse_test2() + -- This test does the following: + -- 1. Creates a sparse flow with a couple of packets. + -- 2. Advances time_bytes by a half a quantom + -- 3. Adds a couple of packets to the sparse flow. + -- In steps one and three the test confirms that the sparse flow + -- is still sparse. + make_sparse(bg_flow) + + -- Make the packet the size of half a quantom (1522/2 - 62) + bg_flow.udp.payload = create_payload(691) + + -- Make each packet 50 bytes for our sparse flow + flow.udp.payload = create_payload(30) + + -- Keep in mind that the last background packet ends at a full quantom. Therefore, + -- if we want to update the time_bytes by a half a quantom, we will need to enqueue + -- and deqeueu a half a quantom packet. + enqueue(bg_flow, 21) -- Used to advance time_bytes by half a quantom + enqueue(bg_flow, 22) -- Used to advance time_bytes by half a quantom + enqueue(bg_flow, 23) -- Make sure the flow is not recycled after update + dequeue_cmp(bg_flow, 21) -- Advances time_bytes by a half a quantom + + -- Confirm that the sparse flow has a higher priority than the background stream. + enqueue(flow, 21) -- Add a sparse packet + enqueue(flow, 22) -- Add a sparse packet + dequeue_cmp(flow, 21) -- Dequeue the sparse packet + dequeue_cmp(flow, 22) -- Dequeue the sparse packet + + dequeue_cmp(bg_flow, 22) -- Advances time_bytes by a half a quantom + + -- Confirm that the sparse flow has a higher priority than the stream. + enqueue(flow, 23) -- Add a sparse packet + enqueue(flow, 24) -- Add a sparse packet + dequeue_cmp(flow, 23) -- Dequeue the sparse packet + dequeue_cmp(flow, 24) -- Dequeue the sparse packet + + -- Recycle both flows. + dequeue_cmp(bg_flow, 23) -- Recycle both flows +end + +-- 1.3 Test a flow that becomes a stream. +function fq_codel_sparse_test3() + -- This test does the following: + -- 1. Creates a sparse flow and adds a full quantom to it. + -- 2. Adds packets to the flow to make it a stream. + -- 3. Advances time_bytes by a half a quantom. + -- 4. Adds packets to the stream + -- In steps two and four the test confirms that the flow is a stream. + make_sparse(bg_flow) + + -- Make the packet the size of half a quantom (1514/2 - 62) + flow.udp.payload = create_payload(695) + + -- Make the packet the size of half a quantom (1514/2 - 62) + bg_flow.udp.payload = create_payload(695) + + -- Keep in mind that the last background packet ends at a full quantom. Therefore, + -- if we want to update the time_bytes by a half a quantom, we will need to enqueue + -- and deqeueu a half a quantom packet. + enqueue(bg_flow, 31) -- Used to advance time_bytes by half a quantom + enqueue(bg_flow, 32) -- Used to advance time_bytes by half a quantom + enqueue(bg_flow, 33) -- Make sure the flow is not recycled after update + dequeue_cmp(bg_flow, 31) -- Advances time_bytes by a half a quantom + + -- Make the flow flow a stream. + enqueue(flow, 31) -- Add sparse packet + enqueue(flow, 32) -- Add sparse packet + enqueue(flow, 33) -- Make the flow a stream + enqueue(flow, 34) -- Add stream packet + + -- Dequeue the sparse flow packets. + dequeue_cmp(flow, 31) + dequeue_cmp(flow, 32) + + -- Confirm that both flows are streams with equal priority. + dequeue_cmp(flow, 33) + dequeue_cmp(bg_flow, 32) + dequeue_cmp(flow, 34) + dequeue_cmp(bg_flow, 33) +end + +-- +function fq_codel_codel_test1() + -- Not inplemented +end + + +-- Run tests +-- fq_codel_sparse_test1() +-- fq_codel_sparse_test2() +fq_codel_sparse_test3() +-- +-- fq_codel_codel_test1() diff --git a/xdq-tester/hpfq.lua b/xdq-tester/hpfq.lua new file mode 100644 index 00000000..ed591e25 --- /dev/null +++ b/xdq-tester/hpfq.lua @@ -0,0 +1,42 @@ +-- SPDX-License-Identifier: GPL-2.0 +-- Copyright (c) 2022 Freysteinn Alfredsson + +-- Hierarchical Packet Fair Queueing (HPFQ) +config.bpf.file = "./sched_hpfq.bpf.o" + +-- Create flows +flow1 = Udp:new() +flow1.udp.dest = 4000 + +flow2 = Udp:new() +flow2.udp.dest = 8001 + +flow3 = Udp:new() +flow3.udp.dest = 8002 + + +function hpfq_test1() + enqueue(flow1, 1) + enqueue(flow2, 1) + enqueue(flow3, 1) + + dequeue_cmp(flow3, 1) + dequeue_cmp(flow2, 1) + dequeue_cmp(flow1, 1) +end + +function hpfq_debug() + enqueue(flow1, 1) + enqueue(flow1, 2) + dequeue_cmp(flow1, 1) + dequeue_cmp(flow1, 2) + + enqueue(flow1, 2) + enqueue(flow1, 3) + dequeue_cmp(flow1, 2) + dequeue_cmp(flow1, 3) +end + +-- hpfq_test1() + +hpfq_debug() diff --git a/xdq-tester/lib.lua b/xdq-tester/lib.lua new file mode 100644 index 00000000..3f276215 --- /dev/null +++ b/xdq-tester/lib.lua @@ -0,0 +1,348 @@ +-- SPDX-License-Identifier: GPL-2.0 +-- Copyright (c) 2022 Freysteinn Alfredsson + +xdq = { + total_queued = 0, + total_dequeued = 0, + currently_queued = 0, +} + +function table_has_key(table, key) + return table[key] ~= nil +end + +function hex_dump(buf) + return buf:gsub('.', function (c) return string.format('%02x ', string.byte(c)) end) +end + +function trim_hex(hex) + local result = hex + local max_length = 16 * 3 -- Each byte is two hex characters followed by a space + local length = #hex + result = result:gsub(' +$', '') + if (#result > max_length) then + result = result:sub(1, max_length) + result = result .. '... (' .. string.format('%d', length) .. ' bytes)' + end + return result +end + +function compare_eth(cmp_eth, eth) + if type(cmp_eth.proto) ~= "number" then + fail("comparison eth.proto must be a number") + end + if type(eth.proto) ~= "number" then + fail("dequeued eth.proto must be a number") + end + if cmp_eth.proto ~= eth.proto then + fail(string.format("expected eth.proto: 0x%x, but found 0x%x", cmp_eth.proto, eth.proto)); + end + + if type(cmp_eth.source) ~= "string" then + fail("comparison eth.source must be a string") + end + if type(eth.source) ~= "string" then + fail("dequeued eth.source must be a string") + end + if cmp_eth.source ~= eth.source then + fail(string.format("expected eth.source: %s, but found %s", cmp_eth.source, eth.source)); + end + + if type(cmp_eth.dest) ~= "string" then + fail("comparison eth.dest must be a string") + end + if type(eth.dest) ~= "string" then + fail("dequeued eth.dest must be a string") + end + if cmp_eth.dest ~= eth.dest then + fail(string.format("expected eth.dest: %s, but found %s", cmp_eth.dest, eth.dest)); + end +end + +function compare_ip(cmp_ip, ip) + local cmp_ip_saddr + local ip_saddr + local cmp_ip_daddr + local ip_daddr + + if type(cmp_ip.priority) ~= "number" then + fail("comparison ip.priority must be a number") + end + if type(ip.priority) ~= "number" then + fail("dequeued ip.priority must be a number") + end + if cmp_ip.priority ~= ip.priority then + fail(string.format("expected ip.priority: %d, but found %d", cmp_ip.priority, ip.priority)); + end + + if type(cmp_ip.version) ~= "number" then + fail("comparison ip.version must be a number") + end + if type(ip.version) ~= "number" then + fail("dequeued ip.version must be a number") + end + if cmp_ip.version ~= ip.version then + fail(string.format("expected ip.version: %d, but found %d", cmp_ip.version, ip.version)); + end + + if type(cmp_ip.flow_lbl) ~= "table" then + fail("comparison ip.flow_lbl not a table") + end + if type(ip.flow_lbl) ~= "table" then + fail("dequeue ip.flow_lbl not a table") + end + for i = 1, 3, 1 do + if type(cmp_ip.flow_lbl[i]) ~= "number" then + fail(string.format("comparison ip.flow_lbl[%d] must be a number", i)) + end + if type(ip.flow_lbl[i]) ~= "number" then + fail(string.format("dequeued ip.flow_lbl[%d] must be a number", i)) + end + if cmp_ip.flow_lbl[i] ~= ip.flow_lbl[i] then + fail(string.format("expected ip.flow_lbl[%d]: %d, but found %d", i, cmp_ip.flow_lbl[i], ip.flow_lbl[i])); + end + end + + -- TODO: Add function that calculates the payload_len in lua + -- if type(cmp_ip.payload_len) ~= "number" then + -- fail("comparison ip.payload_len must be a number") + -- end + -- if type(ip.payload_len) ~= "number" then + -- fail("dequeued ip.payload_len must be a number") + -- end + -- if cmp_ip.payload_len ~= ip.payload_len then + -- fail(string.format("expected ip.payload_len: %s, but found %s", cmp_ip.payload_len, ip.payload_len)); + -- end + + if type(cmp_ip.nexthdr) ~= "number" then + fail("comparison ip.nexthdr must be a number") + end + if type(ip.nexthdr) ~= "number" then + fail("dequeued ip.nexthdr must be a number") + end + if cmp_ip.nexthdr ~= ip.nexthdr then + fail(string.format("expected ip.nexthdr: %d, but found %d", cmp_ip.nexthdr, ip.nexthdr)); + end + + if type(cmp_ip.hop_limit) ~= "number" then + fail("comparison ip.hop_limit must be a number") + end + if type(ip.hop_limit) ~= "number" then + fail("dequeued ip.hop_limit must be a number") + end + if cmp_ip.hop_limit ~= ip.hop_limit then + fail(string.format("expected ip.hop_limit: %d, but found %d", cmp_ip.hop_limit, ip.hop_limit)); + end + + if type(cmp_ip.saddr) ~= "string" then + fail("comparison ip.saddr must be a string") + end + if type(ip.saddr) ~= "string" then + fail("dequeued ip.saddr must be a string") + end + cmp_ip_saddr = normalize_ipv6_address(cmp_ip.saddr) + ip_saddr = normalize_ipv6_address(ip.saddr) + if cmp_ip_saddr ~= ip_saddr then + fail(string.format("expected ip.saddr: %s, but found %s", cmp_ip_saddr, ip_saddr)); + end + + if type(cmp_ip.daddr) ~= "string" then + fail("comparison ip.daddr must be a string") + end + if type(ip.daddr) ~= "string" then + fail("dequeued ip.daddr must be a string") + end + cmp_ip_daddr = normalize_ipv6_address(cmp_ip.daddr) + ip_daddr = normalize_ipv6_address(ip.daddr) + if cmp_ip_daddr ~= ip_daddr then + fail(string.format("expected ip.daddr: %s, but found %s", cmp_ip_daddr, ip_daddr)); + end +end + +function compare_udp(cmp_udp, udp) + if type(cmp_udp.source) ~= "number" then + fail("comparison udp.source must be a number") + end + if type(udp.source) ~= "number" then + fail("dequeued udp.source must be a number") + end + if cmp_udp.source ~= udp.source then + fail(string.format("expected udp.source: %d, but found %d", cmp_udp.source, udp.source)); + end + + if type(cmp_udp.dest) ~= "number" then + fail("comparison udp.dest must be a number") + end + if type(udp.dest) ~= "number" then + fail("dequeued udp.dest must be a number") + end + if cmp_udp.dest ~= udp.dest then + fail(string.format("expected udp.dest: %d, but found %d", cmp_udp.dest, udp.dest)); + end + + -- TODO: Add len when missing + -- if type(cmp_udp.len) ~= "number" then + -- fail("comparison udp.len must be a number") + -- end + -- if type(udp.len) ~= "number" then + -- fail("dequeued udp.len must be a number") + -- end + -- if cmp_udp.len ~= udp.len then + -- fail(string.format("expected udp.len: %d, but found %d", cmp_udp.sourc, udp.len)); + + -- TODO: Add lua function that creates UDP checksum + -- if type(cmp_udp.check) ~= "number" then + -- fail("comparison udp.check must be a number") + -- end + -- if type(udp.check) ~= "number" then + -- fail("dequeued udp.check must be a number") + -- end + -- if cmp_udp.check ~= udp.check then + -- fail(string.format("expected udp.check: %d, but found %d", cmp_udp.sourc, udp.check)); + + if type(cmp_udp.payload) ~= "string" then + fail("comparison udp.payload must be a string") + end + if type(udp.payload) ~= "string" then + fail("dequeued udp.payload must be a string") + end + if cmp_udp.payload ~= udp.payload then + local cmp_payload, _ = trim_hex(hex_dump(cmp_udp.payload)) + local payload, _ = trim_hex(hex_dump(udp.payload)) + fail(string.format("expected udp.payload: %s,\n" .. + " but found: %s", cmp_payload, payload)); + end +end + +function update_sequence(packet, packet_nr) + for _, v in pairs(packet) do + if type(v) == "table" then + if type(v.next_sequence) == "function" then + v.next_sequence(packet, packet_nr) + end + end + end +end + +function enqueue(packet, packet_nr) + update_sequence(packet, packet_nr) + return xdq_enqueue(packet) +end + +function dequeue() + return xdq_dequeue() +end + +function cmp(packet, cmp_packet, packet_nr) + if type(cmp_packet) ~= "table" then + fail("parameter not a table") + end + if type(packet) ~= "table" then + fail("dequeue failed") + end + + if type(packet.eth) ~= "table" then + fail("comparision packet missing eth table") + end + if type(packet.eth) ~= "table" then + fail("dequeued packet missing eth table") + end + compare_eth(cmp_packet.eth, packet.eth) + if cmp_packet.eth.proto == ETH_P_IPV6 then + if type(cmp_packet.ip) ~= "table" then + fail("comparision packet missing ip table") + end + if type(packet.ip) ~= "table" then + fail("dequeued packet missing ip table") + end + compare_ip(cmp_packet.ip, packet.ip) + protocol = cmp_packet.ip.nexthdr + end + if protocol == IPPROTO_UDP then + if type(cmp_packet.udp) ~= "table" then + fail("comparision packet missing udp table") + end + if type(packet.udp) ~= "table" then + fail("dequeued packet missing udp table") + end + update_sequence(cmp_packet, packet_nr) + compare_udp(cmp_packet.udp, packet.udp) + end +end + +function dequeue_cmp(cmp_packet, packet_nr) + local packet, retval = dequeue() + cmp(packet, cmp_packet, packet_nr) + return packet, retval +end + +function udp_payload_enumerator(packet, packet_nr) + local payload + local nr_str + if type(packet_nr) ~= "number" then + fail("The packet sequence number must be a number") + end + if type(packet.udp.payload) ~= 'string' then + fail("'packet.udp.payload' must be a string") + end + payload = packet.udp.payload + nr_str = string.pack("L", packet_nr) + if (#payload > 0 and #payload < #nr_str) then + print("Warning: payload is smaller than the udp enumerator") + end + packet.udp.payload = nr_str .. payload:sub(#nr_str + 1) +end + +function create_payload(len) + if type(len) ~= 'number' then + fail("parameter must be a number") + end + if len < 0 then + fail("length parameter can't be a negative value") + end + return string.rep("A", len) +end + +function dump_rep(o, rep) + if type(o) == 'table' then + local indent = string.rep(" ", rep) + local s = '{\n' + for k, v in pairs(o) do + if k == 'payload' then + v = trim_hex(hex_dump(v)) + end + k = '"' .. k .. '"' + s = s .. indent .. k .. ': ' .. dump_rep(v, rep + 1) .. ',\n' + end + indent = string.rep(' ', rep - 1) + s = s:gsub(',\n$', '\n') + return s .. indent .. '}' + else + if type(o) ~= 'number' then + o = '"' .. tostring(o) .. '"' + end + return tostring(o) + end +end + +function dump(o) + return dump_rep(o, 1) +end + +function copy(obj) + if type(obj) ~= 'table' then + return obj + end + local res = {} + for k, v in pairs(obj) do + res[copy(k)] = copy(v) + end + return res +end + +Udp = { +} + +function Udp:new() + return copy(config.defaultUdp) +end diff --git a/xdq-tester/sched_fifo.bpf.c b/xdq-tester/sched_fifo.bpf.c new file mode 100644 index 00000000..e37239ef --- /dev/null +++ b/xdq-tester/sched_fifo.bpf.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Freysteinn Alfredsson */ + +#include +#include +#include +#include +#include + +#include "bpf_local_helpers.h" + +struct { + __uint(type, BPF_MAP_TYPE_PIFO_XDP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 2000); + __uint(map_extra, 4096); /* range */ +} pifo_map SEC(".maps"); + + +/* Simple FIFO */ +SEC("xdp") +int enqueue_prog(struct xdp_md *xdp) +{ + void *data = (void *)(long)xdp->data; + void *data_end = (void *)(long)xdp->data_end; + struct ethhdr *eth = data; + + if (eth + 1 > data_end) + return XDP_DROP; + + return bpf_redirect_map(&pifo_map, 0, 0); +} + +SEC("dequeue") +void *dequeue_prog(struct dequeue_ctx *ctx) +{ + __u64 prio = 0; + void *pkt = (void *) bpf_packet_dequeue(ctx, &pifo_map, 0, &prio); + if (!pkt) + return 0; + + return pkt; +} + +char _license[] SEC("license") = "GPL"; diff --git a/xdq-tester/sched_fq_codel.bpf.c b/xdq-tester/sched_fq_codel.bpf.c new file mode 100644 index 00000000..ec35d776 --- /dev/null +++ b/xdq-tester/sched_fq_codel.bpf.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Freysteinn Alfredsson */ + +#include +#include +#include +#include +#include +#include + +#include "bpf_local_helpers.h" + +struct { + __uint(type, BPF_MAP_TYPE_PIFO_XDP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 4096); + __uint(map_extra, 8388608); /* range - 1024×4098×2 */ +} pifo_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct network_tuple); + __type(value, struct fq_codel_flow_state); + __uint(max_entries, 16384); +} flow_states SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u64); + __uint(max_entries, 1); +} xdq_time_ns SEC(".maps"); + +const __u32 quantum = 1514; +__u64 time_bytes = quantum; + +struct xdq_meta { + __u64 time_ns; +} __attribute__((aligned(4))) __attribute__((packed)); + +static __always_inline __u64 get_time_ns() +{ +#ifdef XDQ_LIVE + return bpf_ktime_get_boot_ns(); +#else + __u32 key = 0; + __u64 *val = bpf_map_lookup_elem(&xdq_time_ns, &key); + if (!val) { + return 0; + } + return *val; +#endif +} + +static __always_inline int xdq_meta_add(struct xdp_md *ctx) +{ + struct xdq_meta *meta; + void *data; + int err; + + /* Reserve space in-front of data pointer for our meta info. + * (Notice drivers not supporting data_meta will fail here!) + */ + err = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)); + if (err) { + bpf_printk("Frey: Failed to add meta data section"); + return -1; + } + + /* Notice: Kernel-side verifier requires that loading of + * ctx->data MUST happen _after_ helper bpf_xdp_adjust_meta(), + * as pkt-data pointers are invalidated. Helpers that require + * this are determined/marked by bpf_helper_changes_pkt_data() + */ + data = (void *)(unsigned long)ctx->data; + + meta = (void *)(unsigned long)ctx->data_meta; + if (meta + 1 > data) /* Verify meta area is accessible */ + return -2; + + meta->time_ns = get_time_ns(); + return 0; +} + +static __always_inline int schedule_packet(struct parsing_context *pctx) +{ + struct packet_info p_info = {}; + struct network_tuple nt = {0}; + struct fq_codel_flow_state *flow; + struct fq_codel_flow_state new_flow = {0}; + __u32 prio = 0; + + /* Get flow */ + if (parse_packet(pctx, &p_info) < 0) + goto err; + + nt = p_info.nt; + + flow = bpf_map_lookup_or_try_init(&flow_states, &nt, &new_flow); + if (!flow) + goto err; + + prio = flow->finish_bytes; + + /* Handle Sparse flows */ + if (flow->pkts == 0 && time_bytes >= flow->grace_period) { // New flow + flow->total_bytes = 0; + prio = time_bytes - quantum; + } + + flow->pkts++; + flow->finish_bytes = prio + pctx->pkt_len; + flow->total_bytes += pctx->pkt_len; + flow->grace_period = time_bytes + quantum; + + if (bpf_map_update_elem(&flow_states, &nt, flow, BPF_ANY)) + goto err; + + return bpf_redirect_map(&pifo_map, prio, 0); +err: + bpf_printk("XDP DROP"); + return XDP_DROP; +} + +/* Weighted fair queueing (WFQ) */ +SEC("xdp") +int enqueue_prog(struct xdp_md *xdp) +{ + if (xdq_meta_add(xdp) < 0) { + return XDP_ABORTED; + } + + struct parsing_context pctx = { + .data = (void *)(long)xdp->data, + .data_end = (void *)(long)xdp->data_end, + .meta = (void *)(long)xdp->data_meta, + .pkt_len = (xdp->data_end - xdp->data) & 0xffff, + .nh = { .pos = (void *)(long)xdp->data }, + }; + return schedule_packet(&pctx); +} + + +SEC("dequeue") +void *dequeue_prog(struct dequeue_ctx *ctx) +{ + struct parsing_context pctx; + struct packet_info p_info = {0}; + struct network_tuple nt; + struct fq_codel_flow_state *flow; + struct xdq_meta *meta; + __u64 now; + __u64 sojourn_time; + __u64 prio = 0; + + struct xdp_md *pkt = NULL; + + pkt = (void *) bpf_packet_dequeue(ctx, &pifo_map, 0, &prio); + if (!pkt) { + bpf_printk("Frey: No packet in PIFO"); + goto err; + } + + pctx.data = (void *)(long) pkt->data; + pctx.data_end = (void *)(long) pkt->data_end; + pctx.nh.pos = (void *)(long) pkt->data; + pctx.meta = (void *)(long) pkt->data_meta; + + /* Get flow */ + if (parse_packet(&pctx, &p_info) < 0) { + bpf_printk("Frey: Parse failed"); + goto err; + } + + nt = p_info.nt; + + flow = bpf_map_lookup_elem(&flow_states, &nt); + if (!flow) { + bpf_printk("Frey: Failed to lookup flow"); + goto err; + } + + flow->pkts--; + + if (prio > time_bytes) + time_bytes = prio; + + meta = (struct xdq_meta *) pctx.meta; + if (meta + 1 > pctx.data) { /* Verify meta area is accessible */ + bpf_printk("Frey: Failed to lookup metadata"); + goto err; + } + now = get_time_ns(); + sojourn_time = now - meta->time_ns; + if (codel_drop(&flow->codel, sojourn_time, now)) { + bpf_printk("Frey: Codel dropped packet!"); + goto err; + } + + bpf_printk("DEQUEUE: port: %d <- prio: %6d <- time_bytes: %6d <- pkt: %6d <- tot: %6d", (int) bpf_ntohs(nt.daddr.port), prio, time_bytes, flow->pkts, flow->total_bytes); + return pkt; +err: + if (pkt) + bpf_packet_drop(ctx, pkt); + bpf_printk("DEQUEUE packet failed"); + return NULL; +} + +char _license[] SEC("license") = "GPL"; diff --git a/xdq-tester/sched_hpfq.bpf.c b/xdq-tester/sched_hpfq.bpf.c new file mode 100644 index 00000000..7f984765 --- /dev/null +++ b/xdq-tester/sched_hpfq.bpf.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Freysteinn Alfredsson */ + +#include +#include +#include +#include +#include + +#include "bpf_local_helpers.h" + +/* This code determines root WFQ scheduling using UDP ports. It would be nicer + in the future to use VLANs instead. + All UDP ports up to 4000 go to the left PIFO, and the other ports go to the + right PIFO. */ + +enum leaf_pifo { + NO_PIFO = 0, + LEFT_PIFO, + RIGHT_PIFO +}; + +struct { + __uint(type, BPF_MAP_TYPE_PIFO_GENERIC); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 4096); + __uint(map_extra, 8388608); /* range - 1024×4098×2 */ +} root_pifo_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PIFO_XDP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 4096); + __uint(map_extra, 8388608); /* range - 1024×4098×2 */ +} left_pifo_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PIFO_XDP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 4096); + __uint(map_extra, 8388608); /* range - 1024×4098×2 */ +} right_pifo_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct network_tuple); + __type(value, struct flow_state); + __uint(max_entries, 16384); +} flow_states SEC(".maps"); + +__u64 root_time_bytes = 0; +__u64 left_time_bytes = 0; +__u64 right_time_bytes = 0; + +__u16 default_root_weight = 256; +__u16 default_weight = 256; + +static __always_inline int set_root_flow_priority(struct parsing_context *pctx, + struct flow_state *flow) +{ + __u64 root_start_time_bytes = bpf_max(root_time_bytes, flow->root_finish_bytes); + flow->root_finish_bytes = root_start_time_bytes + (pctx->pkt_len * flow->root_weight >> 8); + return root_start_time_bytes & ((1UL << 60) - 1); // Priority only defined in the lower 60 bits +} + +static __always_inline int set_leaf_flow_priority(struct parsing_context *pctx, + struct flow_state *flow, + __u64 leaf_time_bytes) +{ + __u64 leaf_start_time_bytes = bpf_max(leaf_time_bytes, flow->finish_bytes); + flow->finish_bytes = leaf_start_time_bytes + (pctx->pkt_len * flow->weight >> 8); + return leaf_start_time_bytes; +} + +static __always_inline int schedule_packet(struct parsing_context *pctx) +{ + struct packet_info p_info = {}; + + struct network_tuple nt = {0}; + __u32 leaf_id; + + struct flow_state new_flow = {0}; + struct flow_state *flow; + + __u64 root_prio; + __u32 left_prio; + __u32 right_prio; + + new_flow.root_weight = default_root_weight; + new_flow.weight = default_weight; + + /* Get flow */ + if (parse_packet(pctx, &p_info) < 0) + goto err; + + nt = p_info.nt; + + leaf_id = (bpf_ntohs(p_info.udph->dest) <= 4000) ? LEFT_PIFO : RIGHT_PIFO; + + flow = bpf_map_lookup_or_try_init(&flow_states, &nt, &new_flow); + if (!flow) + goto err; + flow->pkts++; + + /* Calculate scheduling priority */ + // Root WFQ + root_prio = set_root_flow_priority(pctx, flow); + if (bpf_map_push_elem(&root_pifo_map, &leaf_id, root_prio)) + goto err; + flow->root_priority = root_prio; + + // Leaf WFQ + if (leaf_id == LEFT_PIFO) { + left_prio = set_leaf_flow_priority(pctx, flow, left_time_bytes); + + if (bpf_map_update_elem(&flow_states, &nt, flow, BPF_ANY)) + goto err; + + bpf_printk("XDP HPFQ scheduled with priority, root:%d left:%d", root_prio, left_prio); + return bpf_redirect_map(&left_pifo_map, left_prio, 0); + } else if (leaf_id == RIGHT_PIFO) { + right_prio = set_leaf_flow_priority(pctx, flow, right_time_bytes); + + if (bpf_map_update_elem(&flow_states, &nt, flow, BPF_ANY)) + goto err; + + bpf_printk("XDP HPFQ scheduled with priority, root:%d right:%d", root_prio, right_prio); + return bpf_redirect_map(&right_pifo_map, right_prio, 0); + } +err: + bpf_printk("XDP DROP"); + return XDP_DROP; +} + +/* Hierarchical Packet Fair Queueing (HPFQ) */ +SEC("xdp") +int enqueue_prog(struct xdp_md *xdp) +{ + struct parsing_context pctx = { + .data = (void *)(long)xdp->data, + .data_end = (void *)(long)xdp->data_end, + .pkt_len = (xdp->data_end - xdp->data) & 0xffff, + .nh = { .pos = (void *)(long)xdp->data }, + }; + return schedule_packet(&pctx); +} + + +SEC("dequeue") +void *dequeue_prog(struct dequeue_ctx *ctx) +{ + struct parsing_context pctx; + struct packet_info p_info = {0}; + + struct network_tuple nt; + __u32 leaf_id; + + struct flow_state *flow; + + __u64 root_prio = 0; + __u64 leaf_prio = 0; + + struct xdp_md *pkt = NULL; + + + if (bpf_map_pop_elem(&root_pifo_map, &leaf_id)) + goto err; + + if (leaf_id == LEFT_PIFO) + pkt = (void *) bpf_packet_dequeue(ctx, &left_pifo_map, 0, &leaf_prio); + else if (leaf_id == RIGHT_PIFO) + pkt = (void *) bpf_packet_dequeue(ctx, &right_pifo_map, 0, &leaf_prio); + + if (!pkt) + goto err; + + pctx.data = (void *)(long) pkt->data; + pctx.data_end = (void *)(long) pkt->data_end; + pctx.nh.pos = (void *)(long) pkt->data; + + /* Get flows */ + if (parse_packet(&pctx, &p_info) < 0) + goto err; + + nt = p_info.nt; + + // Handle flow + flow = bpf_map_lookup_elem(&flow_states, &nt); + if (!flow) + goto err; + root_prio = flow->root_priority; + + flow->pkts--; + if (flow->pkts <= 0) { + if (!flow->persistent) { + bpf_map_delete_elem(&flow_states, &nt); + } else { + flow->root_finish_bytes = 0; + flow->finish_bytes = 0; + } + } + + // Handle virtual time in bytes + root_time_bytes = root_prio; + if (leaf_id == LEFT_PIFO) + left_time_bytes = leaf_prio; + else + right_time_bytes = leaf_prio; + + bpf_printk("Frey: left_time_bytes:%d right_time_bytes:%d", left_time_bytes, right_time_bytes); + bpf_printk("flow: %hd - root_weight:%d leaf_weight:%d", nt.daddr.port, flow->root_weight, flow->weight); + if (leaf_id == LEFT_PIFO) + bpf_printk("DEQUEUE HPFQ with priority, root:%d left:%d", root_prio, leaf_prio); + else + bpf_printk("DEQUEUE HPFQ with priority, root:%d right:%d", root_prio, leaf_prio); + + return pkt; +err: + if (pkt) + bpf_packet_drop(ctx, pkt); + bpf_printk("DEQUEUE packet failed"); + return NULL; +} + +char _license[] SEC("license") = "GPL"; diff --git a/xdq-tester/sched_sprio.bpf.c b/xdq-tester/sched_sprio.bpf.c new file mode 100644 index 00000000..db2cbdd9 --- /dev/null +++ b/xdq-tester/sched_sprio.bpf.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Freysteinn Alfredsson */ + +#include +#include +#include +#include +#include + +#include "bpf_local_helpers.h" + +struct { + __uint(type, BPF_MAP_TYPE_PIFO_XDP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 1024); + __uint(map_extra, 4096); /* range */ +} pifo_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct network_tuple); + __type(value, struct flow_state); + __uint(max_entries, 16384); +} flow_states SEC(".maps"); + +__u32 default_weight = 256; + +static __always_inline int schedule_packet(struct parsing_context *pctx) +{ + struct packet_info p_info = {}; + struct network_tuple nt = {0}; + struct flow_state *flow; + struct flow_state new_flow = {0}; + __u32 prio = 0; + + new_flow.pkts = 0; + new_flow.finish_bytes = 0; + new_flow.weight = default_weight; + new_flow.persistent = 0; + + /* Get flow */ + if (parse_packet(pctx, &p_info) < 0) + goto err; + + nt = p_info.nt; + + flow = bpf_map_lookup_or_try_init(&flow_states, &nt, &new_flow); + if (!flow) + goto err; + + flow->pkts++; + + /* Calculate scheduling priority */ + prio = flow->weight; + + if (bpf_map_update_elem(&flow_states, &nt, flow, BPF_ANY)) + goto err; + + bpf_printk("XDP SPRIO scheduled with priority %d", prio); + return bpf_redirect_map(&pifo_map, prio, 0); +err: + bpf_printk("XDP DROP"); + return XDP_DROP; +} + +/* Simple strict priority */ +SEC("xdp") +int enqueue_prog(struct xdp_md *xdp) +{ + struct parsing_context pctx = { + .data = (void *)(long)xdp->data, + .data_end = (void *)(long)xdp->data_end, + .pkt_len = (xdp->data_end - xdp->data) & 0xffff, + .nh = { .pos = (void *)(long)xdp->data }, + }; + return schedule_packet(&pctx); +} + + +SEC("dequeue") +void *dequeue_prog(struct dequeue_ctx *ctx) +{ + struct parsing_context pctx; + struct packet_info p_info = {0}; + struct network_tuple nt; + struct flow_state *flow; + __u64 prio = 0; + + struct xdp_md *pkt = NULL; + + pkt = (void *)bpf_packet_dequeue(ctx, &pifo_map, 0, &prio); + if (!pkt) + goto err; + + pctx.data = (void *)(long) pkt->data; + pctx.data_end = (void *)(long) pkt->data_end; + pctx.nh.pos = (void *)(long) pkt->data; + + /* Get flow */ + if (parse_packet(&pctx, &p_info) < 0) + goto err; + + nt = p_info.nt; + + flow = bpf_map_lookup_elem(&flow_states, &nt); + if (!flow) + goto err; + + flow->pkts--; + if (flow->pkts <= 0) { + if (!flow->persistent) + bpf_map_delete_elem(&flow_states, &nt); + else + flow->finish_bytes = 0; + } + + bpf_printk("DEQUEUE SPRIO with priority %d", prio); + return pkt; +err: + if (pkt) + bpf_packet_drop(ctx, pkt); + bpf_printk("DEQUEUE packet failed"); + return NULL; +} + +char _license[] SEC("license") = "GPL"; diff --git a/xdq-tester/sched_test.bpf.c b/xdq-tester/sched_test.bpf.c new file mode 100644 index 00000000..04ff6fd1 --- /dev/null +++ b/xdq-tester/sched_test.bpf.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Freysteinn Alfredsson */ + +#include +#include +#include +#include +#include +#include + +#include "bpf_local_helpers.h" + + +struct { + __uint(type, BPF_MAP_TYPE_PIFO_XDP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 4096); + __uint(map_extra, 4096); /* range */ +} pifo_map SEC(".maps"); + + +struct xdq_meta { + __u64 time_ns; +} __attribute__((aligned(4))) __attribute__((packed)); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u64); + __uint(max_entries, 1); +} xdq_time_ns SEC(".maps"); + +static __always_inline __u64 get_time_ns() +{ +#ifdef XDQ_LIVE + return bpf_ktime_get_boot_ns(); +#else + __u32 key = 0; + __u64 *val = bpf_map_lookup_elem(&xdq_time_ns, &key); + if (!val) { + return 0; + } + return *val; +#endif +} + + +static __always_inline int xdq_meta_add(struct xdp_md *ctx) +{ + struct xdq_meta *meta; + void *data; + void *data_end; + int err; + + /* Reserve space in-front of data pointer for our meta info. + * (Notice drivers not supporting data_meta will fail here!) + */ + err = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)); + if (err) { + bpf_printk("Frey: Failed to add meta data section"); + return -1; + } + + data = (void *)(unsigned long)ctx->data; + data_end = (void *)(unsigned long)ctx->data_end; + meta = (void *)(unsigned long)ctx->data_meta; + if (meta + 1 > data) /* Verify meta area is accessible */ + return -2; + + meta->time_ns = get_time_ns(); + + bpf_printk("Frey 1: data: %p data_end: %p data_size: %d meta: %p meta+1: %p meta_size: %d", data, data_end, data_end - data, meta, (meta + 1), sizeof(struct xdq_meta)); + return 0; +} + + +static __always_inline int schedule_packet(struct parsing_context *pctx) +{ + struct packet_info p_info = {}; + + /* Get flow */ + if (parse_packet(pctx, &p_info) < 0) + goto err; + + + return bpf_redirect_map(&pifo_map, 0, 0); +err: + bpf_printk("XDP DROP"); + return XDP_DROP; +} + + +SEC("xdp") +int enqueue_prog(struct xdp_md *xdp) +{ + if (xdq_meta_add(xdp) < 0) { + return XDP_ABORTED; + } + + struct parsing_context pctx = { + .data = (void *)(long)xdp->data, + .data_end = (void *)(long)xdp->data_end, + .meta = (void *)(long)xdp->data_meta, + .pkt_len = (xdp->data_end - xdp->data) & 0xffff, + .nh = { .pos = (void *)(long)xdp->data }, + }; + return schedule_packet(&pctx); +} + + +SEC("dequeue") +void *dequeue_prog(struct dequeue_ctx *ctx) +{ + __u64 prio = 0; + struct xdp_md *pkt = (void *) bpf_packet_dequeue(ctx, &pifo_map, 0, &prio); + __u64 sojourn_time = 0; + void *data; + struct xdq_meta *meta; + + if (!pkt) + return 0; + + data = (void *)(unsigned long)pkt->data; + meta = (void *)(unsigned long)pkt->data_meta; + + if (meta + 1 > data) { /* Verify meta area is accessible */ + bpf_printk("Frey: Failed to lookup metadata"); + bpf_printk("Frey 2: data: %p data_end: %p data_size: %d meta: %p meta+1: %p meta_size: %d", pkt->data, pkt->data_end, pkt->data_end - pkt->data, meta, (meta + 1), sizeof(*meta)); + goto err; + } + sojourn_time = meta->time_ns; + bpf_printk("Frey: Sojourn_time: %llu", sojourn_time); + return pkt; + +err: + if (pkt) + bpf_packet_drop(ctx, pkt); + bpf_printk("DEQUEUE packet failed"); + return NULL; + +} + +char _license[] SEC("license") = "GPL"; diff --git a/xdq-tester/sched_wfq.bpf.c b/xdq-tester/sched_wfq.bpf.c new file mode 100644 index 00000000..eba07d7e --- /dev/null +++ b/xdq-tester/sched_wfq.bpf.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Freysteinn Alfredsson */ + +#include +#include +#include +#include +#include + +#include "bpf_local_helpers.h" + +struct { + __uint(type, BPF_MAP_TYPE_PIFO_XDP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 4096); + __uint(map_extra, 8388608); /* range - 1024×4098×2 */ +} pifo_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct network_tuple); + __type(value, struct flow_state); + __uint(max_entries, 16384); +} flow_states SEC(".maps"); + +__u64 time_bytes = 0; + +__u16 default_weight = 256; + +static __always_inline int schedule_packet(struct parsing_context *pctx) +{ + struct packet_info p_info = {}; + struct network_tuple nt = {0}; + struct flow_state *flow; + struct flow_state new_flow = {0}; + __u32 start_time_bytes; + __u32 prio = 0; + + new_flow.weight = default_weight; + + /* Get flow */ + if (parse_packet(pctx, &p_info) < 0) + goto err; + + nt = p_info.nt; + + flow = bpf_map_lookup_or_try_init(&flow_states, &nt, &new_flow); + if (!flow) + goto err; + + flow->pkts++; + + /* Calculate scheduling priority */ + start_time_bytes = bpf_max(time_bytes, flow->finish_bytes); + flow->finish_bytes = start_time_bytes + (pctx->pkt_len * flow->weight >> 8); + prio = start_time_bytes; + + if (bpf_map_update_elem(&flow_states, &nt, flow, BPF_ANY)) + goto err; + + bpf_printk("Frey: port: %d weight: %d pkt: %d end: %d", (int) bpf_ntohs(nt.daddr.port), flow->weight, flow->pkts, flow->finish_bytes); + bpf_printk("XDP WFQ scheduled with priority, %d", prio); + return bpf_redirect_map(&pifo_map, prio, 0); +err: + bpf_printk("XDP DROP"); + return XDP_DROP; +} + +/* Weighted fair queueing (WFQ) */ +SEC("xdp") +int enqueue_prog(struct xdp_md *xdp) +{ + struct parsing_context pctx = { + .data = (void *)(long)xdp->data, + .data_end = (void *)(long)xdp->data_end, + .pkt_len = (xdp->data_end - xdp->data) & 0xffff, + .nh = { .pos = (void *)(long)xdp->data }, + }; + return schedule_packet(&pctx); +} + + +SEC("dequeue") +void *dequeue_prog(struct dequeue_ctx *ctx) +{ + struct parsing_context pctx; + struct packet_info p_info = {0}; + struct network_tuple nt; + struct flow_state *flow; + __u64 prio = 0; + + struct xdp_md *pkt = NULL; + + pkt = (void *) bpf_packet_dequeue(ctx, &pifo_map, 0, &prio); + if (!pkt) { + bpf_printk("Frey: No packet in PIFO"); + goto err; + } + + pctx.data = (void *)(long) pkt->data; + pctx.data_end = (void *)(long) pkt->data_end; + pctx.nh.pos = (void *)(long) pkt->data; + + /* Get flow */ + if (parse_packet(&pctx, &p_info) < 0) + goto err; + + nt = p_info.nt; + + flow = bpf_map_lookup_elem(&flow_states, &nt); + if (!flow) + goto err; + + flow->pkts--; + if (flow->pkts <= 0) { + if (!flow->persistent) + bpf_map_delete_elem(&flow_states, &nt); + else + flow->finish_bytes = 0; + } + + time_bytes = prio; + + bpf_printk("port: %d - weight: %d", bpf_ntohs(nt.daddr.port), flow->weight); + bpf_printk("DEQUEUE WFQ with priority %d", prio); + return pkt; +err: + if (pkt) + bpf_packet_drop(ctx, pkt); + bpf_printk("DEQUEUE packet failed"); + return NULL; +} + +char _license[] SEC("license") = "GPL"; diff --git a/xdq-tester/sprio.lua b/xdq-tester/sprio.lua new file mode 100644 index 00000000..682c4253 --- /dev/null +++ b/xdq-tester/sprio.lua @@ -0,0 +1,27 @@ +-- SPDX-License-Identifier: GPL-2.0 +-- Copyright (c) 2022 Freysteinn Alfredsson + +-- Strict Priority scheduler (SPRIO) +config.bpf.file = "./sched_sprio.bpf.o" + +-- Create flows +flow1 = Udp:new() +flow1.udp.dest = 8080 +set_flow_weight(flow1, 2) + +flow2 = Udp:new() +flow2.udp.dest = 8081 +set_flow_weight(flow2, 1) + +flow3 = Udp:new() +flow3.udp.dest = 8082 +set_flow_weight(flow3, 0) + +-- Test scheduler +enqueue(flow1, 1) +enqueue(flow2, 1) +enqueue(flow3, 1) + +dequeue_cmp(flow3, 1) +dequeue_cmp(flow2, 1) +dequeue_cmp(flow1, 1) diff --git a/xdq-tester/test.lua b/xdq-tester/test.lua new file mode 100644 index 00000000..8e63edbb --- /dev/null +++ b/xdq-tester/test.lua @@ -0,0 +1,30 @@ +-- SPDX-License-Identifier: GPL-2.0 +-- Copyright (c) 2022 Freysteinn Alfredsson + +-- Test metadata +config.bpf.file = "./sched_test.bpf.o" + +-- Setup flows +adjust_meta(8) +flow1 = Udp:new() +flow1.udp.dest = 8080 +flow1.udp.payload = create_payload(4) + +flow2 = Udp:new() +flow2.udp.dest = 8081 +flow2.udp.payload = create_payload(10) + +-- Test scheduler +function test() + enqueue(flow1, 1) + dequeue_cmp(flow1, 1) + + set_time_ns(100) + enqueue(flow2, 1) + packet = dequeue() + cmp(packet, flow2, 1) + print(dump(packet)) + cmp(packet, flow1, 1) +end + +test() diff --git a/xdq-tester/wfq.lua b/xdq-tester/wfq.lua new file mode 100644 index 00000000..f0d63fbf --- /dev/null +++ b/xdq-tester/wfq.lua @@ -0,0 +1,149 @@ +-- SPDX-License-Identifier: GPL-2.0 +-- Copyright (c) 2022 Freysteinn Alfredsson + +-- Weighted Fair Queueing (WFQ) +config.bpf.file = "./sched_wfq.bpf.o" + + +-- Setup flows +flow1 = Udp:new() +flow1.udp.dest = 8000 +flow1.udp.payload = create_payload(38) + +flow2 = Udp:new() +flow2.udp.dest = 8001 +flow2.udp.payload = create_payload(138) + +flow3 = Udp:new() +flow3.udp.dest = 8002 +flow3.udp.payload = create_payload(38) + +-- Test scheduler + +-- 1. Enqueue two packets using the same flow. +-- Tests that no flows remain after the PIFO is empty. +function wfq_test1() + enqueue(flow1, 1) + dequeue_cmp(flow1, 1) +end + + +-- 2. Enqueue two flows +function wfq_test2() + enqueue(flow1, 1) + enqueue(flow1, 2) + enqueue(flow3, 1) + + dequeue_cmp(flow1, 1) + dequeue_cmp(flow3, 1) + dequeue_cmp(flow1, 2) +end + + +-- 3. Enqueue three flows where one flow has a larger packet size. +function wfq_test3() + -- priority: flow(number, flow_end_byte), flow(...), ... + enqueue(flow1, 1) + -- 0: *f1(1, 100) + + enqueue(flow2, 1) + -- 0: *f2(1, 200), f1(1, 100) + + enqueue(flow1, 2) + -- 0: f2(1, 200), f1(1, 100) + -- 100: *f1(2, 100) + + enqueue(flow2, 2) + -- 0: f2(1, 200), f1(1, 100) + -- 100: f1(2, 100) + -- 200: *f2(2, 400) + + dequeue_cmp(flow1, 1) + -- 0: f2(1, 200) ---> *f1(1, 100) + -- 100: f1(2, 100) + -- 200: f2(2, 400) + + enqueue(flow1, 3) + -- 0: f2(1, 200) + -- 100: f1(2, 100) + -- 200: *f1(3, 300), f2(2, 400) + + dequeue_cmp(flow2, 1) + -- 0: ---> *f2(1, 200) + -- 100: f1(2, 100) + -- 200: f1(3, 300), f2(2, 400) + + dequeue_cmp(flow1, 2) + -- 100: ---> *f1(2, 100) + -- 200: f1(3, 300), f2(2, 400) + + enqueue(flow3, 1) + -- 100: *f3(1, 200) + -- 200: f1(3, 300), f2(2, 400) + + enqueue(flow3, 2) + -- 100: f3(1, 200) + -- 200: *f3(2, 300), f1(3, 300), f2(2, 400) + + dequeue_cmp(flow3, 1) + -- 100: ---> *f3(1, 200) + -- 200: f3(2, 300), f1(3, 300), f2(2, 400) + + dequeue_cmp(flow2, 2) + -- 200: f3(2, 300), f1(3, 300) ---> *f2(2, 400) + + dequeue_cmp(flow1, 3) + -- 200: f3(2, 300) ---> *f1(3, 300) + + dequeue_cmp(flow3, 2) + -- 200: ---> *f3(2, 300) +end + + +-- 4. Enqueue multiple packets +function wfq_test4() + for i = 0, 4095, 1 + do + enqueue(flow1, i + 1) + end + for i = 0, 4095, 1 + do + dequeue_cmp(flow1, i + 1) + end +end + + +-- 5. Enqueue packets with weights +function wfq_test5() + set_flow_weight(flow1, 1024) + enqueue(flow1, 1) + enqueue(flow1, 2) + enqueue(flow2, 1) + enqueue(flow2, 2) + + dequeue_cmp(flow1, 1) + dequeue_cmp(flow2, 1) + dequeue_cmp(flow2, 2) + dequeue_cmp(flow1, 2) + + set_flow_weight(flow1, 256) + set_flow_weight(flow2, 32) + + enqueue(flow1, 3) + enqueue(flow1, 4) + enqueue(flow2, 3) + enqueue(flow2, 4) + + dequeue_cmp(flow1, 3) + dequeue_cmp(flow2, 3) + dequeue_cmp(flow2, 4) + dequeue_cmp(flow1, 4) +end + + +-- Run tests +wfq_test1() +wfq_test2() +wfq_test3() +wfq_test4() +wfq_test5() diff --git a/xdq-tester/xdq-tester.c b/xdq-tester/xdq-tester.c new file mode 100644 index 00000000..9a0bdb5b --- /dev/null +++ b/xdq-tester/xdq-tester.c @@ -0,0 +1,1072 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2022 Freysteinn Alfredsson */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include "logging.h" + +#include "xdq-tester.h" + +#include "bpf_shared_data.h" + +static const struct option long_options[] = { + {"verbose", no_argument, NULL, 'v' }, + {"help", no_argument, NULL, 'h' }, + {} +}; + +static void mac_to_string(char *dst, unsigned char *mac); +static __be32 calc_ipv6_chksum_part(const struct ipv6hdr *iph); +static __be16 calc_udp_cksum(const struct udphdr *udp, __be32 chksum_part); +static struct ethhdr *lua_to_eth_header(lua_State *L, struct packet *pkt); +static struct ipv6hdr *lua_to_ipv6_header(lua_State *L, struct packet *pkt); +static struct udphdr *lua_to_udp_header(lua_State *L, struct packet *pkt, __be64 checksum_part); +static struct packet *lua_parse_packet(lua_State *L); +static void set_bpf_fd(lua_State *L, struct bpf_object *obj, const char *func_name, int *prog_fd); +static struct ethhdr *parse_eth(lua_State *L, struct packet *pkt); +static struct ipv6hdr *parse_ipv6(lua_State *L, struct packet *pkt); +static struct udphdr *parse_udp(lua_State *L, struct packet *pkt); +static int bpf_xdp(lua_State *L, struct packet *pkt); +static int bpf_dequeue(lua_State *L, struct packet *pkt); +static struct ethhdr *parse_eth_to_lua(lua_State *L, struct packet *pkt); +static struct ipv6hdr *parse_ipv6_to_lua(lua_State *L, struct packet *pkt); +static struct udphdr *parse_udp_to_lua(lua_State *L, struct packet *pkt); +static void parse_packet_to_lua(lua_State *L, struct packet *pkt); +static void initLuaFunctions(lua_State *L, char *prog_name); +static void usage(char *prog_name); + + +static void mac_to_string(char *dst, unsigned char *mac) +{ + snprintf(dst, 18, "%02x:%02x:%02x:%02x:%02x:%02x", + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); +} + +static __be32 calc_ipv6_chksum_part(const struct ipv6hdr *iph) +{ + __u32 chksum = iph->nexthdr + ntohs(iph->payload_len); + int i; + + for (i = 0; i < 8; i++) { + chksum += ntohs(iph->saddr.s6_addr16[i]); + chksum += ntohs(iph->daddr.s6_addr16[i]); + } + return chksum; +} + +static __be16 calc_udp_cksum(const struct udphdr *udp, __be32 chksum_part) +{ + __u32 chksum = chksum_part; + chksum += ntohs(udp->source); + chksum += ntohs(udp->dest); + chksum += ntohs(udp->len); + + while (chksum >> 16) + chksum = (chksum & 0xffff) + (chksum >> 16); + return htons(~chksum); +} + +struct xdq_state *get_xdq_state(lua_State *L) +{ + struct xdq_state *state; + + lua_getglobal(L, "_xdq"); + if (!lua_isuserdata(L, -1)) + die(L, ""); + state = lua_touserdata(L, -1); + lua_remove(L, -1); + + return state; +} + +void die(lua_State *L, const char *format, ...) +{ + struct xdq_state *state; + lua_Debug ar; + va_list args; + int level = 0; + + lua_getglobal(L, "_xdq"); + if (!lua_isuserdata(L, -1)) { + fprintf(stderr, "Missing internal XDQ state within the Lua environment\n"); + exit(EXIT_FAILURE); + } + state = lua_touserdata(L, -1); + + fprintf(stderr, "Stacktrace:\n"); + while (lua_getstack(L, level, &ar)) { + lua_getinfo(L, "nSl", &ar); + if (!strcmp(ar.short_src, "[C]")) + fprintf(stderr, " [%d] %s: %s [%s]\n", level, state->prog_name, + ar.name, ar.what); + else + fprintf(stderr, " [%d] %s:%d:%s [%s]\n", + level, ar.short_src, ar.currentline, + (ar.name ? ar.name : ""), ar.what); + ++level; + } + fprintf(stderr, " Error: "); + + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + fprintf(stderr, "\n"); + free(state->xdq_script); + exit(EXIT_FAILURE); +} + +struct packet *packet_alloc(lua_State *L, struct packet *pkt, size_t size) +{ + const int ALLOC_SIZE = 4096; + size_t old_length; + + if (pkt == NULL) + pkt = calloc(1, sizeof(struct packet)); + if (pkt->data == NULL) { + pkt->data = malloc(ALLOC_SIZE); + if (!pkt->data) + die(L, "Failed to allocate memory for packet: %s", strerror(errno)); + pkt->data_end = pkt->data; + pkt->cur = pkt->data; + pkt->length = 0; + } + old_length = pkt->length; + pkt->length += size; + pkt->data_end += size; + if (pkt->length > ALLOC_SIZE) + die(L, "Packet larger than %d octets\n", ALLOC_SIZE); + memset(pkt->data + old_length, '\0', size); + return pkt; +} + +void packet_free(struct packet *pkt) +{ + free(pkt->data); + free(pkt); +} + +static struct ethhdr *lua_to_eth_header(lua_State *L, struct packet *pkt) +{ + struct ethhdr *eth; + const char *mac_src_str; + const char *mac_dst_str; + int proto; + packet_alloc(L, pkt, sizeof(struct ethhdr)); + eth = (struct ethhdr *) pkt->cur; + + if (!lua_istable(L, -1)) + die(L, "Missing eth header\n"); + lua_getfield(L, -1, "eth"); + + lua_getfield(L, -1, "source"); + if (!lua_isstring(L, -1)) + die(L, "Source MAC address is not a string\n"); + mac_src_str = lua_tostring(L, -1); + if (!ether_aton_r(mac_src_str, (struct ether_addr *) ð->h_source)) + die(L, "Not a valid source MAC address: '%s'\n", mac_src_str); + lua_remove(L, -1); + + lua_getfield(L, -1, "dest"); + if (!lua_isstring(L, -1)) + die(L, "Destination MAC address is not a string\n"); + mac_dst_str = lua_tostring(L, -1); + if (!ether_aton_r(mac_dst_str, (struct ether_addr *) ð->h_dest)) + die(L, "Not a valid destination MAC address: '%s'\n", mac_dst_str); + lua_remove(L, -1); + + lua_getfield(L, -1, "proto"); + if (!lua_isinteger(L, -1)) + die(L, "Ethernet protocol field must be an integer\n"); + proto = lua_tointeger(L, -1); + if (proto < 0 || proto > 0xffff) + die(L, "Ethernet protocol field must be an integer between 0x0 and 0xffff, but was 0x%x\n", + proto); + eth->h_proto = htons((short) proto); + lua_remove(L, -1); + + lua_remove(L, -1); // Remove eth table from the stack + return eth; +} + +static struct ipv6hdr *lua_to_ipv6_header(lua_State *L, struct packet *pkt) +{ + struct ipv6hdr *iph; + int priority; + int version; + int flow_lbl_int; + int payload_len; + int nexthdr; + int hop_limit; + const char *src_ip; + const char *dst_ip; + + packet_alloc(L, pkt, sizeof(struct ipv6hdr)); + + iph = (struct ipv6hdr *) pkt->cur; + + lua_getfield(L, -1, "ip"); + if (!lua_istable(L, -1)) + die(L, "Missing eth header\n"); + + lua_getfield(L, -1, "priority"); + if (!lua_isinteger(L, -1)) + die(L, "IPv6 prirotiy field must be an integer\n"); + priority = lua_tointeger(L, -1); + if (priority < 0 || priority > 15) + die(L, "IPv6 prirotiy must be an integer between 0 and 15, but was %d\n", priority); + iph->priority = priority; + lua_remove(L, -1); + + lua_getfield(L, -1, "version"); + if (!lua_isinteger(L, -1)) + die(L, "IPv6 version field must be an integer\n"); + version = lua_tointeger(L, -1); + if (version < 0 || version > 15) + die(L, "IPv6 version must be an integer between 0 and 15, but was %d\n", version); + iph->version = version; + lua_remove(L, -1); + + lua_getfield(L, -1, "flow_lbl"); + if (!lua_istable(L, -1)) + die(L, "IPv6 flow_lbl must be a table\n"); + for (int i = 0; i < 3; i++) { + lua_rawgeti(L, -1, i + 1); + if (!lua_isinteger(L, -1)) + die(L, "IPv6 flow_lbl[%d] field must be an integer\n", i); + flow_lbl_int = lua_tointeger(L, -1); + if (flow_lbl_int < 0 || flow_lbl_int > 0xff) + die(L, "IPv6 flow_lbl[%d] field must be between 0x0 and 0xff but was 0x%x\n", + i, flow_lbl_int); + iph->flow_lbl[i] = flow_lbl_int; + lua_remove(L, -1); + } + lua_remove(L, -1); + + if (lua_getfield(L, -1, "payload_len") != LUA_TNIL) { + if (!lua_isinteger(L, -1)) + die(L, "IPv6 payload_len field must be an integer\n"); + payload_len = lua_tointeger(L, -1); + if (payload_len < 0 || payload_len > 0xffff) + die(L, "IPv6 payload_len field must be an integer between 0x0 and 0xffff, but was 0x%x\n", + payload_len); + iph->payload_len = htons((short) payload_len); + } + lua_remove(L, -1); + + lua_getfield(L, -1, "nexthdr"); + if (!lua_isinteger(L, -1)) + die(L, "IPv6 nexthdr field must be an integer\n"); + nexthdr = lua_tointeger(L, -1); + if (nexthdr < 0x0 || nexthdr > 0xff) + die(L, "IPv6 nexthdr must be an integer between 0x0 and 0xff, but was 0x%x\n", + nexthdr); + iph->nexthdr = nexthdr; + lua_remove(L, -1); + + lua_getfield(L, -1, "hop_limit"); + if (!lua_isinteger(L, -1)) + die(L, "IPv6 hop_limit field must be an integer\n"); + hop_limit = lua_tointeger(L, -1); + if (hop_limit < 0x0 || hop_limit > 0xff) + die(L, "IPv6 hop_limit must be an integer between 0x0 and 0xff, but was 0x%x\n", + hop_limit); + iph->hop_limit = hop_limit; + lua_remove(L, -1); + + lua_getfield(L, -1, "saddr"); + if (!lua_isstring(L, -1)) + die(L, "Source IPv6 address is not a string\n"); + src_ip = lua_tostring(L, -1); + if (!inet_pton(AF_INET6, src_ip, &iph->saddr)) + die(L, "Failed to set source IPv6 address to %s", src_ip); + lua_remove(L, -1); + + lua_getfield(L, -1, "daddr"); + if (!lua_isstring(L, -1)) + die(L, "Destination IPv6 address is not a string\n"); + dst_ip = lua_tostring(L, -1); + if (!inet_pton(AF_INET6, dst_ip, &iph->daddr)) + die(L, "Failed to set destination IPv6 address to %s", dst_ip); + lua_remove(L, -1); + + lua_remove(L, -1); // Remove ip table from the stack + return iph; +} + +static struct udphdr *lua_to_udp_header(lua_State *L, struct packet *pkt, __be64 checksum_part) +{ + struct udphdr *udp; + int src_port; + int dst_port; + int len; + int check; + const char *payload; + + packet_alloc(L, pkt, sizeof(struct udphdr)); + udp = (struct udphdr *) pkt->cur; + + lua_getfield(L, -1, "udp"); + if (!lua_istable(L, -1)) + die(L, "Missing udp header\n"); + + lua_getfield(L, -1, "source"); + if (!lua_isinteger(L, -1)) + die(L, "UDP source port must be an integer\n"); + src_port = lua_tointeger(L, -1); + if (src_port < 0 || src_port > 65535) + die(L, "UDP source port must be an integer between 0 and 65535, but was %d\n", + src_port); + udp->source = htons((short) src_port); + lua_remove(L, -1); + + lua_getfield(L, -1, "dest"); + if (!lua_isinteger(L, -1)) + die(L, "UDP destination port must be an integer\n"); + dst_port = lua_tointeger(L, -1); + if (dst_port < 0 || dst_port > 0xffff) + die(L, "UDP destination port must be an integer between 0 and 65535, but was %d\n", + dst_port); + udp->dest = htons((short) dst_port); + lua_remove(L, -1); + + if (lua_getfield(L, -1, "payload") != LUA_TNIL) { + if (!lua_isstring(L, -1)) + die(L, "UDP payload field must be a string\n"); + len = lua_rawlen(L, -1); + payload = lua_tostring(L, -1); + packet_alloc(L, pkt, len); + memcpy(pkt->cur + sizeof(struct udphdr), payload, len); + udp->len = htons(sizeof(struct udphdr) + len); + } + lua_remove(L, -1); + + if (lua_getfield(L, -1, "len") != LUA_TNIL) { + if (!lua_isinteger(L, -1)) + die(L, "UDP len field must be an integer\n"); + len = lua_tointeger(L, -1); + if (len < 0 || len > 0xffff) + die(L, "UDP len field must be an integer between 0 and 65535, but was %d\n", + len); + udp->len = htons((short) len); + } + lua_remove(L, -1); + + if (lua_getfield(L, -1, "check") != LUA_TNIL) { + if (!lua_isinteger(L, -1)) + die(L, "UDP check field must be an integer\n"); + check = lua_tointeger(L, -1); + if (check < 0 || check > 0xffff) + die(L, "UDP check field must be an integer between 0 and 65535, but was %d\n", + check); + udp->check = htons((short) check); + } + lua_remove(L, -1); + + if (checksum_part != -1) { + udp->check = calc_udp_cksum(udp, checksum_part); + } + + lua_remove(L, -1); // Remove udp table from the stack + return udp; +} + +static struct packet *lua_parse_packet(lua_State *L) +{ + struct packet *pkt = packet_alloc(L, NULL, 0); + struct ethhdr *eth = NULL; + struct ipv6hdr *iph = NULL; + struct udphdr *udp = NULL; + int proto = -1; + __be64 checksum_part = -1; + + eth = lua_to_eth_header(L, pkt); + pkt->cur += sizeof(struct ethhdr); + if (eth->h_proto == ntohs(ETH_P_IPV6)) { + iph = lua_to_ipv6_header(L, pkt); + proto = iph->nexthdr; + pkt->cur += sizeof(struct ipv6hdr); + checksum_part = calc_ipv6_chksum_part(iph); + } + if (proto == IPPROTO_UDP) { + udp = lua_to_udp_header(L, pkt, checksum_part); + + if (iph && iph->payload_len == 0) + iph->payload_len = udp->len; + } + pkt->cur = pkt->data; // Reset cur pointer for comparison + return pkt; +} + +static void set_bpf_fd(lua_State *L, struct bpf_object *obj, const char *func_name, int *prog_fd) +{ + struct bpf_program *prog = bpf_object__find_program_by_name(obj, func_name); + *prog_fd = bpf_program__fd(prog); + if (*prog_fd < 0 ) { + bpf_object__close(obj); + die(L, "Failed to run bpf_program__fd: %s", strerror(errno)); + } +} + +int load_xdq_file(lua_State *L) +{ + struct xdq_state *state; + const char *filename; + struct bpf_object *xdq_bpf_obj; + const char *xdp_func; + const char *dequeue_func; + struct bpf_program *prog; + int err = 0; + + if (lua_gettop(L) != 1) + die(L, "Incorrect number of arguments"); + if (!lua_isstring(L, 1)) + die(L, "Argument must be a string"); + filename = lua_tostring(L, 1); + + state = get_xdq_state(L); + + if (state->xdq_script) + free(state->xdq_script); + state->xdq_script = strdup(filename); + + lua_getglobal(L, "config"); + if (!lua_istable(L, -1)) + die(L, "Missing config table\n"); + + lua_getfield(L, -1, "bpf"); + if (!lua_istable(L, -1)) + die(L, "Missing config.bpf table\n"); + + lua_getfield(L, -1, "xdp_func"); + if (!lua_isstring(L, -1)) + die(L, "Missing config.bpf.xdq_func\n"); + xdp_func = lua_tostring(L, -1); + if (strlen(xdp_func) == 0) + die(L, "config.bpf.xdp_func can't be an empty string"); + lua_remove(L, -1); + + lua_getfield(L, -1, "dequeue_func"); + if (!lua_isstring(L, -1)) + die(L, "Missing config.bpf.dequeue_func\n"); + dequeue_func = lua_tostring(L, -1); + if (strlen(xdp_func) == 0) + die(L, "config.bpf.dequeue_func can't be an empty string"); + lua_remove(L, -1); + + lua_remove(L, -1); // Remove bpf table from the stack + lua_remove(L, -1); // Remove config table from the stack + + xdq_bpf_obj = bpf_object__open_file(state->xdq_script, NULL); + err = libbpf_get_error(xdq_bpf_obj); + if (err) + die(L, "Failed to run bpf_object__open: %s", strerror(errno)); + state->xdq_bpf_obj = xdq_bpf_obj; + + prog = bpf_object__find_program_by_name(xdq_bpf_obj, dequeue_func); + if (!prog) { + bpf_object__close(xdq_bpf_obj); + die(L, "Failed to run bpf_object_find_program_by_name: %s", strerror(errno)); + } + + bpf_program__set_type(prog, BPF_PROG_TYPE_DEQUEUE); + err = bpf_object__load(xdq_bpf_obj); + if (err) { + bpf_object__close(xdq_bpf_obj); + die(L, "Failed to run bpf_object__load: %s", strerror(errno)); + } + + set_bpf_fd(L, xdq_bpf_obj, xdp_func, &state->xdp_prog_fd); + set_bpf_fd(L, xdq_bpf_obj, dequeue_func, &state->dequeue_prog_fd); + return 0; +} + + +static int bpf_xdp(lua_State *L, struct packet *pkt) +{ + struct xdq_state *state; + int total_queued_packets; + int currently_queued_packets; + int err; + struct xdp_md ctx_in = { + .data_end = pkt->length, + }; + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = pkt->data, + .data_size_in = pkt->length, + .ctx_in = &ctx_in, + .ctx_size_in = sizeof(ctx_in), + .repeat = 1, + .flags = BPF_F_TEST_XDP_DO_REDIRECT, + ); + ctx_in.data_end = ctx_in.data + pkt->length; + + state = get_xdq_state(L); + if (state->xdp_prog_fd <= 0) + die(L, "No XDP hook attached"); + err = bpf_prog_test_run_opts(state->xdp_prog_fd, &opts); + if (err) + die(L, "Failed to run XDP hook: %s", strerror(errno)); + + lua_getglobal(L, "xdq"); + if (!lua_istable(L, -1)) + die(L, "Missing xdq table\n"); + + lua_getfield(L, -1, "total_queued"); + if (!lua_isinteger(L, -1)) + die(L, "xdq.total_queued is not an integer\n"); + total_queued_packets = lua_tointeger(L, -1); + lua_remove(L, -1); + total_queued_packets++; + lua_pushinteger(L, total_queued_packets); + lua_setfield(L, -2, "total_queued"); + + lua_getfield(L, -1, "currently_queued"); + if (!lua_isinteger(L, -1)) + die(L, "xdq.currently_queued is not an integer\n"); + currently_queued_packets = lua_tointeger(L, -1); + lua_remove(L, -1); + currently_queued_packets++; + lua_pushinteger(L, currently_queued_packets); + lua_setfield(L, -2, "currently_queued"); + + lua_remove(L, -1); // Remove xdq table from the stack + + return opts.retval; +} + +static struct ethhdr *parse_eth(lua_State *L, struct packet *pkt) +{ + struct ethhdr *eth = (struct ethhdr *) pkt->cur; + + if (pkt->cur + sizeof(struct ethhdr) > pkt->data_end) + die(L, "Missing expected eth header"); + pkt->cur += sizeof(struct ethhdr); + return eth; +} + +static struct ipv6hdr *parse_ipv6(lua_State *L, struct packet *pkt) +{ + struct ipv6hdr *iph = (struct ipv6hdr *) pkt->cur; + + if (pkt->cur + sizeof(struct ipv6hdr) > pkt->data_end) + die(L, "Missing expected IPv6 header"); + pkt->cur += sizeof(struct ipv6hdr); + return iph; +} + +static struct udphdr *parse_udp(lua_State *L, struct packet *pkt) +{ + struct udphdr *udp = (struct udphdr *) pkt->cur; + + if (pkt->cur + sizeof(struct udphdr) > pkt->data_end) + die(L, "Missing expected UDP header"); + pkt->cur += sizeof(struct udphdr); + return udp; +} + +int enqueue(lua_State *L) +{ + struct packet *pkt; + int retval; + + if (lua_gettop(L) != 1) + die(L, "Incorrect number of arguments"); + if (!lua_istable(L, 1)) + die(L, "Argument must be a table"); + + pkt = lua_parse_packet(L); + + retval = bpf_xdp(L, pkt); + + packet_free(pkt); + + lua_pushinteger(L, retval); + return 1; +} + +static struct ethhdr *parse_eth_to_lua(lua_State *L, struct packet *pkt) +{ + struct ethhdr *eth = parse_eth(L, pkt); + char src_mac[18]; + char dst_mac[18]; + + // Ethernet header + lua_createtable(L, -1, 0); + + lua_pushinteger(L, ntohs(eth->h_proto)); + lua_setfield(L, -2, "proto"); + + + mac_to_string(src_mac, eth->h_source); + lua_pushstring(L, src_mac); + lua_setfield(L, -2, "source"); + + mac_to_string(dst_mac, eth->h_dest); + lua_pushstring(L, dst_mac); + lua_setfield(L, -2, "dest"); + + lua_setfield(L, -2, "eth"); + return eth; +} + +static struct ipv6hdr *parse_ipv6_to_lua(lua_State *L, struct packet *pkt) +{ + struct ipv6hdr *iph = parse_ipv6(L, pkt); + char src_ip[INET6_ADDRSTRLEN + 1]; + char dst_ip[INET6_ADDRSTRLEN + 1]; + + // IPv6 header + lua_createtable(L, -1, 0); + + lua_pushinteger(L, (unsigned int) iph->priority); + lua_setfield(L, -2, "priority"); + + lua_pushinteger(L, (unsigned int) iph->version); + lua_setfield(L, -2, "version"); + + lua_createtable(L, -1, 0); + for (int i = 0; i < 3; i++) { + lua_pushinteger(L, (unsigned int) iph->flow_lbl[i]); + lua_rawseti(L, -2, i + 1); + } + lua_setfield(L, -2, "flow_lbl"); + + lua_pushinteger(L, (unsigned int) ntohs(iph->payload_len)); + lua_setfield(L, -2, "payload_len"); + + lua_pushinteger(L, (unsigned int) iph->nexthdr); + lua_setfield(L, -2, "nexthdr"); + + lua_pushinteger(L, (unsigned int) iph->hop_limit); + lua_setfield(L, -2, "hop_limit"); + + lua_pushstring(L, inet_ntop(AF_INET6, &iph->saddr, (char *) &src_ip, sizeof(src_ip))); + lua_setfield(L, -2, "saddr"); + + lua_pushstring(L, inet_ntop(AF_INET6, &iph->daddr, (char *) &dst_ip, sizeof(dst_ip))); + lua_setfield(L, -2, "daddr"); + + lua_setfield(L, -2, "ip"); + return iph; +} + +static struct udphdr *parse_udp_to_lua(lua_State *L, struct packet *pkt) +{ + struct udphdr *udp = parse_udp(L, pkt); + + // UDP header + lua_createtable(L, -1, 0); + + lua_pushinteger(L, (unsigned int) ntohs(udp->source)); + lua_setfield(L, -2, "source"); + + lua_pushinteger(L, (unsigned int) ntohs(udp->dest)); + lua_setfield(L, -2, "dest"); + + lua_pushinteger(L, (unsigned int) ntohs(udp->len)); + lua_setfield(L, -2, "len"); + + lua_pushinteger(L, (unsigned int) ntohs(udp->check)); + lua_setfield(L, -2, "check"); + + if (udp->len - sizeof(struct udphdr) > 0) { + lua_pushlstring(L, pkt->cur, ntohs(udp->len) - sizeof(struct udphdr)); + lua_setfield(L, -2, "payload"); + } + + lua_setfield(L, -2, "udp"); + + return udp; +} + +static void parse_packet_to_lua(lua_State *L, struct packet *pkt) +{ + struct xdq_state *state = get_xdq_state(L); + struct ethhdr *eth = NULL; + struct ipv6hdr *iph = NULL; + int proto = -1; + + // Handle metadata + if (pkt->cur + state->metadata_size > pkt->data_end) + die(L, "Metadata section larger than packet!"); + pkt->cur += state->metadata_size; + + // Packet table + lua_createtable(L, -1, 0); + + eth = parse_eth_to_lua(L, pkt); + + if (eth->h_proto == ntohs(ETH_P_IPV6)) { + iph = parse_ipv6_to_lua(L, pkt); + proto = iph->nexthdr; + } + if (proto == IPPROTO_UDP) + parse_udp_to_lua(L, pkt); +} + +static int bpf_dequeue(lua_State *L, struct packet *pkt) +{ + + struct xdq_state *state = get_xdq_state(L); + int err; + int total_dequeued_packets; + int currently_queued_packets; + if (state->dequeue_prog_fd <= 0) + die(L, "No DEQUEUE hook attached"); + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_out = pkt->data, + .data_size_out = pkt->length, + .repeat = 1, + ); + + err = bpf_prog_test_run_opts(state->dequeue_prog_fd, &opts); + if (err) + die(L, "Failed to run DEQUEUE hook: %s", strerror(errno)); + + lua_getglobal(L, "xdq"); + if (!lua_istable(L, -1)) + die(L, "Missing xdq table\n"); + + lua_getfield(L, -1, "total_dequeued"); + if (!lua_isinteger(L, -1)) + die(L, "xdq.total_dequeued is not an integer\n"); + total_dequeued_packets = lua_tointeger(L, -1); + lua_remove(L, -1); + total_dequeued_packets++; + lua_pushinteger(L, total_dequeued_packets); + lua_setfield(L, -2, "total_dequeued"); + + lua_getfield(L, -1, "currently_queued"); + if (!lua_isinteger(L, -1)) + die(L, "xdq.currently_queued is not an integer\n"); + currently_queued_packets = lua_tointeger(L, -1); + lua_remove(L, -1); + currently_queued_packets--; + lua_pushinteger(L, currently_queued_packets); + lua_setfield(L, -2, "currently_queued"); + + lua_remove(L, -1); // Remove xdq table from the stack + + return opts.retval; +} + +int dequeue(lua_State *L) +{ + struct packet *pkt; + int retval; + + if (lua_gettop(L) != 0) + die(L, "Function takes no arguments"); + + pkt = packet_alloc(L, NULL, 4096); + retval = bpf_dequeue(L, pkt); + + parse_packet_to_lua(L, pkt); + packet_free(pkt); + + lua_pushinteger(L, retval); + // Return packet and DEQEUEUE hook return value + return 2; +} + +int normalize_ipv6_address(lua_State *L) +{ + char ip_str[INET6_ADDRSTRLEN + 1]; + struct in6_addr ip; + const char *ip_param; + + if (lua_gettop(L) != 1) + die(L, "Incorrect number of arguments"); + if (!lua_isstring(L, 1)) + die(L, "Argument must be a string"); + ip_param = lua_tostring(L, 1); + + if (!inet_pton(AF_INET6, ip_param, &ip)) + die(L, "Failed to parse IPv6 address %s", ip_param); + + lua_pushstring(L, inet_ntop(AF_INET6, &ip, ip_str, sizeof(ip_str))); + return 1; +} + +int adjust_meta(lua_State *L) +{ + struct xdq_state *state = get_xdq_state(L); + int metadata_size = 0; + + if (lua_gettop(L) != 1) + die(L, "Incorrect number of arguments"); + if (!lua_isstring(L, 1)) + die(L, "Argument must be an integer"); + metadata_size += lua_tointeger(L, 1); + + if (metadata_size < 0) + die(L, "Metadata can not be less than zero"); + + state->metadata_size = metadata_size; + return 0; +} + +int fail_xdq(lua_State *L) +{ + const char *message = ""; + if (lua_gettop(L) == 1 && lua_isstring(L, -1)) + message = lua_tostring(L, -1); + die(L, message); + return -1; +} + +/* Scheduler specific helpers + * + * The following functions are here to aid scheduling algorithms to function, such + * as providing our Lua implementation with five-tuple flow handling. Ideally, this + * functionality these functions would be generic and use BTF. + * For now, we have tailored these functions for specific scheduling algorithms. + */ + +int show_flow_map(lua_State *L) +{ + system("bpftool map dump name flow_states"); + return 0; +} + +int set_flow_weight(lua_State *L) +{ + struct xdq_state *state = get_xdq_state(L); + struct network_tuple nt = {0}; + struct flow_state flow = {0}; + int flow_states_fd; + struct packet *pkt; + struct ipv6hdr *iph; + struct udphdr *udp; + int weight; + + if (lua_gettop(L) != 2) + die(L, "Incorrect number of arguments"); + if (!lua_istable(L, 1)) + die(L, "First argument must be a table"); + + if (!lua_isinteger(L, -1)) + die(L, "weight parameter isn't a number\n"); + weight = lua_tointeger(L, 2); + lua_pop(L, 1); + + pkt = lua_parse_packet(L); + parse_eth(L, pkt); + iph = parse_ipv6(L, pkt); + udp = parse_udp(L, pkt); + + nt.proto = iph->nexthdr; + nt.ipv = iph->version; + nt.saddr.ip = iph->saddr; + nt.daddr.ip = iph->daddr; + nt.daddr.port = udp->dest; + nt.saddr.port = udp->source; + + + flow.pkts = 0; + flow.finish_bytes = 0; + flow.weight = weight; + flow.persistent = 1; + + flow_states_fd = bpf_object__find_map_fd_by_name(state->xdq_bpf_obj, "flow_states"); + + if (bpf_map_update_elem(flow_states_fd, &nt, &flow, BPF_ANY)) + die(L, "Failed to update map"); + + return 0; +} + +int set_time_ns(lua_State *L) +{ + struct xdq_state *state = get_xdq_state(L); + int time_ns_fd; + __u32 key = 0; + __u64 time_ns; + + if (lua_gettop(L) != 1) + die(L, "Incorrect number of arguments"); + if (!lua_isinteger(L, -1)) + die(L, "Argument must be an integer"); + time_ns = lua_tointeger(L, 1); + lua_pop(L, 1); + + time_ns_fd = bpf_object__find_map_fd_by_name(state->xdq_bpf_obj, "xdq_time_ns"); + + if (bpf_map_update_elem(time_ns_fd, &key, &time_ns, BPF_ANY)) + die(L, "Failed to update map"); + + return 0; +} + +int get_time_ns(lua_State *L) +{ + struct xdq_state *state = get_xdq_state(L); + int time_ns_fd; + __u32 key = 0; + __u64 time_ns; + + if (lua_gettop(L) != 0) + die(L, "Incorrect number of arguments"); + + time_ns_fd = bpf_object__find_map_fd_by_name(state->xdq_bpf_obj, "xdq_time_ns"); + + if (bpf_map_lookup_elem(time_ns_fd, &key, &time_ns)) + die(L, "Failed to lookup map"); + + lua_pushnumber(L, time_ns); + + return 1; +} +/* End of Scheduler specific helpers */ + + +int get_clock(lua_State *L) +{ + struct timespec t; + clock_gettime(CLOCK_MONOTONIC, &t); + lua_pushnumber(L, (int64_t)t.tv_sec * (int64_t)1000000000UL + (int64_t)t.tv_nsec); + return 1; +} + + +static void initLuaFunctions(lua_State *L, char *prog_name) +{ + struct xdq_state *state = lua_newuserdatauv(L, sizeof(struct xdq_state), 0); + state->prog_name = prog_name; + state->xdq_script = NULL; + state->xdp_prog_fd = -1; + state->dequeue_prog_fd = -1; + lua_setglobal(L, "_xdq"); + + lua_pushcfunction(L, enqueue); + lua_setglobal(L, "xdq_enqueue"); + + lua_pushcfunction(L, dequeue); + lua_setglobal(L, "xdq_dequeue"); + + lua_pushcfunction(L, load_xdq_file); + lua_setglobal(L, "load_xdq_file"); + + lua_pushcfunction(L, normalize_ipv6_address); + lua_setglobal(L, "normalize_ipv6_address"); + + lua_pushcfunction(L, adjust_meta); + lua_setglobal(L, "adjust_meta"); + + lua_pushcfunction(L, fail_xdq); + lua_setglobal(L, "fail"); + + /* Scheduler specific helpers */ + lua_pushcfunction(L, set_flow_weight); + lua_setglobal(L, "set_flow_weight"); + + lua_pushcfunction(L, set_time_ns); + lua_setglobal(L, "set_time_ns"); + + lua_pushcfunction(L, get_time_ns); + lua_setglobal(L, "get_time_ns"); + + lua_pushcfunction(L, show_flow_map); + lua_setglobal(L, "show_flow_map"); + + lua_pushcfunction(L, get_clock); + lua_setglobal(L, "get_clock"); +} + +static void run_lua_file(lua_State *L, char *fullpath, char *filename) +{ + if (!realpath("/proc/self/exe", fullpath)) + die(L, "Program location not found"); + dirname(fullpath); + if (strlen(fullpath) + strlen(filename + 1) >= PATH_MAX) + die(L, "Path to '%s' too long\nPath: '%s'", filename, fullpath); + strncat(fullpath, "/", PATH_MAX); + strncat(fullpath, filename, PATH_MAX); + + if (luaL_dofile(L, fullpath) != LUA_OK) + die(L, "Failed to load LUA file: %s\n", fullpath); +} + +static void usage(char *prog_name) +{ + printf("Usage: %s [OPTIONS] \n", prog_name); + fputs("\nTest XDP and DEQUEUE BPF hooks.\n", stdout); + fputs("Mandatory arguments to long options are mandatory for short options too.\n", stdout); + fputs("\n", stdout); + fputs("-v, --verbose output BPF diagnostic\n", stdout); + fputs("-h, --help display this help and exit\n", stdout); +} + +int main(int argc, char *argv[]) +{ + lua_State *L; + struct xdq_state *state; + char lib_file[PATH_MAX + 1] = {0}; + char config_file[PATH_MAX + 1] = {0}; + char *sched_file = NULL; + int opt; + + init_lib_logging(); + L = luaL_newstate(); + luaL_openlibs(L); + initLuaFunctions(L, argv[0]); + state = get_xdq_state(L); + + run_lua_file(L, lib_file, XDQ_LIBRARY); + run_lua_file(L, config_file, XDQ_CONFIG); + + while ((opt = getopt_long(argc, argv, "f:vh", long_options, NULL)) != -1) { + switch (opt) { + case 'f': + sched_file = optarg; + break; + case 'v': + set_log_level(LOG_VERBOSE); + break; + case 'h': + usage(argv[0]); + exit(EXIT_SUCCESS); + default: + usage(argv[0]); + exit(EXIT_FAILURE); + } + } + if (argc - optind == 1) + sched_file = argv[optind]; + if (!sched_file) { + fprintf(stderr, "No XDQ object file provided. Use %s \n", argv[0]); + usage(argv[0]); + exit(EXIT_FAILURE); + } + + if (luaL_dofile(L, sched_file) != LUA_OK) { + die(L, "%s", lua_tostring(L, -1)); + } + free(state->xdq_script); + lua_close(L); + return EXIT_SUCCESS; +} diff --git a/xdq-tester/xdq-tester.h b/xdq-tester/xdq-tester.h new file mode 100644 index 00000000..8132763a --- /dev/null +++ b/xdq-tester/xdq-tester.h @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2022 Freysteinn Alfredsson */ + +#ifndef XDQ_TESTER_H_ +#define XDQ_TESTER_H_ + +#include +#include +#include +#include +#include +#include + +#define BPF_PROG_TYPE_DEQUEUE 32 +#define BPF_F_TEST_XDP_DO_REDIRECT (1U << 1) + +#define XDQ_LIBRARY "lib.lua" +#define XDQ_CONFIG "config.lua" + +struct packet { + char *data; + char *data_end; + size_t length; + char *cur; +}; + +struct xdq_state { + struct bpf_object *xdq_bpf_obj; + char *xdq_script; + char *prog_name; + int xdp_prog_fd; + int dequeue_prog_fd; + int metadata_size; +}; + +struct xdq_state *get_xdq_state(lua_State *L); +void die(lua_State *L, const char *format, ...); +struct packet *packet_alloc(lua_State *L, struct packet *pkt, size_t size); +void packet_free(struct packet *pkt); + +/* Lua exported functions */ +int load_xdq_file(lua_State *L); +int enqueue(lua_State *L); +int dequeue(lua_State *L); +int normalize_ipv6_address(lua_State *L); +int fail_xdq(lua_State *L); + +#endif // XDQ_TESTER_H_