diff --git a/README.md b/README.md index faed493..934c16e 100644 --- a/README.md +++ b/README.md @@ -116,16 +116,40 @@ Load it with `tc` commands: - Client: ``` tc qdisc add dev "${IFACE}" clsact - tc filter add dev "${IFACE}" egress bpf obj tcp_in_udp_tc.o sec tc_client_egress action csum udp - tc filter add dev "${IFACE}" ingress bpf da obj tcp_in_udp_tc.o sec tc_client_ingress + tc filter add dev "${IFACE}" egress protocol ip flower ip_proto tcp dst_port "${PORT_START}"-"${PORT_END}" action goto chain 1 + tc filter add dev "${IFACE}" egress chain 1 bpf object-file tcp_in_udp_tc.o section tc action csum udp + tc filter add dev "${IFACE}" ingress protocol ip flower ip_proto udp src_port "${PORT_START}"-"${PORT_END}" action goto chain 1 + tc filter add dev "${IFACE}" ingress chain 1 bpf object-file tcp_in_udp_tc.o section tc direct-action ``` - Server: ``` tc qdisc add dev "${IFACE}" clsact - tc filter add dev "${IFACE}" egress bpf obj tcp_in_udp_tc.o sec tc_server_egress action csum udp - tc filter add dev "${IFACE}" ingress bpf da obj tcp_in_udp_tc.o sec tc_server_ingress + tc filter add dev "${IFACE}" egress protocol ip flower ip_proto tcp src_port "${PORT_START}"-"${PORT_END}" action goto chain 1 + tc filter add dev "${IFACE}" egress chain 1 bpf object-file tcp_in_udp_tc.o section tc action csum udp + tc filter add dev "${IFACE}" ingress protocol ip flower ip_proto udp dst_port "${PORT_START}"-"${PORT_END}" action goto chain 1 + tc filter add dev "${IFACE}" ingress chain 1 bpf object-file tcp_in_udp_tc.o section tc direct-action ``` +If the TCP programme supports setting marks (SO_MARK), use it for egress to +prevent processing traffic that is not from the TCP programme. For the client, +this allows traffic to a different IP address with the same TCP port. For the +server, this prevents sending packets to the BPF programme if the interface has +multiple IP addresses assigned and the TCP programme doesn't bind to all of them. 
+ +- Client & Server (the `fw` handle is the mark value to match, here 2): + ``` + tc filter add dev "${IFACE}" egress handle 2 fw action goto chain 1 + ``` + +Be warned that SO_MARK can't be used for ingress, as the system doesn't expect +incoming UDP packets. Therefore, all incoming UDP packets from the interface +with a matching port will be sent to the BPF programme. For example, if the +client or server has UDP traffic to a matching port, incoming packets will be +unintentionally processed by the BPF programme. It is therefore recommended to +use ports that are outside of the ephemeral port range set in +net.ipv4.ip_local_port_range (default: 32768-60999). The +net.ipv4.ip_local_port_range option applies to IPv6 too. + GRO/TSO cannot be used on this interface, because each UDP packet will carry a part of the TCP headers as part of the data: this is specific to one packet, and it cannot be merged with the next data. Please use this: @@ -163,15 +187,3 @@ tc filter del dev "${IFACE}" ingress Because the packets will be in UDP and not TCP, any MSS clamping will have no effects here. It is important to avoid IP fragmentation. In other words, it might be required to adapt the MTU (or the MSS). 
- -## Identification - -### Client side: - -- Ingress: From a specific destination IP and port in UDP -- Egress: To a specific destination IP and port in TCP - -### Server side: - -- Ingress: To a specific destination IP and port in UDP -- Egress: From a previously used `sk`: use ConnMark to set a specific `SO_MARK` diff --git a/tcp_in_udp_tc.c b/tcp_in_udp_tc.c index 01fc82e..dbfae37 100644 --- a/tcp_in_udp_tc.c +++ b/tcp_in_udp_tc.c @@ -26,13 +26,6 @@ struct hdr_cursor { void *pos; }; -__u16 PORT = 5201; - -enum side { - SERVER, - CLIENT, -}; - /******************************************* ** parse_*hdr helpers from XDP tutorials ** *******************************************/ @@ -156,7 +149,7 @@ static __always_inline int parse_udphdr(struct hdr_cursor *nh, static __always_inline void udp_to_tcp(struct __sk_buff *skb, struct hdr_cursor *nh, - struct iphdr *iphdr, struct ipv6hdr *ipv6hdr, enum side side) + struct iphdr *iphdr, struct ipv6hdr *ipv6hdr) { void *data_end = (void *)(long)skb->data_end; void *data = (void *)(long)skb->data; @@ -169,17 +162,6 @@ udp_to_tcp(struct __sk_buff *skb, struct hdr_cursor *nh, if (parse_udphdr(nh, data_end, (struct udphdr**)&tuhdr) < 0) goto out; - switch (side) { - case SERVER: - if (tuhdr->udphdr.dest != bpf_htons(PORT)) - goto out; - break; - case CLIENT: - if (tuhdr->udphdr.source != bpf_htons(PORT)) - goto out; - break; - } - if (skb->gso_segs > 1) { bpf_printk("udp-tcp: WARNING, GRO/LRO should be disabled: length:%u, segs:%u, size:%u\n", skb->len, skb->gso_segs, skb->gso_size); @@ -249,45 +231,6 @@ udp_to_tcp(struct __sk_buff *skb, struct hdr_cursor *nh, return; } -static __always_inline int -tc_ingress(struct __sk_buff *skb, enum side side) -{ - void *data_end = (void *)(long)skb->data_end; - void *data = (void *)(long)skb->data; - struct hdr_cursor nh = { .pos = data }; - int eth_type, ip_type, ret = TC_ACT_OK; - struct ipv6hdr *ipv6hdr = NULL; - struct iphdr *iphdr = NULL; - struct ethhdr *eth; - - eth_type = 
parse_ethhdr(&nh, data_end, &eth); - if (eth_type == bpf_htons(ETH_P_IP)) { - ip_type = parse_iphdr(&nh, data_end, &iphdr); - } else if (eth_type == bpf_htons(ETH_P_IPV6)) { - ip_type = parse_ip6hdr(&nh, data_end, &ipv6hdr); - } else { - goto out; - } - - if (ip_type == IPPROTO_UDP) - udp_to_tcp(skb, &nh, iphdr, ipv6hdr, side); - -out: - return ret; -} - -SEC("tc_client_ingress") -int client_ingress(struct __sk_buff *skb) -{ - return tc_ingress(skb, CLIENT); -} - -SEC("tc_server_ingress") -int server_ingress(struct __sk_buff *skb) -{ - return tc_ingress(skb, SERVER); -} - /************ ** Egress ** @@ -295,7 +238,7 @@ int server_ingress(struct __sk_buff *skb) static __always_inline int tcp_to_udp(struct __sk_buff *skb, struct hdr_cursor *nh, - struct iphdr *iphdr, struct ipv6hdr *ipv6hdr, enum side side) + struct iphdr *iphdr, struct ipv6hdr *ipv6hdr) { void *data_end = (void *)(long)skb->data_end; void *data = (void *)(long)skb->data; @@ -309,17 +252,6 @@ tcp_to_udp(struct __sk_buff *skb, struct hdr_cursor *nh, if (parse_tcphdr(nh, data_end, &tcphdr) < 0) goto out; - switch (side) { - case SERVER: - if (tcphdr->source != bpf_htons(PORT)) - goto out; - break; - case CLIENT: - if (tcphdr->dest != bpf_htons(PORT)) - goto out; - break; - } - if (tcphdr->urg) { if (iphdr) bpf_printk("tcp-udp: Skip: %pI4:%u -> %pI4:%u: urgent\n", @@ -386,8 +318,8 @@ tcp_to_udp(struct __sk_buff *skb, struct hdr_cursor *nh, return TC_ACT_OK; } -static __always_inline int -tc_egress(struct __sk_buff *skb, enum side side) +SEC("tc") +int tc_tcp_in_udp(struct __sk_buff *skb) { void *data_end = (void *)(long)skb->data_end; void *data = (void *)(long)skb->data; @@ -403,26 +335,22 @@ tc_egress(struct __sk_buff *skb, enum side side) } else if (eth_type == bpf_htons(ETH_P_IPV6)) { ip_type = parse_ip6hdr(&nh, data_end, &ipv6hdr); } else { - goto out; + nh.pos = data; + if (skb->protocol == bpf_htons(ETH_P_IP)) + ip_type = parse_iphdr(&nh, data_end, &iphdr); + else if (skb->protocol == 
bpf_htons(ETH_P_IPV6)) + ip_type = parse_ip6hdr(&nh, data_end, &ipv6hdr); + else + goto out; } if (ip_type == IPPROTO_TCP) - return tcp_to_udp(skb, &nh, iphdr, ipv6hdr, side); + return tcp_to_udp(skb, &nh, iphdr, ipv6hdr); + else if (ip_type == IPPROTO_UDP) + udp_to_tcp(skb, &nh, iphdr, ipv6hdr); out: return ret; } -SEC("tc_client_egress") -int client_egress(struct __sk_buff *skb) -{ - return tc_egress(skb, CLIENT); -} - -SEC("tc_server_egress") -int server_egress(struct __sk_buff *skb) -{ - return tc_egress(skb, SERVER); -} - char _license[] SEC("license") = "GPL"; diff --git a/test.sh b/test.sh index 7d7b51b..0dc9466 100755 --- a/test.sh +++ b/test.sh @@ -40,14 +40,16 @@ server() tc_client() { - local ns="${NS}_cpe" iface="int" + local ns="${NS}_cpe" iface="int" port_start="5201" port_end="5203" # ip netns will umount everything on exit ip netns exec "${ns}" sh -c "mount -t debugfs none /sys/kernel/debug && cat /sys/kernel/debug/tracing/trace_pipe" & tc -n "${ns}" qdisc add dev "${iface}" clsact - tc -n "${ns}" filter add dev "${iface}" egress bpf obj tcp_in_udp_tc.o sec tc_client_egress action csum udp index 100 - tc -n "${ns}" filter add dev "${iface}" ingress bpf da obj tcp_in_udp_tc.o sec tc_client_ingress + tc -n "${ns}" filter add dev "${iface}" egress protocol ip flower ip_proto tcp dst_port "${port_start}"-"${port_end}" action goto chain 1 + tc -n "${ns}" filter add dev "${iface}" egress chain 1 bpf object-file tcp_in_udp_tc.o section tc action csum udp + tc -n "${ns}" filter add dev "${iface}" ingress protocol ip flower ip_proto udp src_port "${port_start}"-"${port_end}" action goto chain 1 + tc -n "${ns}" filter add dev "${iface}" ingress chain 1 bpf object-file tcp_in_udp_tc.o section tc direct-action tc -n "${ns}" filter show dev "${iface}" egress tc -n "${ns}" filter show dev "${iface}" ingress @@ -59,14 +61,16 @@ tc_client() tc_server() { - local ns="${NS}_net" iface="int" + local ns="${NS}_net" iface="int" port_start="5201" port_end="5203" # ip 
netns will umount everything on exit ip netns exec "${ns}" sh -c "mount -t debugfs none /sys/kernel/debug && cat /sys/kernel/debug/tracing/trace_pipe" & tc -n "${ns}" qdisc add dev "${iface}" clsact - tc -n "${ns}" filter add dev "${iface}" egress bpf obj tcp_in_udp_tc.o sec tc_server_egress action csum udp index 100 - tc -n "${ns}" filter add dev "${iface}" ingress bpf da obj tcp_in_udp_tc.o sec tc_server_ingress + tc -n "${ns}" filter add dev "${iface}" egress protocol ip flower ip_proto tcp src_port "${port_start}"-"${port_end}" action goto chain 1 + tc -n "${ns}" filter add dev "${iface}" egress chain 1 bpf object-file tcp_in_udp_tc.o section tc action csum udp + tc -n "${ns}" filter add dev "${iface}" ingress protocol ip flower ip_proto udp dst_port "${port_start}"-"${port_end}" action goto chain 1 + tc -n "${ns}" filter add dev "${iface}" ingress chain 1 bpf object-file tcp_in_udp_tc.o section tc direct-action tc -n "${ns}" filter show dev "${iface}" egress tc -n "${ns}" filter show dev "${iface}" ingress