From 9ebca18847e10561ced5882c7bd1c8fc584e613e Mon Sep 17 00:00:00 2001 From: "Chester A. Unal" Date: Wed, 30 Jul 2025 08:35:02 +0100 Subject: [PATCH 1/5] {readme,test}: only disable GSO and explain no need to disable GRO Offloads other than GSO and GRO do not break this type of traffic. Document disabling GSO and explain why disabling GRO is not needed. Signed-off-by: Chester A. Unal --- README.md | 14 +++++++------- test.sh | 8 ++------ 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index faed493..81aa2e8 100644 --- a/README.md +++ b/README.md @@ -126,17 +126,17 @@ Load it with `tc` commands: tc filter add dev "${IFACE}" ingress bpf da obj tcp_in_udp_tc.o sec tc_server_ingress ``` -GRO/TSO cannot be used on this interface, because each UDP packet will carry a -part of the TCP headers as part of the data: this is specific to one packet, and -it cannot be merged with the next data. Please use this: +Generic Segmentation Offload (GSO) and Generic Receive Offload (GRO) cannot be +used for this traffic, because each UDP packet will carry a part of the TCP +headers as part of the data. This part of the data is specific to one packet, +therefore, it cannot be merged with the next data. UDP GRO is only done on +demand, e.g. when the userspace asks it (setsockopt(IPPROTO_UDP, UDP_GRO)) or +for some in-kernel tunnels, so GRO doesn't need to be disabled. To disable GSO: ``` -ethtool -K "${IFACE}" gro off lro off gso off tso off ufo off sg off -ip link set ${IFACE} gso_max_segs 1 +ip link set ${IFACE} gso_max_segs 0 ``` -(to be checked: maybe it is enough to disable `gro` and `gso/tso`.) 
- Note: to get some stats, in egress, it is possible to use: ``` diff --git a/test.sh b/test.sh index 7d7b51b..4e41257 100755 --- a/test.sh +++ b/test.sh @@ -52,9 +52,7 @@ tc_client() tc -n "${ns}" filter show dev "${iface}" egress tc -n "${ns}" filter show dev "${iface}" ingress - ip netns exec "${ns}" ethtool -K "${iface}" gro off gso off tso off lro off ufo off sg off - # ip -n "${NS}_cli" link set "cpe" gso_max_segs 1 # but perf impact in this particular setup - ip netns exec "${NS}_cli" ethtool -K "cpe" gro off gso off tso off lro off ufo off sg off + ip -n "${NS}_cli" link set "cpe" gso_max_segs 0 } tc_server() @@ -71,9 +69,7 @@ tc_server() tc -n "${ns}" filter show dev "${iface}" egress tc -n "${ns}" filter show dev "${iface}" ingress - ip netns exec "${ns}" ethtool -K "${iface}" gro off gso off tso off lro off ufo off sg off - # ip -n "${NS}_srv" link set "net" gso_max_segs 1 # but perf impact in this particular setup - ip netns exec "${NS}_srv" ethtool -K "net" gro off gso off tso off lro off ufo off sg off + ip -n "${NS}_srv" link set "net" gso_max_segs 0 } capture() From 33c6db7663e1a09d7819ada24dadd1012409ed68 Mon Sep 17 00:00:00 2001 From: "Chester A. Unal" Date: Wed, 30 Jul 2025 08:36:21 +0100 Subject: [PATCH 2/5] {readme,tc}: remove port check and document configuration with tc The layer 4 protocol and UDP or TCP port can be distinguished by a tc filter. Document that and remove the logic to discriminate packets by UDP or TCP port from the BPF programme. Add warnings to the README. Signed-off-by: Chester A. 
Unal --- README.md | 45 +++++++++++++++--------- tcp_in_udp_tc.c | 92 ++++--------------------------------------------- test.sh | 16 +++++---- 3 files changed, 46 insertions(+), 107 deletions(-) diff --git a/README.md b/README.md index 81aa2e8..e87d3c9 100644 --- a/README.md +++ b/README.md @@ -116,16 +116,41 @@ Load it with `tc` commands: - Client: ``` tc qdisc add dev "${IFACE}" clsact - tc filter add dev "${IFACE}" egress bpf obj tcp_in_udp_tc.o sec tc_client_egress action csum udp - tc filter add dev "${IFACE}" ingress bpf da obj tcp_in_udp_tc.o sec tc_client_ingress + tc filter add dev "${IFACE}" egress u32 match ip dport "${PORT}" 0xffff action goto chain 1 + tc filter add dev "${IFACE}" egress chain 1 bpf object-file tcp_in_udp_tc.o section tc action csum udp + tc filter add dev "${IFACE}" ingress u32 match ip sport "${PORT}" 0xffff action goto chain 1 + tc filter add dev "${IFACE}" ingress chain 1 bpf object-file tcp_in_udp_tc.o section tc direct-action ``` - Server: ``` tc qdisc add dev "${IFACE}" clsact - tc filter add dev "${IFACE}" egress bpf obj tcp_in_udp_tc.o sec tc_server_egress action csum udp - tc filter add dev "${IFACE}" ingress bpf da obj tcp_in_udp_tc.o sec tc_server_ingress + tc filter add dev "${IFACE}" egress u32 match ip sport "${PORT}" 0xffff action goto chain 1 + tc filter add dev "${IFACE}" egress chain 1 bpf object-file tcp_in_udp_tc.o section tc action csum udp + tc filter add dev "${IFACE}" ingress u32 match ip dport "${PORT}" 0xffff action goto chain 1 + tc filter add dev "${IFACE}" ingress chain 1 bpf object-file tcp_in_udp_tc.o section tc direct-action ``` +Multiple u32 filters can be used to have more than one port traffic sent to the +BPF programme. + +If the TCP programme supports setting marks (SO_MARK), use it for egress to +prevent processing traffic that is not from the TCP programme. For client, this +allows traffic to a different IP address with the same TCP port. 
For server, +this prevents sending packet to BPF programme if the interface has multiple IP +addresses assigned and if the TCP programme doesn't bind to all of them. + +- Client & Server: + ``` + tc filter add dev "${IFACE}" egress handle 2 fw action goto chain 1 + ``` + +Be warned that SO_MARK can't be used for ingress as the system doesn't expect +incoming UDP packets. Therefore, all incoming packets from the interface with +matching port will be sent to the BPF programme. To decrease the chance of this +happening, you're recommended to use ports that are outside of the ephemeral +port range set on net.ipv4.ip_local_port_range (default: 32768-60999). The +net.ipv4.ip_local_port_range option applies to IPv6 too. + Generic Segmentation Offload (GSO) and Generic Receive Offload (GRO) cannot be used for this traffic, because each UDP packet will carry a part of the TCP headers as part of the data. This part of the data is specific to one packet, @@ -163,15 +188,3 @@ tc filter del dev "${IFACE}" ingress Because the packets will be in UDP and not TCP, any MSS clamping will have no effects here. It is important to avoid IP fragmentation. In other words, it might be required to adapt the MTU (or the MSS). 
- -## Identification - -### Client side: - -- Ingress: From a specific destination IP and port in UDP -- Egress: To a specific destination IP and port in TCP - -### Server side: - -- Ingress: To a specific destination IP and port in UDP -- Egress: From a previously used `sk`: use ConnMark to set a specific `SO_MARK` diff --git a/tcp_in_udp_tc.c b/tcp_in_udp_tc.c index 01fc82e..f215d6f 100644 --- a/tcp_in_udp_tc.c +++ b/tcp_in_udp_tc.c @@ -26,13 +26,6 @@ struct hdr_cursor { void *pos; }; -__u16 PORT = 5201; - -enum side { - SERVER, - CLIENT, -}; - /******************************************* ** parse_*hdr helpers from XDP tutorials ** *******************************************/ @@ -156,7 +149,7 @@ static __always_inline int parse_udphdr(struct hdr_cursor *nh, static __always_inline void udp_to_tcp(struct __sk_buff *skb, struct hdr_cursor *nh, - struct iphdr *iphdr, struct ipv6hdr *ipv6hdr, enum side side) + struct iphdr *iphdr, struct ipv6hdr *ipv6hdr) { void *data_end = (void *)(long)skb->data_end; void *data = (void *)(long)skb->data; @@ -169,17 +162,6 @@ udp_to_tcp(struct __sk_buff *skb, struct hdr_cursor *nh, if (parse_udphdr(nh, data_end, (struct udphdr**)&tuhdr) < 0) goto out; - switch (side) { - case SERVER: - if (tuhdr->udphdr.dest != bpf_htons(PORT)) - goto out; - break; - case CLIENT: - if (tuhdr->udphdr.source != bpf_htons(PORT)) - goto out; - break; - } - if (skb->gso_segs > 1) { bpf_printk("udp-tcp: WARNING, GRO/LRO should be disabled: length:%u, segs:%u, size:%u\n", skb->len, skb->gso_segs, skb->gso_size); @@ -249,45 +231,6 @@ udp_to_tcp(struct __sk_buff *skb, struct hdr_cursor *nh, return; } -static __always_inline int -tc_ingress(struct __sk_buff *skb, enum side side) -{ - void *data_end = (void *)(long)skb->data_end; - void *data = (void *)(long)skb->data; - struct hdr_cursor nh = { .pos = data }; - int eth_type, ip_type, ret = TC_ACT_OK; - struct ipv6hdr *ipv6hdr = NULL; - struct iphdr *iphdr = NULL; - struct ethhdr *eth; - - eth_type = 
parse_ethhdr(&nh, data_end, &eth); - if (eth_type == bpf_htons(ETH_P_IP)) { - ip_type = parse_iphdr(&nh, data_end, &iphdr); - } else if (eth_type == bpf_htons(ETH_P_IPV6)) { - ip_type = parse_ip6hdr(&nh, data_end, &ipv6hdr); - } else { - goto out; - } - - if (ip_type == IPPROTO_UDP) - udp_to_tcp(skb, &nh, iphdr, ipv6hdr, side); - -out: - return ret; -} - -SEC("tc_client_ingress") -int client_ingress(struct __sk_buff *skb) -{ - return tc_ingress(skb, CLIENT); -} - -SEC("tc_server_ingress") -int server_ingress(struct __sk_buff *skb) -{ - return tc_ingress(skb, SERVER); -} - /************ ** Egress ** @@ -295,7 +238,7 @@ int server_ingress(struct __sk_buff *skb) static __always_inline int tcp_to_udp(struct __sk_buff *skb, struct hdr_cursor *nh, - struct iphdr *iphdr, struct ipv6hdr *ipv6hdr, enum side side) + struct iphdr *iphdr, struct ipv6hdr *ipv6hdr) { void *data_end = (void *)(long)skb->data_end; void *data = (void *)(long)skb->data; @@ -309,17 +252,6 @@ tcp_to_udp(struct __sk_buff *skb, struct hdr_cursor *nh, if (parse_tcphdr(nh, data_end, &tcphdr) < 0) goto out; - switch (side) { - case SERVER: - if (tcphdr->source != bpf_htons(PORT)) - goto out; - break; - case CLIENT: - if (tcphdr->dest != bpf_htons(PORT)) - goto out; - break; - } - if (tcphdr->urg) { if (iphdr) bpf_printk("tcp-udp: Skip: %pI4:%u -> %pI4:%u: urgent\n", @@ -386,8 +318,8 @@ tcp_to_udp(struct __sk_buff *skb, struct hdr_cursor *nh, return TC_ACT_OK; } -static __always_inline int -tc_egress(struct __sk_buff *skb, enum side side) +SEC("tc") +int tc_tcp_in_udp(struct __sk_buff *skb) { void *data_end = (void *)(long)skb->data_end; void *data = (void *)(long)skb->data; @@ -407,22 +339,12 @@ tc_egress(struct __sk_buff *skb, enum side side) } if (ip_type == IPPROTO_TCP) - return tcp_to_udp(skb, &nh, iphdr, ipv6hdr, side); + return tcp_to_udp(skb, &nh, iphdr, ipv6hdr); + if (ip_type == IPPROTO_UDP) + udp_to_tcp(skb, &nh, iphdr, ipv6hdr); out: return ret; } -SEC("tc_client_egress") -int client_egress(struct 
__sk_buff *skb) -{ - return tc_egress(skb, CLIENT); -} - -SEC("tc_server_egress") -int server_egress(struct __sk_buff *skb) -{ - return tc_egress(skb, SERVER); -} - char _license[] SEC("license") = "GPL"; diff --git a/test.sh b/test.sh index 4e41257..478391d 100755 --- a/test.sh +++ b/test.sh @@ -40,14 +40,16 @@ server() tc_client() { - local ns="${NS}_cpe" iface="int" + local ns="${NS}_cpe" iface="int" port="5201" # ip netns will umount everything on exit ip netns exec "${ns}" sh -c "mount -t debugfs none /sys/kernel/debug && cat /sys/kernel/debug/tracing/trace_pipe" & tc -n "${ns}" qdisc add dev "${iface}" clsact - tc -n "${ns}" filter add dev "${iface}" egress bpf obj tcp_in_udp_tc.o sec tc_client_egress action csum udp index 100 - tc -n "${ns}" filter add dev "${iface}" ingress bpf da obj tcp_in_udp_tc.o sec tc_client_ingress + tc -n "${ns}" filter add dev "${iface}" egress u32 match tcp dst "${port}" 0xffff action goto chain 1 + tc -n "${ns}" filter add dev "${iface}" egress chain 1 bpf object-file tcp_in_udp_tc.o section tc action csum udp + tc -n "${ns}" filter add dev "${iface}" ingress u32 match udp src "${port}" 0xffff action goto chain 1 + tc -n "${ns}" filter add dev "${iface}" ingress chain 1 bpf object-file tcp_in_udp_tc.o section tc direct-action tc -n "${ns}" filter show dev "${iface}" egress tc -n "${ns}" filter show dev "${iface}" ingress @@ -57,14 +59,16 @@ tc_client() tc_server() { - local ns="${NS}_net" iface="int" + local ns="${NS}_net" iface="int" port="5201" # ip netns will umount everything on exit ip netns exec "${ns}" sh -c "mount -t debugfs none /sys/kernel/debug && cat /sys/kernel/debug/tracing/trace_pipe" & tc -n "${ns}" qdisc add dev "${iface}" clsact - tc -n "${ns}" filter add dev "${iface}" egress bpf obj tcp_in_udp_tc.o sec tc_server_egress action csum udp index 100 - tc -n "${ns}" filter add dev "${iface}" ingress bpf da obj tcp_in_udp_tc.o sec tc_server_ingress + tc -n "${ns}" filter add dev "${iface}" egress u32 match tcp src 
"${port}" 0xffff action goto chain 1 + tc -n "${ns}" filter add dev "${iface}" egress chain 1 bpf object-file tcp_in_udp_tc.o section tc action csum udp + tc -n "${ns}" filter add dev "${iface}" ingress u32 match udp dst "${port}" 0xffff action goto chain 1 + tc -n "${ns}" filter add dev "${iface}" ingress chain 1 bpf object-file tcp_in_udp_tc.o section tc direct-action tc -n "${ns}" filter show dev "${iface}" egress tc -n "${ns}" filter show dev "${iface}" ingress From 58342d35a6f1c3594e4566fef7923a88d9041ed1 Mon Sep 17 00:00:00 2001 From: "Chester A. Unal" Date: Wed, 16 Jul 2025 17:08:30 +0100 Subject: [PATCH 3/5] tc: support layer 3 interfaces Cellular interfaces do not include layer 2 header. When reading the Ethernet header, if there is no IPv4 or IPv6 header found, assume that the packet does not have an Ethernet header and check whether the protocol is IPv4 or IPv6. Signed-off-by: Chester A. Unal --- tcp_in_udp_tc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tcp_in_udp_tc.c b/tcp_in_udp_tc.c index f215d6f..9804b94 100644 --- a/tcp_in_udp_tc.c +++ b/tcp_in_udp_tc.c @@ -335,7 +335,13 @@ int tc_tcp_in_udp(struct __sk_buff *skb) } else if (eth_type == bpf_htons(ETH_P_IPV6)) { ip_type = parse_ip6hdr(&nh, data_end, &ipv6hdr); } else { - goto out; + nh.pos = data; + if (skb->protocol == bpf_htons(ETH_P_IP)) + ip_type = parse_iphdr(&nh, data_end, &iphdr); + else if (skb->protocol == bpf_htons(ETH_P_IPV6)) + ip_type = parse_ip6hdr(&nh, data_end, &ipv6hdr); + else + goto out; } if (ip_type == IPPROTO_TCP) From 73ccf7fca15fc64b552c5a8bc2bff797689579c8 Mon Sep 17 00:00:00 2001 From: "Chester A. Unal" Date: Mon, 28 Jul 2025 18:56:51 +0100 Subject: [PATCH 4/5] tc: remove unused includes and sort alphabetical Remove the unused includes. Sort in alphabetical order where possible. Signed-off-by: Chester A. 
Unal --- tcp_in_udp_tc.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tcp_in_udp_tc.c b/tcp_in_udp_tc.c index 9804b94..12138ae 100644 --- a/tcp_in_udp_tc.c +++ b/tcp_in_udp_tc.c @@ -1,18 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include -#include - -#include #include -#include +#include #include #include +#include #include #include -#include -#include #include - +#include struct tcp_in_udp_hdr { struct udphdr udphdr; From 17bc8346f962335a6d1609dfa2921d8f72618761 Mon Sep 17 00:00:00 2001 From: "Chester A. Unal" Date: Fri, 8 Aug 2025 19:10:12 +0100 Subject: [PATCH 5/5] readme: document only the necessary apt packages Only the make, clang, libelf-dev, libc6-dev-i386, and libbpf-dev packages are needed. Document them. Signed-off-by: Chester A. Unal --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e87d3c9..7e761d3 100644 --- a/README.md +++ b/README.md @@ -102,10 +102,11 @@ Checksum: ## Build -Build the binary using `make`. CLang and `libbpf` is required, e.g. +Build the binary using `make`. CLang, `libelf`, `libc6`, and `libbpf` are +required: ``` -sudo apt install clang llvm libelf-dev build-essential libc6-dev-i386 libbpf-dev +sudo apt install make clang libelf-dev libc6-dev-i386 libbpf-dev ```