diff --git a/README.md b/README.md index faed493..7e761d3 100644 --- a/README.md +++ b/README.md @@ -102,10 +102,11 @@ Checksum: ## Build -Build the binary using `make`. CLang and `libbpf` is required, e.g. +Build the binary using `make`. Clang, `libelf`, `libc6`, and `libbpf` are +required: ``` -sudo apt install clang llvm libelf-dev build-essential libc6-dev-i386 libbpf-dev +sudo apt install make clang libelf-dev libc6-dev-i386 libbpf-dev ``` @@ -116,27 +117,52 @@ Load it with `tc` commands: - Client: ``` tc qdisc add dev "${IFACE}" clsact - tc filter add dev "${IFACE}" egress bpf obj tcp_in_udp_tc.o sec tc_client_egress action csum udp - tc filter add dev "${IFACE}" ingress bpf da obj tcp_in_udp_tc.o sec tc_client_ingress + tc filter add dev "${IFACE}" egress u32 match ip dport "${PORT}" 0xffff action goto chain 1 + tc filter add dev "${IFACE}" egress chain 1 bpf object-file tcp_in_udp_tc.o section tc action csum udp + tc filter add dev "${IFACE}" ingress u32 match ip sport "${PORT}" 0xffff action goto chain 1 + tc filter add dev "${IFACE}" ingress chain 1 bpf object-file tcp_in_udp_tc.o section tc direct-action ``` - Server: ``` tc qdisc add dev "${IFACE}" clsact - tc filter add dev "${IFACE}" egress bpf obj tcp_in_udp_tc.o sec tc_server_egress action csum udp - tc filter add dev "${IFACE}" ingress bpf da obj tcp_in_udp_tc.o sec tc_server_ingress + tc filter add dev "${IFACE}" egress u32 match ip sport "${PORT}" 0xffff action goto chain 1 + tc filter add dev "${IFACE}" egress chain 1 bpf object-file tcp_in_udp_tc.o section tc action csum udp + tc filter add dev "${IFACE}" ingress u32 match ip dport "${PORT}" 0xffff action goto chain 1 + tc filter add dev "${IFACE}" ingress chain 1 bpf object-file tcp_in_udp_tc.o section tc direct-action ``` -GRO/TSO cannot be used on this interface, because each UDP packet will carry a -part of the TCP headers as part of the data: this is specific to one packet, and -it cannot be merged with the next data. 
Please use this: +Multiple u32 filters can be used to send traffic for more than one port to the +BPF programme. + +If the TCP programme supports setting marks (SO_MARK), use it for egress to +prevent processing traffic that is not from the TCP programme. For the client, this +leaves traffic to a different IP address with the same TCP port untouched. For the server, +this prevents sending packets to the BPF programme if the interface has multiple IP +addresses assigned and the TCP programme doesn't bind to all of them. + +- Client & Server: + ``` + tc filter add dev "${IFACE}" egress handle 2 fw action goto chain 1 + ``` + +Be warned that SO_MARK can't be used for ingress as the system doesn't expect +incoming UDP packets. Therefore, all incoming packets from the interface with +matching port will be sent to the BPF programme. To decrease the chance of unrelated +traffic being processed, it is recommended to use ports that are outside of the ephemeral +port range set by net.ipv4.ip_local_port_range (default: 32768-60999). The +net.ipv4.ip_local_port_range option applies to IPv6 too. + +Generic Segmentation Offload (GSO) and Generic Receive Offload (GRO) cannot be +used for this traffic, because each UDP packet will carry a part of the TCP +headers as part of the data. This part of the data is specific to one packet; +therefore, it cannot be merged with the next data. UDP GRO is only done on +demand, e.g. when userspace asks for it (setsockopt(IPPROTO_UDP, UDP_GRO)) or +for some in-kernel tunnels, so GRO doesn't need to be disabled. To disable GSO: ``` -ethtool -K "${IFACE}" gro off lro off gso off tso off ufo off sg off -ip link set ${IFACE} gso_max_segs 1 +ip link set ${IFACE} gso_max_segs 0 ``` -(to be checked: maybe it is enough to disable `gro` and `gso/tso`.) - Note: to get some stats, in egress, it is possible to use: ``` @@ -163,15 +189,3 @@ tc filter del dev "${IFACE}" ingress Because the packets will be in UDP and not TCP, any MSS clamping will have no effects here. 
It is important to avoid IP fragmentation. In other words, it might be required to adapt the MTU (or the MSS). - -## Identification - -### Client side: - -- Ingress: From a specific destination IP and port in UDP -- Egress: To a specific destination IP and port in TCP - -### Server side: - -- Ingress: To a specific destination IP and port in UDP -- Egress: From a previously used `sk`: use ConnMark to set a specific `SO_MARK` diff --git a/tcp_in_udp_tc.c b/tcp_in_udp_tc.c index 01fc82e..12138ae 100644 --- a/tcp_in_udp_tc.c +++ b/tcp_in_udp_tc.c @@ -1,18 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include -#include - -#include #include -#include +#include #include #include +#include #include #include -#include -#include #include - +#include struct tcp_in_udp_hdr { struct udphdr udphdr; @@ -26,13 +22,6 @@ struct hdr_cursor { void *pos; }; -__u16 PORT = 5201; - -enum side { - SERVER, - CLIENT, -}; - /******************************************* ** parse_*hdr helpers from XDP tutorials ** *******************************************/ @@ -156,7 +145,7 @@ static __always_inline int parse_udphdr(struct hdr_cursor *nh, static __always_inline void udp_to_tcp(struct __sk_buff *skb, struct hdr_cursor *nh, - struct iphdr *iphdr, struct ipv6hdr *ipv6hdr, enum side side) + struct iphdr *iphdr, struct ipv6hdr *ipv6hdr) { void *data_end = (void *)(long)skb->data_end; void *data = (void *)(long)skb->data; @@ -169,17 +158,6 @@ udp_to_tcp(struct __sk_buff *skb, struct hdr_cursor *nh, if (parse_udphdr(nh, data_end, (struct udphdr**)&tuhdr) < 0) goto out; - switch (side) { - case SERVER: - if (tuhdr->udphdr.dest != bpf_htons(PORT)) - goto out; - break; - case CLIENT: - if (tuhdr->udphdr.source != bpf_htons(PORT)) - goto out; - break; - } - if (skb->gso_segs > 1) { bpf_printk("udp-tcp: WARNING, GRO/LRO should be disabled: length:%u, segs:%u, size:%u\n", skb->len, skb->gso_segs, skb->gso_size); @@ -249,45 +227,6 @@ udp_to_tcp(struct __sk_buff *skb, struct hdr_cursor *nh, return; } 
-static __always_inline int -tc_ingress(struct __sk_buff *skb, enum side side) -{ - void *data_end = (void *)(long)skb->data_end; - void *data = (void *)(long)skb->data; - struct hdr_cursor nh = { .pos = data }; - int eth_type, ip_type, ret = TC_ACT_OK; - struct ipv6hdr *ipv6hdr = NULL; - struct iphdr *iphdr = NULL; - struct ethhdr *eth; - - eth_type = parse_ethhdr(&nh, data_end, &eth); - if (eth_type == bpf_htons(ETH_P_IP)) { - ip_type = parse_iphdr(&nh, data_end, &iphdr); - } else if (eth_type == bpf_htons(ETH_P_IPV6)) { - ip_type = parse_ip6hdr(&nh, data_end, &ipv6hdr); - } else { - goto out; - } - - if (ip_type == IPPROTO_UDP) - udp_to_tcp(skb, &nh, iphdr, ipv6hdr, side); - -out: - return ret; -} - -SEC("tc_client_ingress") -int client_ingress(struct __sk_buff *skb) -{ - return tc_ingress(skb, CLIENT); -} - -SEC("tc_server_ingress") -int server_ingress(struct __sk_buff *skb) -{ - return tc_ingress(skb, SERVER); -} - /************ ** Egress ** @@ -295,7 +234,7 @@ int server_ingress(struct __sk_buff *skb) static __always_inline int tcp_to_udp(struct __sk_buff *skb, struct hdr_cursor *nh, - struct iphdr *iphdr, struct ipv6hdr *ipv6hdr, enum side side) + struct iphdr *iphdr, struct ipv6hdr *ipv6hdr) { void *data_end = (void *)(long)skb->data_end; void *data = (void *)(long)skb->data; @@ -309,17 +248,6 @@ tcp_to_udp(struct __sk_buff *skb, struct hdr_cursor *nh, if (parse_tcphdr(nh, data_end, &tcphdr) < 0) goto out; - switch (side) { - case SERVER: - if (tcphdr->source != bpf_htons(PORT)) - goto out; - break; - case CLIENT: - if (tcphdr->dest != bpf_htons(PORT)) - goto out; - break; - } - if (tcphdr->urg) { if (iphdr) bpf_printk("tcp-udp: Skip: %pI4:%u -> %pI4:%u: urgent\n", @@ -386,8 +314,8 @@ tcp_to_udp(struct __sk_buff *skb, struct hdr_cursor *nh, return TC_ACT_OK; } -static __always_inline int -tc_egress(struct __sk_buff *skb, enum side side) +SEC("tc") +int tc_tcp_in_udp(struct __sk_buff *skb) { void *data_end = (void *)(long)skb->data_end; void *data = (void 
*)(long)skb->data; @@ -403,26 +331,22 @@ tc_egress(struct __sk_buff *skb, enum side side) } else if (eth_type == bpf_htons(ETH_P_IPV6)) { ip_type = parse_ip6hdr(&nh, data_end, &ipv6hdr); } else { - goto out; + nh.pos = data; + if (skb->protocol == bpf_htons(ETH_P_IP)) + ip_type = parse_iphdr(&nh, data_end, &iphdr); + else if (skb->protocol == bpf_htons(ETH_P_IPV6)) + ip_type = parse_ip6hdr(&nh, data_end, &ipv6hdr); + else + goto out; } if (ip_type == IPPROTO_TCP) - return tcp_to_udp(skb, &nh, iphdr, ipv6hdr, side); + return tcp_to_udp(skb, &nh, iphdr, ipv6hdr); + if (ip_type == IPPROTO_UDP) + udp_to_tcp(skb, &nh, iphdr, ipv6hdr); out: return ret; } -SEC("tc_client_egress") -int client_egress(struct __sk_buff *skb) -{ - return tc_egress(skb, CLIENT); -} - -SEC("tc_server_egress") -int server_egress(struct __sk_buff *skb) -{ - return tc_egress(skb, SERVER); -} - char _license[] SEC("license") = "GPL"; diff --git a/test.sh b/test.sh index 7d7b51b..478391d 100755 --- a/test.sh +++ b/test.sh @@ -40,40 +40,40 @@ server() tc_client() { - local ns="${NS}_cpe" iface="int" + local ns="${NS}_cpe" iface="int" port="5201" # ip netns will umount everything on exit ip netns exec "${ns}" sh -c "mount -t debugfs none /sys/kernel/debug && cat /sys/kernel/debug/tracing/trace_pipe" & tc -n "${ns}" qdisc add dev "${iface}" clsact - tc -n "${ns}" filter add dev "${iface}" egress bpf obj tcp_in_udp_tc.o sec tc_client_egress action csum udp index 100 - tc -n "${ns}" filter add dev "${iface}" ingress bpf da obj tcp_in_udp_tc.o sec tc_client_ingress + tc -n "${ns}" filter add dev "${iface}" egress u32 match tcp dst "${port}" 0xffff action goto chain 1 + tc -n "${ns}" filter add dev "${iface}" egress chain 1 bpf object-file tcp_in_udp_tc.o section tc action csum udp + tc -n "${ns}" filter add dev "${iface}" ingress u32 match udp src "${port}" 0xffff action goto chain 1 + tc -n "${ns}" filter add dev "${iface}" ingress chain 1 bpf object-file tcp_in_udp_tc.o section tc direct-action tc -n 
"${ns}" filter show dev "${iface}" egress tc -n "${ns}" filter show dev "${iface}" ingress - ip netns exec "${ns}" ethtool -K "${iface}" gro off gso off tso off lro off ufo off sg off - # ip -n "${NS}_cli" link set "cpe" gso_max_segs 1 # but perf impact in this particular setup - ip netns exec "${NS}_cli" ethtool -K "cpe" gro off gso off tso off lro off ufo off sg off + ip -n "${NS}_cli" link set "cpe" gso_max_segs 0 } tc_server() { - local ns="${NS}_net" iface="int" + local ns="${NS}_net" iface="int" port="5201" # ip netns will umount everything on exit ip netns exec "${ns}" sh -c "mount -t debugfs none /sys/kernel/debug && cat /sys/kernel/debug/tracing/trace_pipe" & tc -n "${ns}" qdisc add dev "${iface}" clsact - tc -n "${ns}" filter add dev "${iface}" egress bpf obj tcp_in_udp_tc.o sec tc_server_egress action csum udp index 100 - tc -n "${ns}" filter add dev "${iface}" ingress bpf da obj tcp_in_udp_tc.o sec tc_server_ingress + tc -n "${ns}" filter add dev "${iface}" egress u32 match tcp src "${port}" 0xffff action goto chain 1 + tc -n "${ns}" filter add dev "${iface}" egress chain 1 bpf object-file tcp_in_udp_tc.o section tc action csum udp + tc -n "${ns}" filter add dev "${iface}" ingress u32 match udp dst "${port}" 0xffff action goto chain 1 + tc -n "${ns}" filter add dev "${iface}" ingress chain 1 bpf object-file tcp_in_udp_tc.o section tc direct-action tc -n "${ns}" filter show dev "${iface}" egress tc -n "${ns}" filter show dev "${iface}" ingress - ip netns exec "${ns}" ethtool -K "${iface}" gro off gso off tso off lro off ufo off sg off - # ip -n "${NS}_srv" link set "net" gso_max_segs 1 # but perf impact in this particular setup - ip netns exec "${NS}_srv" ethtool -K "net" gro off gso off tso off lro off ufo off sg off + ip -n "${NS}_srv" link set "net" gso_max_segs 0 } capture()