diff --git a/src/42-xdp-loadbalancer/README.md b/src/42-xdp-loadbalancer/README.md index c792e8f..8260d86 100644 --- a/src/42-xdp-loadbalancer/README.md +++ b/src/42-xdp-loadbalancer/README.md @@ -445,8 +445,11 @@ sudo ./teardown.sh ### Running the Load Balancer To run the XDP load balancer, execute the following command, specifying the interface and backends' IP and MAC addresses: +To make XDP work with veth interfaces we need to add a stub eBPF program to the other end of the pair. ```console +clang -target bpf -o stub.bpf.o -c stub.bpf.c +sudo ip l set dev veth7 xdp obj ./stub.bpf.o sec .xdp # stub XDP_PASS on the receiving interface sudo ip netns exec lb ./xdp_lb veth6 10.0.0.2 de:ad:be:ef:00:02 10.0.0.3 de:ad:be:ef:00:03 ``` diff --git a/src/42-xdp-loadbalancer/stub.bpf.c b/src/42-xdp-loadbalancer/stub.bpf.c new file mode 100644 index 0000000..0c8f1d8 --- /dev/null +++ b/src/42-xdp-loadbalancer/stub.bpf.c @@ -0,0 +1,7 @@ +#include +#include + +SEC(".xdp") +int main () { + return XDP_PASS; +} diff --git a/src/42-xdp-loadbalancer/xdp_lb.bpf.c b/src/42-xdp-loadbalancer/xdp_lb.bpf.c index 701cff6..8d0b39b 100644 --- a/src/42-xdp-loadbalancer/xdp_lb.bpf.c +++ b/src/42-xdp-loadbalancer/xdp_lb.bpf.c @@ -8,6 +8,8 @@ #include #include "xx_hash.h" +#define MAX_TCP_CHECK_WORDS 750 // max 1500 bytes to check in TCP checksum. This is MTU dependent + struct backend_config { __u32 ip; unsigned char mac[ETH_ALEN]; @@ -38,6 +40,35 @@ csum_fold_helper(__u64 csum) return ~csum; } +static __always_inline __u16 +tcph_csum(struct tcphdr *tcph, struct iphdr *iph, void *data_end) +{ + // Clear checksum + tcph->check = 0; + + // Pseudo header checksum calculation + __u32 sum = 0; + sum += (__u16)(iph->saddr >> 16) + (__u16)(iph->saddr & 0xFFFF); + sum += (__u16)(iph->daddr >> 16) + (__u16)(iph->daddr & 0xFFFF); + sum += __constant_htons(IPPROTO_TCP); + sum += __constant_htons((__u16)(data_end - (void *)tcph)); + + // TCP header and payload checksum + #pragma clang loop unroll_count(MAX_TCP_CHECK_WORDS) + for (int i = 0; i <= MAX_TCP_CHECK_WORDS; i++) { + __u16 *ptr = (__u16 *)tcph + i; + if ((void *)ptr + 2 > data_end) + break; + sum += *(__u16 *)ptr; + } + + // fold into 16 bit + while (sum >> 16) + sum = (sum & 0xFFFF) + (sum >> 16); + + return ~sum; +} + static __always_inline __u16 iph_csum(struct iphdr *iph) { @@ -70,6 +101,11 @@ int xdp_load_balancer(struct xdp_md *ctx) { // Check if the protocol is TCP or UDP if (iph->protocol != IPPROTO_TCP) return XDP_PASS; + + // TCP header + struct tcphdr *tcph = (void *)iph + iph->ihl * 4; + if ((void *)tcph + sizeof(*tcph) > data_end) + return XDP_PASS; bpf_printk("Received Source IP: 0x%x", bpf_ntohl(iph->saddr)); bpf_printk("Received Destination IP: 0x%x", bpf_ntohl(iph->daddr)); @@ -80,7 +116,19 @@ int xdp_load_balancer(struct xdp_md *ctx) { { bpf_printk("Packet from client"); - __u32 key = xxhash32((const char*)iph, sizeof(struct iphdr), 0) % 2; + struct { + __u32 src_ip; + __u32 dst_ip; + __u16 src_port; + __u16 dst_port; + } four_tuple = {iph->saddr, + iph->daddr, + bpf_ntohs(tcph->source), + bpf_ntohs(tcph->dest) + }; + + // Hash the 4-tuple for flow based backend decision + __u32 key = xxhash32((const char *)&four_tuple, sizeof(four_tuple), 0) % 2; struct backend_config *backend = bpf_map_lookup_elem(&backends, &key); if (!backend) @@ -102,7 +150,10 @@ int xdp_load_balancer(struct xdp_md *ctx) { __builtin_memcpy(eth->h_source, load_balancer_mac, ETH_ALEN); // Recalculate IP checksum - iph->check = iph_csum(iph); + iph->check = iph_csum(iph); + + // Recalculate TCP checksum + tcph->check = tcph_csum(tcph, iph, data_end); bpf_printk("Redirecting packet to new IP 0x%x from IP 0x%x", bpf_ntohl(iph->daddr),