mirror of
https://github.com/eunomia-bpf/bpf-developer-tutorial.git
synced 2026-06-30 08:56:31 +08:00
Add more XDP related blogs (#135)
* add setup * update * fix code * move to dir * fix code * update the code * update code of 42 * update 21 * update * fix linter issues and add xdp in rust * update the docker file * fix CI * fix kernel code * update * update * add guidline
This commit is contained in:
5
src/42-xdp-loadbalancer/.gitignore
vendored
Normal file
5
src/42-xdp-loadbalancer/.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
.output
|
||||
uprobe
|
||||
merge-btf
|
||||
*.btf
|
||||
xdp_lb
|
||||
141
src/42-xdp-loadbalancer/Makefile
Normal file
141
src/42-xdp-loadbalancer/Makefile
Normal file
@@ -0,0 +1,141 @@
|
||||
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
|
||||
OUTPUT := .output
|
||||
CLANG ?= clang
|
||||
LIBBPF_SRC := $(abspath ../third_party/libbpf/src)
|
||||
BPFTOOL_SRC := $(abspath ../third_party/bpftool/src)
|
||||
LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a)
|
||||
BPFTOOL_OUTPUT ?= $(abspath $(OUTPUT)/bpftool)
|
||||
BPFTOOL ?= $(BPFTOOL_OUTPUT)/bootstrap/bpftool
|
||||
LIBBLAZESYM_SRC := $(abspath ../third_party/blazesym/)
|
||||
LIBBLAZESYM_OBJ := $(abspath $(OUTPUT)/libblazesym.a)
|
||||
LIBBLAZESYM_HEADER := $(abspath $(OUTPUT)/blazesym.h)
|
||||
ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \
|
||||
| sed 's/arm.*/arm/' \
|
||||
| sed 's/aarch64/arm64/' \
|
||||
| sed 's/ppc64le/powerpc/' \
|
||||
| sed 's/mips.*/mips/' \
|
||||
| sed 's/riscv64/riscv/' \
|
||||
| sed 's/loongarch64/loongarch/')
|
||||
VMLINUX := ../third_party/vmlinux/$(ARCH)/vmlinux.h
|
||||
# Use our own libbpf API headers and Linux UAPI headers distributed with
|
||||
# libbpf to avoid dependency on system-wide headers, which could be missing or
|
||||
# outdated
|
||||
INCLUDES := -I$(OUTPUT) -I../third_party/libbpf/include/uapi -I$(dir $(VMLINUX))
|
||||
CFLAGS := -g -Wall
|
||||
ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS)
|
||||
|
||||
APPS = xdp_lb
|
||||
|
||||
CARGO ?= $(shell which cargo)
|
||||
ifeq ($(strip $(CARGO)),)
|
||||
BZS_APPS :=
|
||||
else
|
||||
BZS_APPS :=
|
||||
APPS += $(BZS_APPS)
|
||||
# Required by libblazesym
|
||||
ALL_LDFLAGS += -lrt -ldl -lpthread -lm
|
||||
endif
|
||||
|
||||
# Get Clang's default includes on this system. We'll explicitly add these dirs
|
||||
# to the includes list when compiling with `-target bpf` because otherwise some
|
||||
# architecture-specific dirs will be "missing" on some architectures/distros -
|
||||
# headers such as asm/types.h, asm/byteorder.h, asm/socket.h, asm/sockios.h,
|
||||
# sys/cdefs.h etc. might be missing.
|
||||
#
|
||||
# Use '-idirafter': Don't interfere with include mechanics except where the
|
||||
# build would have failed anyways.
|
||||
CLANG_BPF_SYS_INCLUDES ?= $(shell $(CLANG) -v -E - </dev/null 2>&1 \
|
||||
| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
|
||||
|
||||
ifeq ($(V),1)
|
||||
Q =
|
||||
msg =
|
||||
else
|
||||
Q = @
|
||||
msg = @printf ' %-8s %s%s\n' \
|
||||
"$(1)" \
|
||||
"$(patsubst $(abspath $(OUTPUT))/%,%,$(2))" \
|
||||
"$(if $(3), $(3))";
|
||||
MAKEFLAGS += --no-print-directory
|
||||
endif
|
||||
|
||||
define allow-override
|
||||
$(if $(or $(findstring environment,$(origin $(1))),\
|
||||
$(findstring command line,$(origin $(1)))),,\
|
||||
$(eval $(1) = $(2)))
|
||||
endef
|
||||
|
||||
$(call allow-override,CC,$(CROSS_COMPILE)cc)
|
||||
$(call allow-override,LD,$(CROSS_COMPILE)ld)
|
||||
|
||||
.PHONY: all
|
||||
all: $(APPS)
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
$(call msg,CLEAN)
|
||||
$(Q)rm -rf $(OUTPUT) $(APPS)
|
||||
|
||||
$(OUTPUT) $(OUTPUT)/libbpf $(BPFTOOL_OUTPUT):
|
||||
$(call msg,MKDIR,$@)
|
||||
$(Q)mkdir -p $@
|
||||
|
||||
# Build libbpf
|
||||
$(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf
|
||||
$(call msg,LIB,$@)
|
||||
$(Q)$(MAKE) -C $(LIBBPF_SRC) BUILD_STATIC_ONLY=1 \
|
||||
OBJDIR=$(dir $@)/libbpf DESTDIR=$(dir $@) \
|
||||
INCLUDEDIR= LIBDIR= UAPIDIR= \
|
||||
install
|
||||
|
||||
# Build bpftool
|
||||
$(BPFTOOL): | $(BPFTOOL_OUTPUT)
|
||||
$(call msg,BPFTOOL,$@)
|
||||
$(Q)$(MAKE) ARCH= CROSS_COMPILE= OUTPUT=$(BPFTOOL_OUTPUT)/ -C $(BPFTOOL_SRC) bootstrap
|
||||
|
||||
|
||||
$(LIBBLAZESYM_SRC)/target/release/libblazesym.a::
|
||||
$(Q)cd $(LIBBLAZESYM_SRC) && $(CARGO) build --features=cheader,dont-generate-test-files --release
|
||||
|
||||
$(LIBBLAZESYM_OBJ): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT)
|
||||
$(call msg,LIB, $@)
|
||||
$(Q)cp $(LIBBLAZESYM_SRC)/target/release/libblazesym.a $@
|
||||
|
||||
$(LIBBLAZESYM_HEADER): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT)
|
||||
$(call msg,LIB,$@)
|
||||
$(Q)cp $(LIBBLAZESYM_SRC)/target/release/blazesym.h $@
|
||||
|
||||
# Build BPF code
|
||||
# Compile a BPF C source to a BPF object, then post-process it with
# `bpftool gen object` to produce the final $(OUTPUT)/<name>.bpf.o.
#
# Every header in this directory is listed as a prerequisite so that editing
# one of them triggers a rebuild. `$(wildcard %.h)` cannot do this: wildcard is
# expanded when the rule is parsed, where `%` is not a glob character, so it
# always expanded to nothing and header changes were silently ignored.
# The headers are filtered out of the compile command via $(filter %.c,$^).
$(OUTPUT)/%.bpf.o: %.bpf.c $(LIBBPF_OBJ) $(wildcard *.h) $(VMLINUX) | $(OUTPUT) $(BPFTOOL)
	$(call msg,BPF,$@)
	$(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) \
		$(INCLUDES) $(CLANG_BPF_SYS_INCLUDES) \
		-c $(filter %.c,$^) -o $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
	$(Q)$(BPFTOOL) gen object $@ $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
|
||||
|
||||
# Generate BPF skeletons
|
||||
$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(OUTPUT) $(BPFTOOL)
|
||||
$(call msg,GEN-SKEL,$@)
|
||||
$(Q)$(BPFTOOL) gen skeleton $< > $@
|
||||
|
||||
# Build user-space code
|
||||
$(patsubst %,$(OUTPUT)/%.o,$(APPS)): %.o: %.skel.h
|
||||
|
||||
# Compile a user-space C source into $(OUTPUT)/<name>.o.
#
# All headers in this directory are prerequisites so that a header edit
# rebuilds the object. The previous `$(wildcard %.h)` always expanded to
# nothing (`%` is not a glob character when wildcard runs at parse time), so
# header changes never triggered a rebuild. Only the .c file is passed to the
# compiler, via $(filter %.c,$^).
$(OUTPUT)/%.o: %.c $(wildcard *.h) | $(OUTPUT)
	$(call msg,CC,$@)
	$(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
|
||||
|
||||
$(patsubst %,$(OUTPUT)/%.o,$(BZS_APPS)): $(LIBBLAZESYM_HEADER)
|
||||
|
||||
$(BZS_APPS): $(LIBBLAZESYM_OBJ)
|
||||
|
||||
# Build application binary
|
||||
$(APPS): %: $(OUTPUT)/%.o $(LIBBPF_OBJ) | $(OUTPUT)
|
||||
$(call msg,BINARY,$@)
|
||||
$(Q)$(CC) $(CFLAGS) $^ $(ALL_LDFLAGS) -lelf -lz -o $@
|
||||
|
||||
# delete failed targets
|
||||
.DELETE_ON_ERROR:
|
||||
|
||||
# keep intermediate (.skel.h, .bpf.o, etc) targets
|
||||
.SECONDARY:
|
||||
527
src/42-xdp-loadbalancer/README.md
Normal file
527
src/42-xdp-loadbalancer/README.md
Normal file
@@ -0,0 +1,527 @@
|
||||
# eBPF 开发者教程: 简单的 XDP 负载均衡器
|
||||
|
||||
在本教程中,我们将指导您如何使用eBPF(扩展的Berkeley Packet Filter)实现一个简单的XDP(eXpress Data Path)负载均衡器。只需使用C语言和libbpf库,无需外部依赖,这是一个适合开发者的实践指南,帮助您充分利用Linux内核的强大功能来构建高效的网络应用程序。
|
||||
|
||||
## 为什么选择XDP?
|
||||
|
||||
`XDP`(eXpress Data Path)是Linux中的一个高速、内核级网络框架,它允许在网络堆栈的最早阶段,即在网络接口卡(NIC)上处理数据包。这使得XDP可以进行超低延迟和高吞吐量的数据包处理,非常适合用于负载均衡、DDoS保护和流量过滤等任务。
|
||||
|
||||
XDP的关键特性:
|
||||
|
||||
1. **快速数据包处理**:XDP直接在网络接口卡(NIC)级别处理数据包,减少了延迟,并通过避免通常的网络堆栈开销来提高性能。
|
||||
2. **高效**:由于在数据包进入内核网络协议栈之前处理它们,XDP最大限度地减少了CPU使用率,能够在高流量负载下保持系统的快速响应。
|
||||
3. **可定制的eBPF**:XDP程序使用eBPF编写,允许您为特定的用例创建自定义的数据包处理逻辑,例如丢弃、重定向或转发数据包。
|
||||
4. **低CPU开销**:支持零拷贝数据包转发,XDP占用更少的系统资源,非常适合在最少CPU负载的情况下处理高流量。
|
||||
5. **简单操作**:XDP程序返回预定义的操作,例如丢弃、通过或重定向数据包,提供对流量处理的控制。
|
||||
|
||||
### 使用XDP的项目
|
||||
|
||||
- `Cilium` 是一个为云原生环境(如Kubernetes)设计的开源网络工具。它使用XDP高效处理数据包过滤和负载均衡,提升了高流量网络中的性能。
|
||||
- `Katran` 由Facebook开发,是一个负载均衡器,它使用XDP处理数百万的连接,且CPU使用率低。它高效地将流量分发到服务器,在Facebook内部被用于大规模的网络环境。
|
||||
- `Cloudflare` 使用XDP来防御DDoS攻击。通过在NIC级别过滤恶意流量,Cloudflare可以在攻击数据包进入内核之前将其丢弃,最大限度地减少对网络的影响。
|
||||
|
||||
### 为什么选择XDP而不是其他方法?
|
||||
|
||||
与传统工具如`iptables`或`tc`相比,XDP具有以下优势:
|
||||
|
||||
- **速度**:它直接在NIC驱动程序中操作,数据包处理速度远快于传统方法。
|
||||
- **灵活性**:通过eBPF,您可以编写自定义的数据包处理逻辑,以满足特定需求。
|
||||
- **效率**:XDP使用更少的资源,非常适合需要处理高流量而不使系统过载的环境。
|
||||
|
||||
## 项目:构建一个简单的负载均衡器
|
||||
|
||||
在本项目中,我们将专注于使用XDP构建一个负载均衡器。负载均衡器通过将传入的网络流量高效地分发到多个后端服务器,防止单个服务器过载。结合XDP和eBPF,我们可以构建一个运行在Linux网络堆栈边缘的负载均衡器,确保即使在高流量情况下也能保持高性能。
|
||||
|
||||
我们将实现的负载均衡器将具备以下功能:
|
||||
|
||||
- 监听传入的网络数据包。
|
||||
- 对数据包的 IP 头计算哈希值,从而将流量分发到多个后端服务器。
|
||||
- 根据计算出的哈希值将数据包转发到相应的后端服务器。
|
||||
|
||||
我们将保持设计简单但强大,向您展示如何利用eBPF的能力来创建一个轻量级的负载均衡解决方案。
|
||||
|
||||
## kernel eBPF code
|
||||
|
||||
```c
|
||||
// xdp_lb.bpf.c
|
||||
#include <bpf/bpf_endian.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/in.h>
|
||||
#include <linux/tcp.h>
|
||||
#include "xx_hash.h"
|
||||
|
||||
struct backend_config {
|
||||
__u32 ip;
|
||||
unsigned char mac[ETH_ALEN];
|
||||
};
|
||||
|
||||
// Backend IP and MAC address map
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(max_entries, 2); // Two backends
|
||||
__type(key, __u32);
|
||||
__type(value, struct backend_config);
|
||||
} backends SEC(".maps");
|
||||
|
||||
int client_ip = bpf_htonl(0xa000001);
|
||||
unsigned char client_mac[ETH_ALEN] = {0xDE, 0xAD, 0xBE, 0xEF, 0x0, 0x1};
|
||||
int load_balancer_ip = bpf_htonl(0xa00000a);
|
||||
unsigned char load_balancer_mac[ETH_ALEN] = {0xDE, 0xAD, 0xBE, 0xEF, 0x0, 0x10};
|
||||
|
||||
static __always_inline __u16
|
||||
csum_fold_helper(__u64 csum)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (csum >> 16)
|
||||
csum = (csum & 0xffff) + (csum >> 16);
|
||||
}
|
||||
return ~csum;
|
||||
}
|
||||
|
||||
static __always_inline __u16
|
||||
iph_csum(struct iphdr *iph)
|
||||
{
|
||||
iph->check = 0;
|
||||
unsigned long long csum = bpf_csum_diff(0, 0, (unsigned int *)iph, sizeof(struct iphdr), 0);
|
||||
return csum_fold_helper(csum);
|
||||
}
|
||||
|
||||
SEC("xdp")
|
||||
int xdp_load_balancer(struct xdp_md *ctx) {
|
||||
void *data_end = (void *)(long)ctx->data_end;
|
||||
void *data = (void *)(long)ctx->data;
|
||||
|
||||
bpf_printk("xdp_load_balancer received packet");
|
||||
|
||||
// Ethernet header
|
||||
struct ethhdr *eth = data;
|
||||
if ((void *)(eth + 1) > data_end)
|
||||
return XDP_PASS;
|
||||
|
||||
// Check if the packet is IP (IPv4)
|
||||
if (eth->h_proto != __constant_htons(ETH_P_IP))
|
||||
return XDP_PASS;
|
||||
|
||||
// IP header
|
||||
struct iphdr *iph = (struct iphdr *)(eth + 1);
|
||||
if ((void *)(iph + 1) > data_end)
|
||||
return XDP_PASS;
|
||||
|
||||
// Only TCP packets are load-balanced; everything else is passed to the kernel stack
|
||||
if (iph->protocol != IPPROTO_TCP)
|
||||
return XDP_PASS;
|
||||
|
||||
bpf_printk("Received Source IP: 0x%x", bpf_ntohl(iph->saddr));
|
||||
bpf_printk("Received Destination IP: 0x%x", bpf_ntohl(iph->daddr));
|
||||
bpf_printk("Received Source MAC: %x:%x:%x:%x:%x:%x", eth->h_source[0], eth->h_source[1], eth->h_source[2], eth->h_source[3], eth->h_source[4], eth->h_source[5]);
|
||||
bpf_printk("Received Destination MAC: %x:%x:%x:%x:%x:%x", eth->h_dest[0], eth->h_dest[1], eth->h_dest[2], eth->h_dest[3], eth->h_dest[4], eth->h_dest[5]);
|
||||
|
||||
if (iph->saddr == client_ip)
|
||||
{
|
||||
bpf_printk("Packet from client");
|
||||
|
||||
__u32 key = xxhash32((const char*)iph, sizeof(struct iphdr), 0) % 2;
|
||||
|
||||
struct backend_config *backend = bpf_map_lookup_elem(&backends, &key);
|
||||
if (!backend)
|
||||
return XDP_PASS;
|
||||
|
||||
iph->daddr = backend->ip;
|
||||
__builtin_memcpy(eth->h_dest, backend->mac, ETH_ALEN);
|
||||
}
|
||||
else
|
||||
{
|
||||
bpf_printk("Packet from backend");
|
||||
iph->daddr = client_ip;
|
||||
__builtin_memcpy(eth->h_dest, client_mac, ETH_ALEN);
|
||||
}
|
||||
|
||||
// Update IP source address to the load balancer's IP
|
||||
iph->saddr = load_balancer_ip;
|
||||
// Update Ethernet source MAC address to the current lb's MAC
|
||||
__builtin_memcpy(eth->h_source, load_balancer_mac, ETH_ALEN);
|
||||
|
||||
// Recalculate IP checksum
|
||||
iph->check = iph_csum(iph);
|
||||
|
||||
bpf_printk("Redirecting packet to new IP 0x%x from IP 0x%x",
|
||||
bpf_ntohl(iph->daddr),
|
||||
bpf_ntohl(iph->saddr)
|
||||
);
|
||||
bpf_printk("New Dest MAC: %x:%x:%x:%x:%x:%x", eth->h_dest[0], eth->h_dest[1], eth->h_dest[2], eth->h_dest[3], eth->h_dest[4], eth->h_dest[5]);
|
||||
bpf_printk("New Source MAC: %x:%x:%x:%x:%x:%x\n", eth->h_source[0], eth->h_source[1], eth->h_source[2], eth->h_source[3], eth->h_source[4], eth->h_source[5]);
|
||||
// Return XDP_TX to transmit the modified packet back to the network
|
||||
return XDP_TX;
|
||||
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
```
|
||||
|
||||
## 内核代码关键部分解读
|
||||
|
||||
### 1. **头文件和数据结构**
|
||||
|
||||
代码首先包含了一些必要的头文件,例如 `<bpf/bpf_helpers.h>`、`<linux/if_ether.h>`、`<linux/ip.h>` 等。这些头文件提供了处理以太网帧、IP 数据包以及 BPF 辅助函数的定义。
|
||||
|
||||
`backend_config` 结构体被定义用于存储后端服务器的 IP 和 MAC 地址。这将在负载均衡逻辑中用于根据流量分配规则路由数据包。
|
||||
|
||||
```c
|
||||
struct backend_config {
|
||||
__u32 ip;
|
||||
unsigned char mac[ETH_ALEN];
|
||||
};
|
||||
```
|
||||
|
||||
### 2. **后端和负载均衡器配置**
|
||||
|
||||
代码定义了一个名为 `backends` 的 eBPF map,用于存储两个后端的 IP 和 MAC 地址。`BPF_MAP_TYPE_ARRAY` 类型用于存储后端的配置信息,`max_entries` 设置为 2,表示该负载均衡器将把流量分配给两个后端服务器。
|
||||
|
||||
```c
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(max_entries, 2);
|
||||
__type(key, __u32);
|
||||
__type(value, struct backend_config);
|
||||
} backends SEC(".maps");
|
||||
```
|
||||
|
||||
同时也预定义了客户端和负载均衡器的 IP 地址和 MAC 地址:
|
||||
|
||||
```c
|
||||
int client_ip = bpf_htonl(0xa000001);
|
||||
unsigned char client_mac[ETH_ALEN] = {0xDE, 0xAD, 0xBE, 0xEF, 0x0, 0x1};
|
||||
int load_balancer_ip = bpf_htonl(0xa00000a);
|
||||
unsigned char load_balancer_mac[ETH_ALEN] = {0xDE, 0xAD, 0xBE, 0xEF, 0x0, 0x10};
|
||||
```
|
||||
|
||||
### 3. **校验和函数**
|
||||
|
||||
`iph_csum()` 函数在修改数据包内容后重新计算 IP 头的校验和。在对头部进行任何修改时,确保 IP 数据包的完整性是至关重要的。
|
||||
|
||||
```c
|
||||
static __always_inline __u16 iph_csum(struct iphdr *iph) {
|
||||
iph->check = 0;
|
||||
unsigned long long csum = bpf_csum_diff(0, 0, (unsigned int *)iph, sizeof(struct iphdr), 0);
|
||||
return csum_fold_helper(csum);
|
||||
}
|
||||
```
|
||||
|
||||
### 4. **XDP 程序逻辑**
|
||||
|
||||
XDP 负载均衡器的核心逻辑在 `xdp_load_balancer` 函数中实现,该函数附加到 XDP 钩子上。它处理传入的数据包,并根据不同情况将数据包转发到后端或回传给客户端。
|
||||
|
||||
- **初始检查**:
|
||||
函数首先验证数据包是否是以太网帧,接着检查它是否是 IP 数据包(IPv4)并且使用了 TCP 协议。
|
||||
|
||||
```c
|
||||
if (eth->h_proto != __constant_htons(ETH_P_IP))
|
||||
return XDP_PASS;
|
||||
if (iph->protocol != IPPROTO_TCP)
|
||||
return XDP_PASS;
|
||||
```
|
||||
|
||||
- **客户端数据包处理**:
|
||||
如果源 IP 与客户端 IP 匹配,代码使用 `xxhash32` 对 IP 头进行哈希处理,并将哈希值对 2 取模得到 key,以确定相应的后端。
|
||||
|
||||
```c
|
||||
if (iph->saddr == client_ip) {
|
||||
__u32 key = xxhash32((const char*)iph, sizeof(struct iphdr), 0) % 2;
|
||||
struct backend_config *backend = bpf_map_lookup_elem(&backends, &key);
|
||||
```
|
||||
|
||||
之后将目标 IP 和 MAC 替换为选定的后端的值,并将数据包转发到后端。
|
||||
|
||||
- **后端数据包处理**:
|
||||
如果数据包来自后端服务器,代码将目标设置为客户端的 IP 和 MAC 地址,确保后端的响应数据包被正确地转发回客户端。
|
||||
|
||||
```c
|
||||
iph->daddr = client_ip;
|
||||
__builtin_memcpy(eth->h_dest, client_mac, ETH_ALEN);
|
||||
```
|
||||
|
||||
- **重写 IP 和 MAC 地址**:
|
||||
对于所有的出站数据包,源 IP 和 MAC 地址会被更新为负载均衡器的值,以确保在客户端与后端之间通信时,负载均衡器作为源进行标识。
|
||||
|
||||
```c
|
||||
iph->saddr = load_balancer_ip;
|
||||
__builtin_memcpy(eth->h_source, load_balancer_mac, ETH_ALEN);
|
||||
```
|
||||
|
||||
- **重新计算校验和**:
|
||||
修改 IP 头之后,使用之前定义的 `iph_csum()` 函数重新计算校验和。
|
||||
|
||||
```c
|
||||
iph->check = iph_csum(iph);
|
||||
```
|
||||
|
||||
- **最终动作**:
|
||||
使用 `XDP_TX` 动作发送数据包,这指示网卡将修改后的数据包传输出去。
|
||||
|
||||
```c
|
||||
return XDP_TX;
|
||||
```
|
||||
|
||||
### 5. **结论**
|
||||
|
||||
负载均衡器通过检查源 IP 判断数据包的方向,对来自客户端的数据包进行哈希计算以选择后端,并通过修改目标 IP 和 MAC 来完成数据包的转发。`XDP_TX` 动作会把修改后的数据包从接收它的网卡直接发送出去,这是 eBPF 在 XDP 层实现高速数据包处理的关键。
|
||||
|
||||
通过以上解读,可以理解数据包的流转过程,以及代码中每个部分在实现多个后端之间负载均衡的过程中所起的作用。
|
||||
|
||||
|
||||
## Userspace code
|
||||
|
||||
```c
|
||||
// xdp_lb.c
|
||||
#include <arpa/inet.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <net/if.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "xdp_lb.skel.h" // The generated skeleton
|
||||
|
||||
struct backend_config {
|
||||
__u32 ip;
|
||||
unsigned char mac[6];
|
||||
};
|
||||
|
||||
static int parse_mac(const char *str, unsigned char *mac) {
|
||||
if (sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
|
||||
&mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]) != 6) {
|
||||
fprintf(stderr, "Invalid MAC address format\n");
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc != 6) {
|
||||
fprintf(stderr, "Usage: %s <ifname> <backend1_ip> <backend1_mac> <backend2_ip> <backend2_mac>\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
const char *ifname = argv[1];
|
||||
struct backend_config backend[2];
|
||||
|
||||
// Parse backend 1
|
||||
if (inet_pton(AF_INET, argv[2], &backend[0].ip) != 1) {
|
||||
fprintf(stderr, "Invalid backend 1 IP address\n");
|
||||
return 1;
|
||||
}
|
||||
if (parse_mac(argv[3], backend[0].mac) < 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Parse backend 2
|
||||
if (inet_pton(AF_INET, argv[4], &backend[1].ip) != 1) {
|
||||
fprintf(stderr, "Invalid backend 2 IP address\n");
|
||||
return 1;
|
||||
}
|
||||
if (parse_mac(argv[5], backend[1].mac) < 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Load and attach the BPF program
|
||||
struct xdp_lb_bpf *skel = xdp_lb_bpf__open_and_load();
|
||||
if (!skel) {
|
||||
fprintf(stderr, "Failed to open and load BPF skeleton\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
int ifindex = if_nametoindex(ifname);
|
||||
if (ifindex < 0) {
|
||||
perror("if_nametoindex");
|
||||
xdp_lb_bpf__destroy(skel);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (bpf_program__attach_xdp(skel->progs.xdp_load_balancer, ifindex) < 0) {
|
||||
fprintf(stderr, "Failed to attach XDP program\n");
|
||||
xdp_lb_bpf__destroy(skel);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Update backend configurations
|
||||
for (int i = 0; i < 2; i++) {
|
||||
if (bpf_map_update_elem(bpf_map__fd(skel->maps.backends), &i, &backend[i], 0) < 0) {
|
||||
perror("bpf_map_update_elem");
|
||||
xdp_lb_bpf__destroy(skel);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
printf("XDP load balancer configured with backends:\n");
|
||||
printf("Backend 1 - IP: %s, MAC: %s\n", argv[2], argv[3]);
|
||||
printf("Backend 2 - IP: %s, MAC: %s\n", argv[4], argv[5]);
|
||||
|
||||
printf("Press Ctrl+C to exit...\n");
|
||||
while (1) {
|
||||
sleep(1); // Keep the program running
|
||||
}
|
||||
|
||||
// Cleanup and detach
|
||||
bpf_xdp_detach(ifindex, 0, NULL);
|
||||
xdp_lb_bpf__detach(skel);
|
||||
xdp_lb_bpf__destroy(skel);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
### 用户空间代码概述
|
||||
|
||||
提供的用户空间代码负责设置和配置在内核中运行的 XDP 负载均衡器程序。它接受命令行参数,加载 eBPF 程序,将其附加到网络接口,并更新后端服务器的配置信息。
|
||||
|
||||
### 1. **解析命令行参数和设置后端服务器**
|
||||
|
||||
程序期望五个命令行参数:网络接口的名称 (`ifname`)、两个后端服务器的 IP 地址和 MAC 地址。它通过 `inet_pton()` 函数解析 IP 地址,并使用 `parse_mac()` 函数解析 MAC 地址,确保提供的 MAC 地址格式正确。解析后的后端信息存储在 `backend_config` 结构体中。
|
||||
|
||||
### 2. **加载并附加 BPF 程序**
|
||||
|
||||
BPF skeleton(通过 `xdp_lb.skel.h` 生成)用于打开并将 XDP 程序加载到内核中。程序通过 `if_nametoindex()` 将网络接口名称转换为索引,然后使用 `bpf_program__attach_xdp()` 将加载的 BPF 程序附加到此接口上。
|
||||
|
||||
### 3. **配置后端服务器信息**
|
||||
|
||||
后端的 IP 和 MAC 地址被写入 `backends` BPF map 中,使用 `bpf_map_update_elem()` 函数。此步骤确保 BPF 程序能够访问后端配置,从而基于内核代码中的逻辑将数据包路由到正确的后端服务器。
|
||||
|
||||
### 4. **程序循环与清理**
|
||||
|
||||
程序进入无限循环(`while (1) { sleep(1); }`),使 XDP 程序保持运行。当用户通过按下 Ctrl+C 退出时,BPF 程序从网络接口上卸载,并通过调用 `xdp_lb_bpf__destroy()` 清理资源。
|
||||
|
||||
总的来说,这段用户空间代码负责配置和管理 XDP 负载均衡器的生命周期,使得可以动态更新后端配置,并确保负载均衡器正确附加到网络接口上。
|
||||
|
||||
### 测试环境拓扑
|
||||
|
||||
拓扑结构表示一个测试环境,其中本地机器通过负载均衡器与两个后端节点(h2 和 h3)通信。通过虚拟以太网对(veth0 到 veth6),本地机器与负载均衡器相连,在受控环境中模拟网络连接。每个虚拟接口都有自己的 IP 和 MAC 地址,代表不同的实体。
|
||||
|
||||
```txt
|
||||
+---------------------------+
|
||||
| 本地机器 |
|
||||
| IP: 10.0.0.1 (veth0) |
|
||||
| MAC: DE:AD:BE:EF:00:01 |
|
||||
+------------+---------------+
|
||||
|
|
||||
| (veth1)
|
||||
|
|
||||
+--------+---------------+
|
||||
| 负载均衡器 |
|
||||
| IP: 10.0.0.10 (veth6) |
|
||||
| MAC: DE:AD:BE:EF:00:10|
|
||||
+--------+---------------+
|
||||
|
|
||||
+---------+----------------------------+
|
||||
| |
|
||||
(veth2) (veth4)
|
||||
| |
|
||||
+--+---------------+ +--------+---------+
|
||||
| h2 | | h3 |
|
||||
| IP: | | IP: |
|
||||
|10.0.0.2 (veth3) | |10.0.0.3 (veth5) |
|
||||
| MAC: | | MAC: |
|
||||
|DE:AD:BE:EF:00:02 | |DE:AD:BE:EF:00:03 |
|
||||
+------------------+ +------------------+
|
||||
```
|
||||
|
||||
这个设置可以通过脚本(`setup.sh`)轻松初始化,并通过另一个脚本(`teardown.sh`)删除。
|
||||
|
||||
> 如果您对本教程感兴趣,请帮助我们创建一个容器化的版本,简化设置和拓扑结构!目前的设置和删除过程基于网络命名空间,容器化的版本会更加友好。
|
||||
|
||||
初始化:
|
||||
|
||||
```sh
|
||||
sudo ./setup.sh
|
||||
```
|
||||
|
||||
删除:
|
||||
|
||||
```sh
|
||||
sudo ./teardown.sh
|
||||
```
|
||||
|
||||
### 运行负载均衡器
|
||||
|
||||
要运行 XDP 负载均衡器,执行以下命令,指定接口和后端服务器的 IP 和 MAC 地址:
|
||||
|
||||
```console
|
||||
sudo ip netns exec lb ./xdp_lb veth6 10.0.0.2 de:ad:be:ef:00:02 10.0.0.3 de:ad:be:ef:00:03
|
||||
```
|
||||
|
||||
这将配置负载均衡器并输出后端服务器的详细信息:
|
||||
|
||||
```console
|
||||
XDP load balancer configured with backends:
|
||||
Backend 1 - IP: 10.0.0.2, MAC: de:ad:be:ef:00:02
|
||||
Backend 2 - IP: 10.0.0.3, MAC: de:ad:be:ef:00:03
|
||||
Press Ctrl+C to exit...
|
||||
```
|
||||
|
||||
### 测试设置
|
||||
|
||||
您可以通过在两个后端命名空间(`h2` 和 `h3`)启动 HTTP 服务器,并从本地机器向负载均衡器发送请求来测试设置:
|
||||
|
||||
在 `h2` 和 `h3` 上启动服务器:
|
||||
|
||||
```sh
|
||||
sudo ip netns exec h2 python3 -m http.server
|
||||
sudo ip netns exec h3 python3 -m http.server
|
||||
```
|
||||
|
||||
然后,向负载均衡器 IP 发送请求:
|
||||
|
||||
```sh
|
||||
curl 10.0.0.10:8000
|
||||
```
|
||||
|
||||
负载均衡器将根据哈希函数将流量分配到后端服务器(`h2` 和 `h3`)。
|
||||
|
||||
### 使用 `bpf_printk` 进行监控
|
||||
|
||||
您可以通过查看 `bpf_printk` 日志来监控负载均衡器的活动。BPF 程序在处理每个数据包时会打印诊断消息。您可以使用以下命令查看这些日志:
|
||||
|
||||
```console
|
||||
sudo cat /sys/kernel/debug/tracing/trace_pipe
|
||||
```
|
||||
|
||||
日志示例:
|
||||
|
||||
```console
|
||||
<idle>-0 [004] ..s2. 24174.812722: bpf_trace_printk: xdp_load_balancer received packet
|
||||
<idle>-0 [004] .Ns2. 24174.812729: bpf_trace_printk: Received Source IP: 0xa000001
|
||||
<idle>-0 [004] .Ns2. 24174.812729: Received Destination IP: 0xa00000a
|
||||
<idle>-0 [004] .Ns2. 24174.812731: Received Source MAC: de:ad:be:ef:0:1
|
||||
<idle>-0 [004] .Ns2. 24174.812732: Received Destination MAC: de:ad:be:ef:0:10
|
||||
<idle>-0 [004] .Ns2. 24174.812732: Packet from client
|
||||
<idle>-0 [004] .Ns2. 24174.812734: bpf_trace_printk: Redirecting packet to new IP 0xa000002 from IP 0xa00000a
|
||||
<idle>-0 [004] .Ns2. 24174.812735: New Dest MAC: de:ad:be:ef:0:2
|
||||
<idle>-0 [004] .Ns2. 24174.812735: New Source MAC: de:ad:be:ef:0:10
|
||||
```
|
||||
|
||||
### 调试问题
|
||||
|
||||
某些系统可能会因为类似于此[博客文章](https://fedepaol.github.io/blog/2023/09/11/xdp-ate-my-packets-and-how-i-debugged-it/)中描述的问题而导致数据包丢失或转发失败。您可以使用 `bpftrace` 跟踪 XDP 错误进行调试:
|
||||
|
||||
```sh
|
||||
sudo bpftrace -e 'tracepoint:xdp:xdp_bulk_tx{@redir_errno[-args->err] = count();}'
|
||||
```
|
||||
|
||||
如果输出如下所示:
|
||||
|
||||
```sh
|
||||
@redir_errno[6]: 3
|
||||
```
|
||||
|
||||
这表明发生了与 XDP 数据包转发相关的错误。错误代码 `6` 对应 `ENXIO`(No such device or address),即数据包在发送阶段被驱动拒绝;具体原因和排查方法可参考上面的博客文章。
|
||||
|
||||
### 结论
|
||||
|
||||
本教程展示了如何使用 eBPF 设置一个简单的 XDP 负载均衡器,以实现高效的流量分发。对于那些想了解更多关于 eBPF 知识的用户,包括更高级的示例和教程,请访问我们的 [https://github.com/eunomia-bpf/bpf-developer-tutorial](https://github.com/eunomia-bpf/bpf-developer-tutorial) 或我们的网站 [https://eunomia.dev/tutorials/](https://eunomia.dev/tutorials/)。
|
||||
|
||||
### 参考文献
|
||||
|
||||
- [XDP 编程实践教程](https://github.com/xdp-project/xdp-tutorial)
|
||||
528
src/42-xdp-loadbalancer/README_en.md
Normal file
528
src/42-xdp-loadbalancer/README_en.md
Normal file
@@ -0,0 +1,528 @@
|
||||
|
||||
# eBPF Developer Tutorial: XDP Load Balancer
|
||||
|
||||
In this tutorial, we will guide you through the process of implementing a simple XDP (eXpress Data Path) load balancer using eBPF (Extended Berkeley Packet Filter). With just C, libbpf, and no external dependencies, this hands-on guide is perfect for developers interested in harnessing the full power of the Linux kernel to build highly efficient network applications.
|
||||
|
||||
## Why XDP?
|
||||
|
||||
`XDP` (eXpress Data Path) is a fast, in-kernel networking framework in Linux that allows packet processing at the earliest point in the network stack, right in the network interface card (NIC). This enables ultra-low-latency and high-throughput packet handling, making XDP ideal for tasks like load balancing, DDoS protection, and traffic filtering.
|
||||
|
||||
Key Features of XDP
|
||||
|
||||
1. **Fast Packet Processing**: XDP handles packets directly at the NIC level, reducing latency and improving performance by avoiding the usual networking stack overhead.
|
||||
2. **Efficient**: Because it processes packets before they reach the kernel's networking stack, XDP minimizes CPU usage and handles high traffic loads without slowing down the system.
|
||||
3. **Customizable with eBPF**: XDP programs are written using eBPF, allowing you to create custom packet-handling logic for specific use cases like dropping, redirecting, or forwarding packets.
|
||||
4. **Low CPU Overhead**: With support for zero-copy packet forwarding, XDP uses fewer system resources, making it perfect for handling high traffic with minimal CPU load.
|
||||
5. **Simple Actions**: XDP programs return predefined actions like dropping, passing, or redirecting packets, providing control over how traffic is handled.
|
||||
|
||||
### Projects That Use XDP
|
||||
|
||||
- `Cilium` is an open-source networking tool for cloud-native environments like Kubernetes. It uses XDP to efficiently handle packet filtering and load balancing, improving performance in high-traffic networks.
|
||||
- `Katran`, developed by Facebook, is a load balancer that uses XDP to handle millions of connections with low CPU usage. It distributes traffic efficiently across servers and is used internally at Facebook for large-scale networking.
|
||||
- `Cloudflare` uses XDP to protect against DDoS attacks. By filtering out malicious traffic at the NIC level, Cloudflare can drop attack packets before they even reach the kernel, minimizing the impact on their network.
|
||||
|
||||
### Why Choose XDP Over Other Methods?
|
||||
|
||||
Compared to traditional tools like `iptables` or `tc`, XDP offers:
|
||||
|
||||
- **Speed**: It operates directly in the NIC driver, processing packets much faster than traditional methods.
|
||||
- **Flexibility**: With eBPF, you can write custom packet-handling logic to meet specific needs.
|
||||
- **Efficiency**: XDP uses fewer resources, making it suitable for environments that need to handle high traffic without overloading the system.
|
||||
|
||||
## The Project: Building a Simple Load Balancer
|
||||
|
||||
In this project, we will be focusing on building a load balancer using XDP. A load balancer efficiently distributes incoming network traffic across multiple backend servers to prevent any single server from becoming overwhelmed. With the combination of XDP and eBPF, we can build a load balancer that operates at the edge of the Linux networking stack, ensuring high performance even under heavy traffic conditions.
|
||||
|
||||
The load balancer we’ll be implementing will:
|
||||
|
||||
- Listen for incoming network packets.
|
||||
- Calculate a hash over the packet's IP header, allowing us to distribute the traffic across multiple backend servers.
|
||||
- Forward the packet to the appropriate backend server based on the calculated hash.
|
||||
|
||||
We'll keep the design simple but powerful, showing you how to leverage eBPF’s capabilities to create a lightweight load balancing solution.
|
||||
|
||||
## kernel eBPF code
|
||||
|
||||
```c
|
||||
// xdp_lb.bpf.c
|
||||
#include <bpf/bpf_endian.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/in.h>
|
||||
#include <linux/tcp.h>
|
||||
#include "xx_hash.h"
|
||||
|
||||
struct backend_config {
|
||||
__u32 ip;
|
||||
unsigned char mac[ETH_ALEN];
|
||||
};
|
||||
|
||||
// Backend IP and MAC address map
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(max_entries, 2); // Two backends
|
||||
__type(key, __u32);
|
||||
__type(value, struct backend_config);
|
||||
} backends SEC(".maps");
|
||||
|
||||
int client_ip = bpf_htonl(0xa000001);
|
||||
unsigned char client_mac[ETH_ALEN] = {0xDE, 0xAD, 0xBE, 0xEF, 0x0, 0x1};
|
||||
int load_balancer_ip = bpf_htonl(0xa00000a);
|
||||
unsigned char load_balancer_mac[ETH_ALEN] = {0xDE, 0xAD, 0xBE, 0xEF, 0x0, 0x10};
|
||||
|
||||
static __always_inline __u16
|
||||
csum_fold_helper(__u64 csum)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (csum >> 16)
|
||||
csum = (csum & 0xffff) + (csum >> 16);
|
||||
}
|
||||
return ~csum;
|
||||
}
|
||||
|
||||
static __always_inline __u16
|
||||
iph_csum(struct iphdr *iph)
|
||||
{
|
||||
iph->check = 0;
|
||||
unsigned long long csum = bpf_csum_diff(0, 0, (unsigned int *)iph, sizeof(struct iphdr), 0);
|
||||
return csum_fold_helper(csum);
|
||||
}
|
||||
|
||||
SEC("xdp")
|
||||
int xdp_load_balancer(struct xdp_md *ctx) {
|
||||
void *data_end = (void *)(long)ctx->data_end;
|
||||
void *data = (void *)(long)ctx->data;
|
||||
|
||||
bpf_printk("xdp_load_balancer received packet");
|
||||
|
||||
// Ethernet header
|
||||
struct ethhdr *eth = data;
|
||||
if ((void *)(eth + 1) > data_end)
|
||||
return XDP_PASS;
|
||||
|
||||
// Check if the packet is IP (IPv4)
|
||||
if (eth->h_proto != __constant_htons(ETH_P_IP))
|
||||
return XDP_PASS;
|
||||
|
||||
// IP header
|
||||
struct iphdr *iph = (struct iphdr *)(eth + 1);
|
||||
if ((void *)(iph + 1) > data_end)
|
||||
return XDP_PASS;
|
||||
|
||||
// Only TCP packets are load-balanced; everything else is passed to the kernel stack
|
||||
if (iph->protocol != IPPROTO_TCP)
|
||||
return XDP_PASS;
|
||||
|
||||
bpf_printk("Received Source IP: 0x%x", bpf_ntohl(iph->saddr));
|
||||
bpf_printk("Received Destination IP: 0x%x", bpf_ntohl(iph->daddr));
|
||||
bpf_printk("Received Source MAC: %x:%x:%x:%x:%x:%x", eth->h_source[0], eth->h_source[1], eth->h_source[2], eth->h_source[3], eth->h_source[4], eth->h_source[5]);
|
||||
bpf_printk("Received Destination MAC: %x:%x:%x:%x:%x:%x", eth->h_dest[0], eth->h_dest[1], eth->h_dest[2], eth->h_dest[3], eth->h_dest[4], eth->h_dest[5]);
|
||||
|
||||
if (iph->saddr == client_ip)
|
||||
{
|
||||
bpf_printk("Packet from client");
|
||||
|
||||
__u32 key = xxhash32((const char*)iph, sizeof(struct iphdr), 0) % 2;
|
||||
|
||||
struct backend_config *backend = bpf_map_lookup_elem(&backends, &key);
|
||||
if (!backend)
|
||||
return XDP_PASS;
|
||||
|
||||
iph->daddr = backend->ip;
|
||||
__builtin_memcpy(eth->h_dest, backend->mac, ETH_ALEN);
|
||||
}
|
||||
else
|
||||
{
|
||||
bpf_printk("Packet from backend");
|
||||
iph->daddr = client_ip;
|
||||
__builtin_memcpy(eth->h_dest, client_mac, ETH_ALEN);
|
||||
}
|
||||
|
||||
// Update IP source address to the load balancer's IP
|
||||
iph->saddr = load_balancer_ip;
|
||||
// Update Ethernet source MAC address to the current lb's MAC
|
||||
__builtin_memcpy(eth->h_source, load_balancer_mac, ETH_ALEN);
|
||||
|
||||
// Recalculate IP checksum
|
||||
iph->check = iph_csum(iph);
|
||||
|
||||
bpf_printk("Redirecting packet to new IP 0x%x from IP 0x%x",
|
||||
bpf_ntohl(iph->daddr),
|
||||
bpf_ntohl(iph->saddr)
|
||||
);
|
||||
bpf_printk("New Dest MAC: %x:%x:%x:%x:%x:%x", eth->h_dest[0], eth->h_dest[1], eth->h_dest[2], eth->h_dest[3], eth->h_dest[4], eth->h_dest[5]);
|
||||
bpf_printk("New Source MAC: %x:%x:%x:%x:%x:%x\n", eth->h_source[0], eth->h_source[1], eth->h_source[2], eth->h_source[3], eth->h_source[4], eth->h_source[5]);
|
||||
// Return XDP_TX to transmit the modified packet back to the network
|
||||
return XDP_TX;
|
||||
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
```
|
||||
|
||||
Here’s a breakdown of the key sections of the kernel code:
|
||||
|
||||
### 1. **Header Files and Data Structures**
|
||||
|
||||
The code begins with necessary header files like `<bpf/bpf_helpers.h>`, `<linux/if_ether.h>`, `<linux/ip.h>`, and more. These headers provide definitions for handling Ethernet frames, IP packets, and BPF helper functions.
|
||||
|
||||
The `backend_config` struct is defined to hold the IP and MAC address of backend servers. This will later be used for routing packets based on load balancing logic.
|
||||
|
||||
```c
|
||||
struct backend_config {
|
||||
__u32 ip;
|
||||
unsigned char mac[ETH_ALEN];
|
||||
};
|
||||
```
|
||||
|
||||
### 2. **Backend and Load Balancer Configuration**
|
||||
|
||||
The code defines an eBPF map named `backends` that stores IP and MAC addresses for two backends. The `BPF_MAP_TYPE_ARRAY` type is used to store backend configuration, with `max_entries` set to 2, indicating the load balancer will route to two backend servers.
|
||||
|
||||
```c
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(max_entries, 2);
|
||||
__type(key, __u32);
|
||||
__type(value, struct backend_config);
|
||||
} backends SEC(".maps");
|
||||
```
|
||||
|
||||
There are also predefined IP addresses and MAC addresses for the client and load balancer:
|
||||
|
||||
```c
|
||||
int client_ip = bpf_htonl(0xa000001);
|
||||
unsigned char client_mac[ETH_ALEN] = {0xDE, 0xAD, 0xBE, 0xEF, 0x0, 0x1};
|
||||
int load_balancer_ip = bpf_htonl(0xa00000a);
|
||||
unsigned char load_balancer_mac[ETH_ALEN] = {0xDE, 0xAD, 0xBE, 0xEF, 0x0, 0x10};
|
||||
```
|
||||
|
||||
### 3. **Checksum Functions**
|
||||
|
||||
The function `iph_csum()` recalculates the IP header checksum after modifying the packet's contents. It's essential to keep the integrity of IP packets when any modification is done to the headers.
|
||||
|
||||
```c
|
||||
static __always_inline __u16 iph_csum(struct iphdr *iph) {
|
||||
iph->check = 0;
|
||||
unsigned long long csum = bpf_csum_diff(0, 0, (unsigned int *)iph, sizeof(struct iphdr), 0);
|
||||
return csum_fold_helper(csum);
|
||||
}
|
||||
```
|
||||
|
||||
### 4. **XDP Program Logic**
|
||||
|
||||
The core of the XDP load balancer logic is implemented in the `xdp_load_balancer` function, which is attached to the XDP hook. It processes incoming packets and directs them either to a backend or back to the client.
|
||||
|
||||
- **Initial Checks**:
|
||||
The function begins by verifying that the packet is an Ethernet frame, then checks if it's an IP packet (IPv4) and if it's using the TCP protocol.
|
||||
|
||||
```c
|
||||
if (eth->h_proto != __constant_htons(ETH_P_IP))
|
||||
return XDP_PASS;
|
||||
if (iph->protocol != IPPROTO_TCP)
|
||||
return XDP_PASS;
|
||||
```
|
||||
|
||||
- **Client Packet Handling**:
|
||||
If the source IP matches the client IP, the code hashes the IP header using `xxhash32` to determine the appropriate backend (based on the key modulo 2).
|
||||
|
||||
```c
|
||||
if (iph->saddr == client_ip) {
|
||||
__u32 key = xxhash32((const char*)iph, sizeof(struct iphdr), 0) % 2;
|
||||
struct backend_config *backend = bpf_map_lookup_elem(&backends, &key);
|
||||
```
|
||||
|
||||
The destination IP and MAC are replaced with those of the selected backend, and the packet is forwarded to the backend.
|
||||
|
||||
- **Backend Packet Handling**:
|
||||
If the packet is from a backend server, the destination is set to the client’s IP and MAC address, ensuring that the backend’s response is directed back to the client.
|
||||
|
||||
```c
|
||||
iph->daddr = client_ip;
|
||||
__builtin_memcpy(eth->h_dest, client_mac, ETH_ALEN);
|
||||
```
|
||||
|
||||
- **Rewriting IP and MAC Addresses**:
|
||||
The source IP and MAC are updated to the load balancer’s values for all outgoing packets, ensuring that the load balancer appears as the source for both client-to-backend and backend-to-client communication.
|
||||
|
||||
```c
|
||||
iph->saddr = load_balancer_ip;
|
||||
__builtin_memcpy(eth->h_source, load_balancer_mac, ETH_ALEN);
|
||||
```
|
||||
|
||||
- **Recalculate Checksum**:
|
||||
After modifying the IP header, the checksum is recalculated using the previously defined `iph_csum()` function.
|
||||
|
||||
```c
|
||||
iph->check = iph_csum(iph);
|
||||
```
|
||||
|
||||
- **Final Action**:
|
||||
The packet is transmitted using the `XDP_TX` action, which instructs the NIC to send the modified packet.
|
||||
|
||||
```c
|
||||
return XDP_TX;
|
||||
```
|
||||
|
||||
### 5. **Conclusion**
|
||||
|
||||
In summary, the load balancer routes traffic between the client and the two backend servers by inspecting the source IP, hashing the IP header to pick a backend, and rewriting the destination IP and MAC addresses before forwarding the packet. The `XDP_TX` action is key to the high-speed packet handling provided by eBPF at the XDP layer.
|
||||
|
||||
Together, these sections show how a packet flows through the program and what role each part of the code plays in balancing load across multiple backends.
|
||||
|
||||
## Userspace code
|
||||
|
||||
```c
|
||||
// xdp_lb.c
|
||||
#include <arpa/inet.h>
|
||||
#include <bpf/bpf.h>
|
||||
#include <bpf/libbpf.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <net/if.h>
|
||||
#include "xdp_lb.skel.h" // The generated skeleton
|
||||
|
||||
struct backend_config {
|
||||
__u32 ip;
|
||||
unsigned char mac[6];
|
||||
};
|
||||
|
||||
static int parse_mac(const char *str, unsigned char *mac) {
|
||||
if (sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
|
||||
&mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5]) != 6) {
|
||||
fprintf(stderr, "Invalid MAC address format\n");
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc != 6) {
|
||||
fprintf(stderr, "Usage: %s <ifname> <backend1_ip> <backend1_mac> <backend2_ip> <backend2_mac>\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
const char *ifname = argv[1];
|
||||
struct backend_config backend[2];
|
||||
|
||||
// Parse backend 1
|
||||
if (inet_pton(AF_INET, argv[2], &backend[0].ip) != 1) {
|
||||
fprintf(stderr, "Invalid backend 1 IP address\n");
|
||||
return 1;
|
||||
}
|
||||
if (parse_mac(argv[3], backend[0].mac) < 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Parse backend 2
|
||||
if (inet_pton(AF_INET, argv[4], &backend[1].ip) != 1) {
|
||||
fprintf(stderr, "Invalid backend 2 IP address\n");
|
||||
return 1;
|
||||
}
|
||||
if (parse_mac(argv[5], backend[1].mac) < 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Load and attach the BPF program
|
||||
struct xdp_lb_bpf *skel = xdp_lb_bpf__open_and_load();
|
||||
if (!skel) {
|
||||
fprintf(stderr, "Failed to open and load BPF skeleton\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
int ifindex = if_nametoindex(ifname);
|
||||
if (ifindex < 0) {
|
||||
perror("if_nametoindex");
|
||||
xdp_lb_bpf__destroy(skel);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (bpf_program__attach_xdp(skel->progs.xdp_load_balancer, ifindex) < 0) {
|
||||
fprintf(stderr, "Failed to attach XDP program\n");
|
||||
xdp_lb_bpf__destroy(skel);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Update backend configurations
|
||||
for (int i = 0; i < 2; i++) {
|
||||
if (bpf_map_update_elem(bpf_map__fd(skel->maps.backends), &i, &backend[i], 0) < 0) {
|
||||
perror("bpf_map_update_elem");
|
||||
xdp_lb_bpf__destroy(skel);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
printf("XDP load balancer configured with backends:\n");
|
||||
printf("Backend 1 - IP: %s, MAC: %s\n", argv[2], argv[3]);
|
||||
printf("Backend 2 - IP: %s, MAC: %s\n", argv[4], argv[5]);
|
||||
|
||||
printf("Press Ctrl+C to exit...\n");
|
||||
while (1) {
|
||||
sleep(1); // Keep the program running
|
||||
}
|
||||
|
||||
// Cleanup and detach
|
||||
bpf_xdp_detach(ifindex, 0, NULL);
|
||||
xdp_lb_bpf__detach(skel);
|
||||
xdp_lb_bpf__destroy(skel);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
The userspace code provided is responsible for setting up and configuring the XDP load balancer program that runs in the kernel. It accepts command-line arguments, loads the eBPF program, attaches it to a network interface, and updates the backend configurations.
|
||||
|
||||
### 1. **Argument Parsing and Backend Setup**
|
||||
|
||||
The program expects five command-line arguments: the name of the network interface (`ifname`), the IP addresses and MAC addresses of two backend servers. It then parses the IP addresses using `inet_pton()` and the MAC addresses using the `parse_mac()` function, which ensures that the format of the provided MAC addresses is correct. The parsed backend information is stored in a `backend_config` structure.
|
||||
|
||||
### 2. **Loading and Attaching the BPF Program**
|
||||
|
||||
The BPF skeleton (generated via `xdp_lb.skel.h`) is used to open and load the XDP program into the kernel. The program then identifies the network interface by converting the interface name into an index using `if_nametoindex()`. Afterward, it attaches the loaded BPF program to this interface using `bpf_program__attach_xdp()`.
|
||||
|
||||
### 3. **Configuring Backend Information**
|
||||
|
||||
The backend IP and MAC addresses are written to the `backends` BPF map using `bpf_map_update_elem()`. This step ensures that the BPF program has access to the backend configurations, allowing it to route packets to the correct backend servers based on the logic in the kernel code.
|
||||
|
||||
### 4. **Program Loop and Cleanup**
|
||||
|
||||
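The program enters an infinite loop (`while (1) { sleep(1); }`) to keep running, allowing the XDP program to continue functioning. Note that this loop never exits on its own and no signal handler is installed, so the detach and `xdp_lb_bpf__destroy()` calls that follow it are never reached: pressing Ctrl+C simply terminates the process. The XDP program is still removed from the interface at that point, because the attachment created by `bpf_program__attach_xdp()` is a BPF link that the kernel releases once the process's file descriptors are closed.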
|
||||
|
||||
In summary, this userspace code is responsible for configuring and managing the lifecycle of the XDP load balancer, making it easy to update backend configurations dynamically and ensuring the load balancer is correctly attached to a network interface.
|
||||
|
||||
## The topology of test environment
|
||||
|
||||
The topology represents a test environment where a local machine communicates with two backend nodes (h2 and h3) through a load balancer. All nodes are attached to a Linux bridge (`br0`) by virtual Ethernet pairs (veth0 through veth7), simulating network connections in a controlled environment. Each addressed interface has its own IP and MAC address to represent a different entity.
|
||||
|
||||
```txt
|
||||
+---------------------------+
|
||||
| Local Machine |
|
||||
| IP: 10.0.0.1 (veth0) |
|
||||
| MAC: DE:AD:BE:EF:00:01 |
|
||||
+------------+---------------+
|
||||
|
|
||||
| (veth1)
|
||||
|
|
||||
+--------+---------------+
|
||||
| Load Balancer |
|
||||
| IP: 10.0.0.10 (veth6) |
|
||||
| MAC: DE:AD:BE:EF:00:10|
|
||||
+--------+---------------+
|
||||
|
|
||||
+---------+----------------------------+
|
||||
| |
|
||||
(veth2) (veth4)
|
||||
| |
|
||||
+--+---------------+ +--------+---------+
|
||||
| h2 | | h3 |
|
||||
| IP: | | IP: |
|
||||
|10.0.0.2 (veth3) | |10.0.0.3 (veth5) |
|
||||
| MAC: | | MAC: |
|
||||
|DE:AD:BE:EF:00:02 | |DE:AD:BE:EF:00:03 |
|
||||
+------------------+ +------------------+
|
||||
```
|
||||
|
||||
The setup can be easily initialized with a script (setup.sh), and removed with a teardown script (teardown.sh).
|
||||
|
||||
> If you are interested in this tutorial, please help us create a containerized version of the setup and topology! Currently the setup and teardown are based on the network namespace, it will be more friendly to have a containerized version of the setup and topology.
|
||||
|
||||
Setup:
|
||||
|
||||
```sh
|
||||
sudo ./setup.sh
|
||||
```
|
||||
|
||||
Teardown:
|
||||
|
||||
```sh
|
||||
sudo ./teardown.sh
|
||||
```
|
||||
|
||||
### Running the Load Balancer
|
||||
|
||||
To run the XDP load balancer, execute the following command, specifying the interface and backends' IP and MAC addresses:
|
||||
|
||||
```console
|
||||
sudo ip netns exec lb ./xdp_lb veth6 10.0.0.2 de:ad:be:ef:00:02 10.0.0.3 de:ad:be:ef:00:03
|
||||
```
|
||||
|
||||
This will configure the load balancer and print the details of the backends:
|
||||
|
||||
```console
|
||||
XDP load balancer configured with backends:
|
||||
Backend 1 - IP: 10.0.0.2, MAC: de:ad:be:ef:00:02
|
||||
Backend 2 - IP: 10.0.0.3, MAC: de:ad:be:ef:00:03
|
||||
Press Ctrl+C to exit...
|
||||
```
|
||||
|
||||
### Testing the Setup
|
||||
|
||||
You can test the setup by starting HTTP servers on the two backend namespaces (`h2` and `h3`) and sending requests from the local machine to the load balancer:
|
||||
|
||||
Start servers on `h2` and `h3`:
|
||||
|
||||
```sh
|
||||
sudo ip netns exec h2 python3 -m http.server
|
||||
sudo ip netns exec h3 python3 -m http.server
|
||||
```
|
||||
|
||||
Then, send a request to the load balancer IP:
|
||||
|
||||
```sh
|
||||
curl 10.0.0.10:8000
|
||||
```
|
||||
|
||||
The load balancer will distribute traffic to the backends (`h2` and `h3`) based on the hashing function.
|
||||
|
||||
### Monitoring with `bpf_printk`
|
||||
|
||||
You can monitor the load balancer's activity by checking the `bpf_printk` logs. The BPF program prints diagnostic messages whenever a packet is processed. You can view these logs using:
|
||||
|
||||
```console
|
||||
sudo cat /sys/kernel/debug/tracing/trace_pipe
|
||||
```
|
||||
|
||||
Example output:
|
||||
|
||||
```console
|
||||
<idle>-0 [004] ..s2. 24174.812722: bpf_trace_printk: xdp_load_balancer received packet
|
||||
<idle>-0 [004] .Ns2. 24174.812729: bpf_trace_printk: Received Source IP: 0xa000001
|
||||
<idle>-0 [004] .Ns2. 24174.812729: bpf_trace_printk: Received Destination IP: 0xa00000a
|
||||
<idle>-0 [004] .Ns2. 24174.812731: bpf_trace_printk: Received Source MAC: de:ad:be:ef:0:1
|
||||
<idle>-0 [004] .Ns2. 24174.812732: bpf_trace_printk: Received Destination MAC: de:ad:be:ef:0:10
|
||||
<idle>-0 [004] .Ns2. 24174.812732: bpf_trace_printk: Packet from client
|
||||
<idle>-0 [004] .Ns2. 24174.812734: bpf_trace_printk: Redirecting packet to new IP 0xa000002 from IP 0xa00000a
|
||||
<idle>-0 [004] .Ns2. 24174.812735: bpf_trace_printk: New Dest MAC: de:ad:be:ef:0:2
|
||||
<idle>-0 [004] .Ns2. 24174.812735: bpf_trace_printk: New Source MAC: de:ad:be:ef:0:10
|
||||
```
|
||||
|
||||
### Debugging Issues
|
||||
|
||||
Some systems may experience packet loss or failure to forward packets due to issues similar to those described in this [blog post](https://fedepaol.github.io/blog/2023/09/11/xdp-ate-my-packets-and-how-i-debugged-it/). You can debug these issues using `bpftrace` to trace XDP errors:
|
||||
|
||||
```sh
|
||||
sudo bpftrace -e 'tracepoint:xdp:xdp_bulk_tx{@redir_errno[-args->err] = count();}'
|
||||
```
|
||||
|
||||
If you see an output like this:
|
||||
|
||||
```sh
|
||||
@redir_errno[6]: 3
|
||||
```
|
||||
|
||||
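It indicates errors related to XDP packet forwarding. The error code `6` is `ENXIO` ("No such device or address"); as the linked blog post describes, on veth interfaces this usually means the receiving peer is not set up to accept XDP frames, for example because no XDP program is attached to it.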
|
||||
|
||||
### Conclusion
|
||||
|
||||
This tutorial demonstrates how to set up a simple XDP load balancer using eBPF, providing efficient traffic distribution across backend servers. For those interested in learning more about eBPF, including more advanced examples and tutorials, please visit our GitHub repository at [https://github.com/eunomia-bpf/bpf-developer-tutorial](https://github.com/eunomia-bpf/bpf-developer-tutorial) or our website at [https://eunomia.dev/tutorials/](https://eunomia.dev/tutorials/).
|
||||
|
||||
### References
|
||||
|
||||
Here’s a simple list of XDP references:
|
||||
|
||||
1. [XDP Programming Hands-On Tutorial](https://github.com/xdp-project/xdp-tutorial)
|
||||
2. [XDP Tutorial in bpf-developer-tutorial](https://eunomia.dev/tutorials/21-xdp/)
|
||||
70
src/42-xdp-loadbalancer/connect.md
Normal file
70
src/42-xdp-loadbalancer/connect.md
Normal file
@@ -0,0 +1,70 @@
|
||||
# Network setup for bpf-developer-tutorial
|
||||
|
||||
In this tutorial, we will set up a simple network topology that simulates a load balancer using eBPF/XDP (Express Data Path). The setup includes a local machine, a load balancer (which can be enhanced with an XDP program), and two backend servers (`h2` and `h3`). The local machine routes packets to the load balancer, which then distributes traffic between the backend servers.
|
||||
|
||||
# Simple XDP Load Balancer Tutorial
|
||||
|
||||
This tutorial will guide you in setting up a simple virtual network to simulate a load balancer using eBPF/XDP.
|
||||
|
||||
## Network Topology
|
||||
|
||||
```txt
|
||||
+------------------+
|
||||
| Local Machine |
|
||||
| IP: 10.0.0.1 |
|
||||
+--------+---------+
|
||||
|
|
||||
+--------+---------+
|
||||
| Load Balancer |
|
||||
| IP: 10.0.0.10 |
|
||||
+--------+---------+
|
||||
|
|
||||
+-------+-------+
|
||||
| |
|
||||
+---+---+ +---+---+
|
||||
| h2 | | h3 |
|
||||
|10.0.0.2| |10.0.0.3|
|
||||
+-------+ +-------+
|
||||
```
|
||||
|
||||
- **Local Machine**: Simulates a client (`10.0.0.1`) sending traffic.
|
||||
- **Load Balancer**: Receives traffic at `10.0.0.10` and distributes it to the backend servers.
|
||||
- **h2** and **h3**: Simulate backend servers (`10.0.0.2` and `10.0.0.3`).
|
||||
|
||||
### Setup Steps
|
||||
|
||||
The `setup.sh` script creates network namespaces, a bridge, and veth pairs, and assigns IP addresses for the local machine, load balancer, and backend servers.
|
||||
|
||||
```bash
|
||||
sudo ./setup.sh
|
||||
```
|
||||
|
||||
To clean up the setup after testing:
|
||||
|
||||
```bash
|
||||
sudo ./teardown.sh
|
||||
```
|
||||
|
||||
### Testing the Network
|
||||
|
||||
You can test the network connectivity using `ping` commands:
|
||||
|
||||
Ping Between Backend Servers (`h2` to `h3`)
|
||||
|
||||
```bash
|
||||
sudo ip netns exec h2 ping -c 3 10.0.0.3
|
||||
```
|
||||
|
||||
Ping from Backend Server (`h2`) to Load Balancer
|
||||
|
||||
```bash
|
||||
sudo ip netns exec h2 ping -c 3 10.0.0.10
|
||||
```
|
||||
|
||||
Ping from Local Machine to Load Balancer
|
||||
|
||||
```bash
|
||||
ping -c 3 10.0.0.10
|
||||
```
|
||||
|
||||
That's it! This simple setup lets you simulate a load balancer using eBPF/XDP. You can extend it by adding custom XDP programs to control the traffic distribution between `h2` and `h3`.
|
||||
14
src/42-xdp-loadbalancer/no-docker/xdp_pass.c
Normal file
14
src/42-xdp-loadbalancer/no-docker/xdp_pass.c
Normal file
@@ -0,0 +1,14 @@
|
||||
#include "vmlinux.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
SEC("xdp")
|
||||
int xdp_pass(struct xdp_md* ctx) {
|
||||
void* data = (void*)(long)ctx->data;
|
||||
void* data_end = (void*)(long)ctx->data_end;
|
||||
int pkt_sz = data_end - data;
|
||||
|
||||
bpf_printk("packet size is %d", pkt_sz);
|
||||
return XDP_PASS;
|
||||
}
|
||||
|
||||
char __license[] SEC("license") = "GPL";
|
||||
BIN
src/42-xdp-loadbalancer/no-docker/xdp_pass.o
Normal file
BIN
src/42-xdp-loadbalancer/no-docker/xdp_pass.o
Normal file
Binary file not shown.
159
src/42-xdp-loadbalancer/setup.sh
Executable file
159
src/42-xdp-loadbalancer/setup.sh
Executable file
@@ -0,0 +1,159 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -xe
|
||||
|
||||
part_mac="DE:AD:BE:EF:00:"
|
||||
|
||||
# Create a bridge and bring it up.
#   $1  name of the bridge device
# If a link with that name already exists, only a message is printed.
create_bridge () {
    local bridge="$1"

    if ip link show "$bridge" &> /dev/null; then
        echo "Bridge $bridge already exists."
        return 0
    fi

    ip link add name "$bridge" type bridge
    ip link set dev "$bridge" up
}
|
||||
|
||||
# Create a veth pair in the current namespace and attach one end to a bridge.
#   $1  interface that receives the IP and MAC address
#   $2  peer interface that is enslaved to the bridge
#   $3  address in CIDR notation for $1
#   $4  bridge that $2 is attached to
#   $5  last octet of the MAC address (appended to $part_mac)
# If $1 already exists, only a message is printed.
create_pair () {
    local dev="$1" peer="$2" cidr="$3" bridge="$4" mac_suffix="$5"

    if ip link show "$dev" &> /dev/null; then
        echo "Veth pair $dev <--> $peer already exists."
        return 0
    fi

    ip link add name "$dev" type veth peer name "$peer"
    ip link set "$dev" address "${part_mac}${mac_suffix}"
    ip addr add "$cidr" brd + dev "$dev"
    ip link set "$peer" master "$bridge"
    ip link set dev "$dev" up
    ip link set dev "$peer" up
}
|
||||
|
||||
# Create a veth pair with one end inside a network namespace and the other
# end attached to a bridge in the current namespace.
#   $1  interface moved into the namespace (receives the IP and MAC address)
#   $2  peer interface that stays outside and is enslaved to the bridge
#   $3  address in CIDR notation for $1
#   $4  bridge that $2 is attached to
#   $5  network namespace name
#   $6  last octet of the MAC address (appended to $part_mac)
# If $2 already exists, only a message is printed.
create_pair_ns () {
    local dev="$1" peer="$2" cidr="$3" bridge="$4" ns="$5" mac_suffix="$6"

    # $1 is moved into the namespace, so only the bridge-side peer is
    # visible from here; use it to detect an existing pair.
    if ip link show "$peer" &> /dev/null; then
        echo "Veth pair $dev <--> $peer already exists in namespace $ns."
        return 0
    fi

    ip link add name "$dev" type veth peer name "$peer"
    ip link set "$peer" master "$bridge"
    ip link set dev "$peer" up

    # A namespace left over from an earlier, partially failed run would make
    # "ip netns add" fail and abort the script under "set -e"; reuse it.
    if ! ip netns list | awk '{print $1}' | grep -Fxq -- "$ns"; then
        ip netns add "$ns"
    fi
    ip link set "$dev" netns "$ns"
    ip netns exec "$ns" ip addr add "$cidr" brd + dev "$dev"
    ip netns exec "$ns" ip link set "$dev" address "${part_mac}${mac_suffix}"
    ip netns exec "$ns" ip link set dev "$dev" up
    ip netns exec "$ns" ip link set lo up  # Bring up loopback interface
}
|
||||
|
||||
# Create bridge br0
|
||||
create_bridge br0
|
||||
|
||||
# Create veth pairs and assign IPs
|
||||
create_pair veth0 veth1 "10.0.0.1/24" br0 01
|
||||
|
||||
# Create veth pairs in namespaces h2, h3, and lb
|
||||
create_pair_ns veth2 veth3 "10.0.0.2/24" br0 h2 02
|
||||
create_pair_ns veth4 veth5 "10.0.0.3/24" br0 h3 03
|
||||
|
||||
# Create the lb namespace
|
||||
create_pair_ns veth6 veth7 "10.0.0.10/24" br0 lb 10
|
||||
|
||||
# Enable IP forwarding on the host
|
||||
sudo sysctl -w net.ipv4.ip_forward=1
|
||||
|
||||
# Set the FORWARD chain policy to ACCEPT in iptables to ensure packets are forwarded
|
||||
sudo iptables -P FORWARD ACCEPT
|
||||
|
||||
# Optional: attach a pass-through XDP program to the backend interfaces, e.g.
# sudo ip netns exec h2 ip link set dev veth2 xdp obj no-docker/xdp_pass.o sec xdp
# sudo ip netns exec h3 ip link set dev veth4 xdp obj no-docker/xdp_pass.o sec xdp
|
||||
|
||||
# Ping a target three times from inside a network namespace and terminate the
# whole script with status 1 if the ping fails.
#   $1  network namespace to ping from
#   $2  address to ping
function ping_or_fail() {
    local from_ns="$1" target="$2"

    if ! sudo ip netns exec "$from_ns" ping -c 3 "$target"; then
        echo "Ping from $from_ns to $target failed!"
        exit 1
    fi
}
|
||||
|
||||
# Ping test with failure checks: exercises every namespace-to-namespace path
# and the host-to-load-balancer path, exiting on the first failure.
function check_connectivity() {
    echo "Testing connectivity between namespaces and Load Balancer..."

    # "<namespace> <target>" pairs, in order: h2 <-> h3, backends -> load
    # balancer, load balancer -> backends.
    local pair
    for pair in \
        "h2 10.0.0.3" "h3 10.0.0.2" \
        "h2 10.0.0.10" "h3 10.0.0.10" \
        "lb 10.0.0.2" "lb 10.0.0.3"
    do
        ping_or_fail "${pair% *}" "${pair#* }"
    done

    # Ping from Local Machine to Load Balancer
    if ! ping -c 3 10.0.0.10; then
        echo "Ping from Local Machine to Load Balancer failed!"
        exit 1
    fi

    echo "All ping tests passed!"
}
|
||||
|
||||
# Debugging helper functions
|
||||
|
||||
# Print the addresses and link state of every interface in the h2, h3 and lb
# namespaces, followed by those of bridge br0 on the host.
check_interfaces () {
    local view

    # NOTE: "ns" is deliberately left as a global loop variable, exactly as
    # before; other helpers in this script may read it afterwards.
    for ns in h2 h3 lb
    do
        echo "Checking interfaces in namespace $ns..."
        for view in addr link; do
            sudo ip netns exec "$ns" ip "$view" show
        done
    done

    echo "Checking bridge br0..."
    for view in addr link; do
        ip "$view" show br0
    done
}
|
||||
|
||||
# Print the net.ipv4.ip_forward setting of the host and of each of the h2, h3
# and lb namespaces.
check_ip_forwarding () {
    local ns

    echo "Checking IP forwarding status on the host..."
    sudo sysctl net.ipv4.ip_forward

    # Iterate explicitly rather than relying on whatever value a previous
    # function happened to leave in $ns.
    for ns in h2 h3 lb; do
        echo "Checking IP forwarding status in namespace $ns..."
        sudo ip netns exec "$ns" sysctl net.ipv4.ip_forward
    done
}
|
||||
|
||||
# Print the neighbour (ARP) table of the host and of each of the h2, h3 and
# lb namespaces.
check_arp_table () {
    echo "Checking ARP table on the host..."
    # Use iproute2 here as well: it is already required by the rest of this
    # script, whereas the net-tools "arp" binary may not be installed and its
    # absence would abort the script under "set -e".
    ip neigh show

    for ns in h2 h3 lb; do
        echo "Checking ARP table in namespace $ns..."
        sudo ip netns exec "$ns" ip neigh show
    done
}
|
||||
|
||||
# Print the routing table of the host and of each of the h2, h3 and lb
# namespaces.
check_routing_table () {
    printf '%s\n' "Checking routing table on the host..."
    ip route show

    for ns in h2 h3 lb
    do
        printf '%s\n' "Checking routing table in namespace $ns..."
        sudo ip netns exec "$ns" ip route show
    done
}
|
||||
|
||||
# List the host's iptables rules so that a rule blocking traffic can be
# spotted in the output.
check_firewall_rules () {
    printf '%s\n' "Checking firewall rules on the host..."
    sudo iptables -L
}
|
||||
|
||||
# Run checks to verify the network
|
||||
check_interfaces
|
||||
check_ip_forwarding
|
||||
check_arp_table
|
||||
check_routing_table
|
||||
check_firewall_rules
|
||||
check_connectivity
|
||||
|
||||
echo "Setup and checks completed!"
|
||||
36
src/42-xdp-loadbalancer/teardown.sh
Executable file
36
src/42-xdp-loadbalancer/teardown.sh
Executable file
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -xe
|
||||
|
||||
# Bring a bridge down and delete it.
#   $1  name of the bridge device
# Does nothing if no link with that name exists.
rm_bridge () {
    local bridge="$1"

    if ! ip link show "$bridge" &> /dev/null; then
        return 0
    fi

    ip link set dev "$bridge" down
    ip link delete "$bridge" type bridge
}
|
||||
|
||||
# Delete a veth interface.
#   $1  name of the veth interface
# Does nothing if the interface is not visible in the current namespace.
rm_pair () {
    local dev="$1"

    if ! ip link show "$dev" &> /dev/null; then
        return 0
    fi

    ip link delete "$dev" type veth
}
|
||||
|
||||
# Delete a network namespace.
#   $1  name of the namespace
# Does nothing if no namespace with exactly that name exists.
rm_ns () {
    local ns="$1"

    # Compare the whole name: a word match would also accept e.g. "lb-test"
    # when asked for "lb", and the delete below would then fail and abort the
    # script under "set -e".
    if ip netns list | awk '{print $1}' | grep -Fxq -- "$ns"; then
        ip netns delete "$ns"
    fi
}
|
||||
|
||||
# Remove bridge br0
|
||||
rm_bridge br0
|
||||
|
||||
# Remove veth pairs
|
||||
rm_pair veth0
|
||||
rm_pair veth2
|
||||
rm_pair veth4
|
||||
rm_pair veth6
|
||||
|
||||
# Remove namespaces
|
||||
rm_ns h2
|
||||
rm_ns h3
|
||||
rm_ns lb
|
||||
117
src/42-xdp-loadbalancer/xdp_lb.bpf.c
Normal file
117
src/42-xdp-loadbalancer/xdp_lb.bpf.c
Normal file
@@ -0,0 +1,117 @@
|
||||
// xdp_lb.bpf.c
|
||||
#include <bpf/bpf_endian.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/in.h>
|
||||
#include <linux/tcp.h>
|
||||
#include "xx_hash.h"
|
||||
|
||||
struct backend_config {
|
||||
__u32 ip;
|
||||
unsigned char mac[ETH_ALEN];
|
||||
};
|
||||
|
||||
// Backend IP and MAC address map
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(max_entries, 2); // Two backends
|
||||
__type(key, __u32);
|
||||
__type(value, struct backend_config);
|
||||
} backends SEC(".maps");
|
||||
|
||||
int client_ip = bpf_htonl(0xa000001);
|
||||
unsigned char client_mac[ETH_ALEN] = {0xDE, 0xAD, 0xBE, 0xEF, 0x0, 0x1};
|
||||
int load_balancer_ip = bpf_htonl(0xa00000a);
|
||||
unsigned char load_balancer_mac[ETH_ALEN] = {0xDE, 0xAD, 0xBE, 0xEF, 0x0, 0x10};
|
||||
|
||||
/*
 * Fold a wide checksum accumulator towards 16 bits and return its one's
 * complement. Up to four folding passes are performed; each pass adds the
 * bits above bit 15 back onto the low 16 bits.
 */
static __always_inline __u16
csum_fold_helper(__u64 csum)
{
    for (int pass = 0; pass < 4; pass++)
    {
        __u64 carry = csum >> 16;

        if (carry)
            csum = (csum & 0xffff) + carry;
    }
    /* The return type truncates the complemented value to 16 bits. */
    return ~csum;
}
|
||||
|
||||
static __always_inline __u16
|
||||
iph_csum(struct iphdr *iph)
|
||||
{
|
||||
iph->check = 0;
|
||||
unsigned long long csum = bpf_csum_diff(0, 0, (unsigned int *)iph, sizeof(struct iphdr), 0);
|
||||
return csum_fold_helper(csum);
|
||||
}
|
||||
|
||||
/*
 * XDP entry point of the load balancer.
 *
 * IPv4/TCP packets whose source address is client_ip are rewritten towards
 * one of the two entries of the `backends` map; every other IPv4/TCP packet
 * is treated as a backend reply and rewritten towards the client. In both
 * directions the source IP and MAC become those of the load balancer, the IP
 * header checksum is recomputed, and the packet is sent back out with XDP_TX.
 * Anything that is not an option-less IPv4/TCP packet is left to the regular
 * network stack (XDP_PASS).
 *
 * NOTE(review): the TCP checksum is not adjusted here even though the IP
 * addresses covered by the TCP pseudo-header are rewritten - confirm whether
 * the target setup relies on checksum offload for this to work.
 */
SEC("xdp")
int xdp_load_balancer(struct xdp_md *ctx) {
    void *data_end = (void *)(long)ctx->data_end;
    void *data = (void *)(long)ctx->data;

    bpf_printk("xdp_load_balancer received packet");

    // Ethernet header
    struct ethhdr *eth = data;
    if ((void *)(eth + 1) > data_end)
        return XDP_PASS;

    // Check if the packet is IP (IPv4)
    if (eth->h_proto != __constant_htons(ETH_P_IP))
        return XDP_PASS;

    // IP header
    struct iphdr *iph = (struct iphdr *)(eth + 1);
    if ((void *)(iph + 1) > data_end)
        return XDP_PASS;

    // Only TCP is load-balanced
    if (iph->protocol != IPPROTO_TCP)
        return XDP_PASS;

    // iph_csum() sums exactly sizeof(struct iphdr) bytes and the TCP header
    // is looked up right behind that, so headers carrying IP options are not
    // handled here.
    if (iph->ihl != 5)
        return XDP_PASS;

    // TCP header (needed for the ports that identify the connection)
    struct tcphdr *tcph = (struct tcphdr *)(iph + 1);
    if ((void *)(tcph + 1) > data_end)
        return XDP_PASS;

    bpf_printk("Received Source IP: 0x%x", bpf_ntohl(iph->saddr));
    bpf_printk("Received Destination IP: 0x%x", bpf_ntohl(iph->daddr));
    bpf_printk("Received Source MAC: %x:%x:%x:%x:%x:%x", eth->h_source[0], eth->h_source[1], eth->h_source[2], eth->h_source[3], eth->h_source[4], eth->h_source[5]);
    bpf_printk("Received Destination MAC: %x:%x:%x:%x:%x:%x", eth->h_dest[0], eth->h_dest[1], eth->h_dest[2], eth->h_dest[3], eth->h_dest[4], eth->h_dest[5]);

    if (iph->saddr == client_ip)
    {
        bpf_printk("Packet from client");

        // Hash only the fields that stay constant for the lifetime of a TCP
        // connection. Hashing the whole IP header would include id, tot_len
        // and check, which differ from packet to packet, so packets of one
        // connection would be spread over both backends.
        struct {
            __u32 saddr;
            __u32 daddr;
            __u16 sport;
            __u16 dport;
        } flow = {
            .saddr = iph->saddr,
            .daddr = iph->daddr,
            .sport = tcph->source,
            .dport = tcph->dest,
        };
        __u32 key = xxhash32((const char *)&flow, sizeof(flow), 0) % 2;

        struct backend_config *backend = bpf_map_lookup_elem(&backends, &key);
        if (!backend)
            return XDP_PASS;

        iph->daddr = backend->ip;
        __builtin_memcpy(eth->h_dest, backend->mac, ETH_ALEN);
    }
    else
    {
        // Every TCP packet that does not come from the client is assumed to
        // be a backend reply; the source is not checked against the map.
        bpf_printk("Packet from backend");
        iph->daddr = client_ip;
        __builtin_memcpy(eth->h_dest, client_mac, ETH_ALEN);
    }

    // Update IP source address to the load balancer's IP
    iph->saddr = load_balancer_ip;
    // Update Ethernet source MAC address to the current lb's MAC
    __builtin_memcpy(eth->h_source, load_balancer_mac, ETH_ALEN);

    // Recalculate IP checksum
    iph->check = iph_csum(iph);

    bpf_printk("Redirecting packet to new IP 0x%x from IP 0x%x",
        bpf_ntohl(iph->daddr),
        bpf_ntohl(iph->saddr)
    );
    bpf_printk("New Dest MAC: %x:%x:%x:%x:%x:%x", eth->h_dest[0], eth->h_dest[1], eth->h_dest[2], eth->h_dest[3], eth->h_dest[4], eth->h_dest[5]);
    bpf_printk("New Source MAC: %x:%x:%x:%x:%x:%x\n", eth->h_source[0], eth->h_source[1], eth->h_source[2], eth->h_source[3], eth->h_source[4], eth->h_source[5]);
    // Return XDP_TX to transmit the modified packet back to the network
    return XDP_TX;
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
96
src/42-xdp-loadbalancer/xdp_lb.c
Normal file
96
src/42-xdp-loadbalancer/xdp_lb.c
Normal file
@@ -0,0 +1,96 @@
|
||||
// xdp_lb.c
|
||||
#include <arpa/inet.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <net/if.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "xdp_lb.skel.h"  // The generated skeleton
|
||||
|
||||
struct backend_config {
|
||||
__u32 ip;
|
||||
unsigned char mac[6];
|
||||
};
|
||||
|
||||
/*
 * Parse a MAC address written as six colon-separated hexadecimal octets
 * (e.g. "de:ad:be:ef:00:02").
 *
 * str: NUL-terminated text to parse.
 * mac: receives the six parsed bytes; left untouched when parsing fails.
 *
 * Returns 0 on success. On failure prints a message to stderr and returns -1.
 */
static int parse_mac(const char *str, unsigned char *mac) {
    unsigned char octets[6];
    char trailing;

    /*
     * "%2hhx" limits every octet to two hex digits, and the final "%c" only
     * converts if something follows the sixth octet. Exactly six successful
     * conversions therefore means a complete address with nothing after it.
     */
    if (!str || !mac ||
        sscanf(str, "%2hhx:%2hhx:%2hhx:%2hhx:%2hhx:%2hhx%c",
               &octets[0], &octets[1], &octets[2],
               &octets[3], &octets[4], &octets[5], &trailing) != 6) {
        fprintf(stderr, "Invalid MAC address format\n");
        return -1;
    }

    memcpy(mac, octets, sizeof(octets));
    return 0;
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc != 6) {
|
||||
fprintf(stderr, "Usage: %s <ifname> <backend1_ip> <backend1_mac> <backend2_ip> <backend2_mac>\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
const char *ifname = argv[1];
|
||||
struct backend_config backend[2];
|
||||
|
||||
// Parse backend 1
|
||||
if (inet_pton(AF_INET, argv[2], &backend[0].ip) != 1) {
|
||||
fprintf(stderr, "Invalid backend 1 IP address\n");
|
||||
return 1;
|
||||
}
|
||||
if (parse_mac(argv[3], backend[0].mac) < 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Parse backend 2
|
||||
if (inet_pton(AF_INET, argv[4], &backend[1].ip) != 1) {
|
||||
fprintf(stderr, "Invalid backend 2 IP address\n");
|
||||
return 1;
|
||||
}
|
||||
if (parse_mac(argv[5], backend[1].mac) < 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Load and attach the BPF program
|
||||
struct xdp_lb_bpf *skel = xdp_lb_bpf__open_and_load();
|
||||
if (!skel) {
|
||||
fprintf(stderr, "Failed to open and load BPF skeleton\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
int ifindex = if_nametoindex(ifname);
|
||||
if (ifindex < 0) {
|
||||
perror("if_nametoindex");
|
||||
xdp_lb_bpf__destroy(skel);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (bpf_program__attach_xdp(skel->progs.xdp_load_balancer, ifindex) < 0) {
|
||||
fprintf(stderr, "Failed to attach XDP program\n");
|
||||
xdp_lb_bpf__destroy(skel);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Update backend configurations
|
||||
for (int i = 0; i < 2; i++) {
|
||||
if (bpf_map_update_elem(bpf_map__fd(skel->maps.backends), &i, &backend[i], 0) < 0) {
|
||||
perror("bpf_map_update_elem");
|
||||
xdp_lb_bpf__destroy(skel);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
printf("XDP load balancer configured with backends:\n");
|
||||
printf("Backend 1 - IP: %s, MAC: %s\n", argv[2], argv[3]);
|
||||
printf("Backend 2 - IP: %s, MAC: %s\n", argv[4], argv[5]);
|
||||
|
||||
printf("Press Ctrl+C to exit...\n");
|
||||
while (1) {
|
||||
sleep(1); // Keep the program running
|
||||
}
|
||||
|
||||
// Cleanup and detach
|
||||
bpf_xdp_detach(ifindex, 0, NULL);
|
||||
xdp_lb_bpf__detach(skel);
|
||||
xdp_lb_bpf__destroy(skel);
|
||||
return 0;
|
||||
}
|
||||
57
src/42-xdp-loadbalancer/xx_hash.h
Normal file
57
src/42-xdp-loadbalancer/xx_hash.h
Normal file
@@ -0,0 +1,57 @@
|
||||
#ifndef XXHASH_BPF_H
|
||||
#define XXHASH_BPF_H
|
||||
|
||||
/*
 * 32-bit unsigned multiplier constants used by the mixing helpers in this
 * header (round_xxhash, avalanche, finalize, h16bytes_3, xxhash32).
 * NOTE(review): the values appear to be the five XXH32 prime constants of
 * the reference xxHash algorithm -- confirm against the xxHash spec.
 */
#define PRIME1 0x9E3779B1U
|
||||
#define PRIME2 0x85EBCA77U
|
||||
#define PRIME3 0xC2B2AE3DU
|
||||
#define PRIME4 0x27D4EB2FU
|
||||
#define PRIME5 0x165667B1U
|
||||
|
||||
/*
 * Rotate the 32-bit value x left by r bit positions.
 *
 * The rotation count is reduced modulo 32 before shifting. The previous
 * form evaluated x >> (32 - r), which is a shift by the full width of the
 * type when r == 0 and therefore undefined behaviour in C. For every count
 * in 1..31 the result is unchanged from the earlier implementation.
 *
 * x: value to rotate (treated as 32 bits wide).
 * r: rotation count; only its low five bits are used.
 * Returns x rotated left by (r mod 32) bits.
 */
static __always_inline unsigned int rotl (unsigned int x, int r) {
    const unsigned int n = (unsigned int)r & 31U;

    /* (32 - n) & 31 maps n == 0 to a shift of 0 rather than 32. */
    return (x << n) | (x >> ((32U - n) & 31U));
}
|
||||
// Normal stripe processing routine.
|
||||
static __always_inline unsigned int round_xxhash(unsigned int acc, const unsigned int input) {
|
||||
return rotl(acc + (input * PRIME2), 13) * PRIME1;
|
||||
}
|
||||
|
||||
/*
 * Single mixing step: XOR h with itself shifted right by `rshift` bits,
 * then multiply the result by `prime` (arithmetic wraps as unsigned int).
 */
static __always_inline unsigned int avalanche_step (const unsigned int h, const int rshift, const unsigned int prime) {
    const unsigned int shifted = h >> rshift;
    const unsigned int mixed = h ^ shifted;

    return mixed * prime;
}
|
||||
// Mixes all bits to finalize the hash.
|
||||
static __always_inline unsigned int avalanche (const unsigned int h) {
|
||||
return avalanche_step(avalanche_step(avalanche_step(h, 15, PRIME2), 13, PRIME3), 16, 1);
|
||||
}
|
||||
|
||||
/*
 * Assemble a 32-bit value from the four bytes at v, least significant byte
 * first (v[0] becomes bits 0-7, v[3] becomes bits 24-31). Each byte is
 * converted to unsigned char first so it is not sign-extended.
 */
static __always_inline unsigned int endian32 (const char *v) {
    const unsigned int b0 = (unsigned int)((unsigned char)(v[0]));
    const unsigned int b1 = (unsigned int)((unsigned char)(v[1]));
    const unsigned int b2 = (unsigned int)((unsigned char)(v[2]));
    const unsigned int b3 = (unsigned int)((unsigned char)(v[3]));

    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
}
|
||||
|
||||
/*
 * Read four bytes at p via endian32() and fold them into the accumulator v
 * with one round_xxhash() round. Returns the updated accumulator.
 */
static __always_inline unsigned int fetch32 (const char *p, const unsigned int v) {
    const unsigned int lane = endian32(p);

    return round_xxhash(v, lane);
}
|
||||
|
||||
// Processes the last 0-15 bytes of p.
|
||||
static __always_inline unsigned int finalize (const unsigned int h, const char *p, unsigned int len) {
|
||||
return
|
||||
(len >= 4) ? finalize(rotl(h + (endian32(p) * PRIME3), 17) * PRIME4, p + 4, len - 4) :
|
||||
(len > 0) ? finalize(rotl(h + ((unsigned char)(*p) * PRIME5), 11) * PRIME1, p + 1, len - 1) :
|
||||
avalanche(h);
|
||||
}
|
||||
|
||||
/*
 * Consume the input in 16-byte stripes, updating four accumulators, and
 * merge them into a single 32-bit value.
 *
 * Written as a loop rather than self-recursion: a function that calls
 * itself cannot really be force-inlined, so the recursive form relied on
 * the optimizer eliminating the tail call. Each iteration performs exactly
 * what one recursive step did: four fetch32() rounds on the words at
 * p, p+4, p+8 and p+12, then advance by 16 bytes.
 *
 * p:       start of the input.
 * len:     number of bytes available at p; any trailing len % 16 bytes are
 *          left untouched for the caller.
 * v1..v4:  initial accumulator values.
 * Returns rotl(a1,1) + rotl(a2,7) + rotl(a3,12) + rotl(a4,18) of the final
 * accumulators.
 */
static __always_inline unsigned int h16bytes_4 (const char *p, unsigned int len, const unsigned int v1, const unsigned int v2, const unsigned int v3, const unsigned int v4) {
    unsigned int a1 = v1;
    unsigned int a2 = v2;
    unsigned int a3 = v3;
    unsigned int a4 = v4;

    while (len >= 16) {
        a1 = fetch32(p, a1);
        a2 = fetch32(p + 4, a2);
        a3 = fetch32(p + 8, a3);
        a4 = fetch32(p + 12, a4);
        p += 16;
        len -= 16;
    }

    return rotl(a1, 1) + rotl(a2, 7) + rotl(a3, 12) + rotl(a4, 18);
}
|
||||
|
||||
static __always_inline unsigned int h16bytes_3 (const char *p, unsigned int len, const unsigned int seed) {
|
||||
return h16bytes_4(p, len, seed + PRIME1 + PRIME2, seed + PRIME2, seed, seed - PRIME1);
|
||||
}
|
||||
|
||||
static __always_inline unsigned int xxhash32 (const char *input, unsigned int len, unsigned int seed) {
|
||||
return finalize((len >= 16 ? h16bytes_3(input, len, seed) : seed + PRIME5) + len, (input) + (len & ~0xF), len & 0xF);
|
||||
}
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user