mirror of
https://github.com/eunomia-bpf/bpf-developer-tutorial.git
synced 2026-02-03 18:24:27 +08:00
add socops code
This commit is contained in:
8
src/29-sockops/.gitignore
vendored
Normal file
8
src/29-sockops/.gitignore
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
.vscode
|
||||
package.json
|
||||
*.o
|
||||
*.skel.json
|
||||
*.skel.yaml
|
||||
package.yaml
|
||||
ecli
|
||||
ecc
|
||||
@@ -0,0 +1,76 @@
|
||||
# eBPF sockops 示例
|
||||
|
||||
## 利用 eBPF 的 sockops 进行性能优化
|
||||
|
||||
网络连接本质上是 socket 之间的通讯,eBPF 提供了一个 [bpf_msg_redirect_hash](https://man7.org/linux/man-pages/man7/bpf-helpers.7.html) 函数,用来将应用发出的包直接转发到对端的 socket,可以极大地加速包在内核中的处理流程。
|
||||
|
||||
这里 sock_map 是记录 socket 规则的关键部分,即根据当前的数据包信息,从 sock_map 中挑选一个存在的 socket 连接来转发请求。所以需要先在 sockops 的 hook 处或者其它地方,将 socket 信息保存到 sock_map,并提供一个规则 (一般为四元组) 根据 key 查找到 socket。
|
||||
|
||||
Merbridge 项目就是这样实现了用 eBPF 代替 iptables 为 Istio 进行加速。在使用 Merbridge (eBPF) 优化之后,出入口流量会直接跳过很多内核模块,明显提高性能,如下图所示:
|
||||
|
||||

|
||||
|
||||
## 运行样例
|
||||
|
||||
此示例程序从发送者的套接字(出口)重定向流量至接收者的套接字(入口),**跳过 TCP/IP 内核网络栈**。在这个示例中,我们假定发送者和接收者都在**同一台**机器上运行。
|
||||
|
||||
### 编译 eBPF 程序
|
||||
|
||||
```shell
|
||||
# Compile the bpf_sockops and bpf_redir programs
|
||||
clang -O2 -g -Wall -target bpf -c bpf_sockops.c -o bpf_sockops.o
|
||||
clang -O2 -g -Wall -target bpf -c bpf_redir.c -o bpf_redir.o
|
||||
```
|
||||
|
||||
### 加载 eBPF 程序
|
||||
|
||||
```shell
|
||||
sudo ./load.sh
|
||||
```
|
||||
|
||||
您可以使用 [bpftool utility](https://github.com/torvalds/linux/blob/master/tools/bpf/bpftool/Documentation/bpftool-prog.rst) 检查这两个 eBPF 程序是否已经加载。
|
||||
|
||||
```console
|
||||
$ sudo bpftool prog show
|
||||
63: sock_ops name bpf_sockmap tag 275467be1d69253d gpl
|
||||
loaded_at 2019-01-24T13:07:17+0200 uid 0
|
||||
xlated 1232B jited 750B memlock 4096B map_ids 58
|
||||
64: sk_msg name bpf_redir tag bc78074aa9dd96f4 gpl
|
||||
loaded_at 2019-01-24T13:07:17+0200 uid 0
|
||||
xlated 304B jited 233B memlock 4096B map_ids 58
|
||||
```
|
||||
|
||||
### 运行 [iperf3](https://iperf.fr/) 服务器
|
||||
|
||||
```shell
|
||||
iperf3 -s -p 10000
|
||||
```
|
||||
|
||||
### 运行 [iperf3](https://iperf.fr/) 客户端
|
||||
|
||||
```shell
|
||||
iperf3 -c 127.0.0.1 -t 10 -l 64k -p 10000
|
||||
```
|
||||
|
||||
### 收集追踪
|
||||
|
||||
```console
|
||||
$ ./trace.sh
|
||||
iperf3-9516 [001] .... 22500.634108: 0: <<< ipv4 op = 4, port 18583 --> 4135
|
||||
iperf3-9516 [001] ..s1 22500.634137: 0: <<< ipv4 op = 5, port 4135 --> 18583
|
||||
iperf3-9516 [001] .... 22500.634523: 0: <<< ipv4 op = 4, port 19095 --> 4135
|
||||
iperf3-9516 [001] ..s1 22500.634536: 0: <<< ipv4 op = 5, port 4135 --> 19095
|
||||
```
|
||||
|
||||
你应该可以看到 4 个用于套接字建立的事件。注意日志中的端口号是按网络字节序打印的,例如 4135(0x1027)对应的就是端口 10000(0x2710)。如果你没有看到任何事件,那么 eBPF 程序可能没有正确地附加上。
|
||||
|
||||
### 卸载 eBPF 程序
|
||||
|
||||
```shell
|
||||
sudo ./unload.sh
|
||||
```
|
||||
|
||||
## 参考资料和源代码来源
|
||||
|
||||
- <https://github.com/zachidan/ebpf-sockops>
|
||||
- <https://github.com/merbridge/merbridge>
|
||||
|
||||
27
src/29-sockops/bpf_redir.c
Normal file
27
src/29-sockops/bpf_redir.c
Normal file
@@ -0,0 +1,27 @@
|
||||
#include <linux/bpf.h>
|
||||
#include <sys/socket.h>
|
||||
|
||||
#include "bpf_sockops.h"
|
||||
|
||||
/*
 * sk_msg program: for messages whose key involves the loopback address and
 * the demo port, ask the kernel to redirect the message to the socket stored
 * under that key in sock_ops_map. Every message is answered with SK_PASS.
 */
__section("sk_msg")
int bpf_redir(struct sk_msg_md *msg)
{
	/*
	 * Raw values as they are compared against the key fields:
	 *   16777343 == 0x0100007F, i.e. the bytes 127.0.0.1 laid out little-endian
	 *   4135     == 0x1027, the byte-swapped form of 10000 (0x2710)
	 */
	enum {
		REDIR_LOOPBACK_IP4 = 16777343,
		REDIR_DEMO_PORT = 4135,
	};
	struct sock_key key = {};

	sk_msg_extract4_key(msg, &key);

	/* Neither endpoint is the loopback address: nothing to do. */
	if (key.sip4 != REDIR_LOOPBACK_IP4 && key.dip4 != REDIR_LOOPBACK_IP4)
		return SK_PASS;

	/* Neither port is the demo port: nothing to do. */
	if (key.sport != REDIR_DEMO_PORT && key.dport != REDIR_DEMO_PORT)
		return SK_PASS;

	int len1 = (__u64)msg->data_end - (__u64)msg->data;

	printk("<<< redir_proxy port %d --> %d (%d)\n", key.sport, key.dport, len1);

	/* The helper's return value is not inspected; SK_PASS is returned regardless. */
	msg_redirect_hash(msg, &sock_ops_map, &key, BPF_F_INGRESS);

	return SK_PASS;
}
|
||||
|
||||
BPF_LICENSE("GPL");
|
||||
int _version __section("version") = 1;
|
||||
52
src/29-sockops/bpf_sockops.c
Normal file
52
src/29-sockops/bpf_sockops.c
Normal file
@@ -0,0 +1,52 @@
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/bpf_common.h>
|
||||
#include <sys/socket.h>
|
||||
|
||||
#include "bpf_sockops.h"
|
||||
|
||||
static inline void bpf_sock_ops_ipv4(struct bpf_sock_ops *skops)
|
||||
{
|
||||
struct sock_key key = {};
|
||||
sk_extract4_key(skops, &key);
|
||||
if (key.dip4 == 16777343 || key.sip4 == 16777343 ) {
|
||||
if (key.dport == 4135 || key.sport == 4135) {
|
||||
int ret = sock_hash_update(skops, &sock_ops_map, &key, BPF_NOEXIST);
|
||||
printk("<<< ipv4 op = %d, port %d --> %d\n", skops->op, key.sport, key.dport);
|
||||
if (ret != 0)
|
||||
printk("*** FAILED %d ***\n", ret);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void bpf_sock_ops_ipv6(struct bpf_sock_ops *skops)
|
||||
{
|
||||
if (skops->remote_ip4)
|
||||
bpf_sock_ops_ipv4(skops);
|
||||
}
|
||||
|
||||
|
||||
/*
 * sockops program: reacts only to the passive/active "established"
 * callbacks and dispatches on the socket's address family. All other
 * operations and families are ignored. Always returns 0.
 */
__section("sockops")
int bpf_sockmap(struct bpf_sock_ops *skops)
{
	const __u32 family = skops->family;
	const __u32 op = skops->op;

	if (op != BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB &&
	    op != BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB)
		return 0;

	if (family == AF_INET6)
		bpf_sock_ops_ipv6(skops);
	else if (family == AF_INET)
		bpf_sock_ops_ipv4(skops);

	return 0;
}
|
||||
|
||||
BPF_LICENSE("GPL");
|
||||
int _version __section("version") = 1;
|
||||
168
src/29-sockops/bpf_sockops.h
Normal file
168
src/29-sockops/bpf_sockops.h
Normal file
@@ -0,0 +1,168 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/swab.h>
|
||||
|
||||
/*
 * Host <-> network byte-order conversion.
 *
 * The __bpf_* primitives are selected by the compiler-reported byte order:
 * on little-endian targets they swap bytes, on big-endian targets they are
 * the identity. The "constant" variants rely on ___constant_swab16/32.
 */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
/* 16-bit */
# define __bpf_ntohs(x)			__builtin_bswap16(x)
# define __bpf_htons(x)			__builtin_bswap16(x)
# define __bpf_constant_ntohs(x)	___constant_swab16(x)
# define __bpf_constant_htons(x)	___constant_swab16(x)
/* 32-bit */
# define __bpf_ntohl(x)			__builtin_bswap32(x)
# define __bpf_htonl(x)			__builtin_bswap32(x)
# define __bpf_constant_ntohl(x)	___constant_swab32(x)
# define __bpf_constant_htonl(x)	___constant_swab32(x)
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* 16-bit */
# define __bpf_ntohs(x)			(x)
# define __bpf_htons(x)			(x)
# define __bpf_constant_ntohs(x)	(x)
# define __bpf_constant_htons(x)	(x)
/* 32-bit */
# define __bpf_ntohl(x)			(x)
# define __bpf_htonl(x)			(x)
# define __bpf_constant_ntohl(x)	(x)
# define __bpf_constant_htonl(x)	(x)
#else
# error "Fix your compiler's __BYTE_ORDER__?!"
#endif

/*
 * Public wrappers: use the constant form when the argument is a
 * compile-time constant, and the builtin form otherwise.
 */
#define bpf_htons(x) \
	(__builtin_constant_p(x) ? __bpf_constant_htons(x) : __bpf_htons(x))
#define bpf_ntohs(x) \
	(__builtin_constant_p(x) ? __bpf_constant_ntohs(x) : __bpf_ntohs(x))
#define bpf_htonl(x) \
	(__builtin_constant_p(x) ? __bpf_constant_htonl(x) : __bpf_htonl(x))
#define bpf_ntohl(x) \
	(__builtin_constant_p(x) ? __bpf_constant_ntohl(x) : __bpf_ntohl(x))
|
||||
|
||||
/** Section helper macros. */
|
||||
|
||||
/* Place a symbol in the named ELF section and keep it even if unreferenced. */
#ifndef __section
# define __section(NAME) __attribute__((section(NAME), used))
#endif

/*
 * Section named "<ID>/<KEY>".
 * NOTE(review): __stringify is not defined in this header; it must be
 * supplied by the includer before this macro is used.
 */
#ifndef __section_tail
# define __section_tail(ID, KEY) __section(__stringify(ID) "/" __stringify(KEY))
#endif

/* Fixed section names used by the declarations in this example. */
#ifndef __section_cls_entry
# define __section_cls_entry __section("classifier")
#endif

#ifndef __section_act_entry
# define __section_act_entry __section("action")
#endif

#ifndef __section_license
# define __section_license __section("license")
#endif

#ifndef __section_maps
# define __section_maps __section("maps")
#endif
|
||||
|
||||
/** Declaration helper macros. */
|
||||
|
||||
/* Define the license string in the "license" section. */
#ifndef BPF_LICENSE
# define BPF_LICENSE(NAME) char ____license[] __section_license = NAME
#endif

/*
 * Declare a function pointer called NAME, with the given parameter list,
 * initialised to the helper id BPF_FUNC_<NAME>. The return type is written
 * by the user in front of the macro invocation.
 */
#ifndef BPF_FUNC
# define BPF_FUNC(NAME, ...) (*NAME)(__VA_ARGS__) = (void *)BPF_FUNC_##NAME
#endif
|
||||
|
||||
static int BPF_FUNC(sock_hash_update, struct bpf_sock_ops *skops, void *map, void *key, uint64_t flags);
|
||||
static int BPF_FUNC(msg_redirect_hash, struct sk_msg_md *md, void *map, void *key, uint64_t flags);
|
||||
static void BPF_FUNC(trace_printk, const char *fmt, int fmt_size, ...);
|
||||
|
||||
/*
 * printf-like tracing wrapper: copies the format string into a local char
 * array and passes it, with its size, to trace_printk.
 */
#ifndef printk
# define printk(fmt, ...)						\
	({								\
		char ____fmt[] = fmt;					\
		trace_printk(____fmt, sizeof(____fmt), ##__VA_ARGS__);	\
	})
#endif
|
||||
|
||||
|
||||
struct bpf_map_def {
|
||||
__u32 type;
|
||||
__u32 key_size;
|
||||
__u32 value_size;
|
||||
__u32 max_entries;
|
||||
__u32 map_flags;
|
||||
};
|
||||
|
||||
union v6addr {
|
||||
struct {
|
||||
__u32 p1;
|
||||
__u32 p2;
|
||||
__u32 p3;
|
||||
__u32 p4;
|
||||
};
|
||||
__u8 addr[16];
|
||||
};
|
||||
|
||||
struct sock_key {
|
||||
union {
|
||||
struct {
|
||||
__u32 sip4;
|
||||
__u32 pad1;
|
||||
__u32 pad2;
|
||||
__u32 pad3;
|
||||
};
|
||||
union v6addr sip6;
|
||||
};
|
||||
union {
|
||||
struct {
|
||||
__u32 dip4;
|
||||
__u32 pad4;
|
||||
__u32 pad5;
|
||||
__u32 pad6;
|
||||
};
|
||||
union v6addr dip6;
|
||||
};
|
||||
__u8 family;
|
||||
__u8 pad7;
|
||||
__u16 pad8;
|
||||
__u32 sport;
|
||||
__u32 dport;
|
||||
} __attribute__((packed));
|
||||
|
||||
/*
 * Socket hash shared by the sockops and sk_msg programs: keyed by
 * struct sock_key, with int-sized values and room for 65535 entries.
 */
struct bpf_map_def __section_maps sock_ops_map = {
	.type		= BPF_MAP_TYPE_SOCKHASH,
	.key_size	= sizeof(struct sock_key),
	.value_size	= sizeof(int),
	.max_entries	= 65535,
	.map_flags	= 0,
};
|
||||
|
||||
static inline void sk_extract4_key(struct bpf_sock_ops *ops,
|
||||
struct sock_key *key)
|
||||
{
|
||||
key->dip4 = ops->remote_ip4;
|
||||
key->sip4 = ops->local_ip4;
|
||||
key->family = 1;
|
||||
|
||||
key->sport = (bpf_htonl(ops->local_port) >> 16);
|
||||
key->dport = ops->remote_port >> 16;
|
||||
}
|
||||
|
||||
static inline void sk_msg_extract4_key(struct sk_msg_md *msg,
|
||||
struct sock_key *key)
|
||||
{
|
||||
key->sip4 = msg->remote_ip4;
|
||||
key->dip4 = msg->local_ip4;
|
||||
key->family = 1;
|
||||
|
||||
key->dport = (bpf_htonl(msg->local_port) >> 16);
|
||||
key->sport = msg->remote_port >> 16;
|
||||
}
|
||||
3
src/29-sockops/envoy/Dockerfile
Normal file
3
src/29-sockops/envoy/Dockerfile
Normal file
@@ -0,0 +1,3 @@
|
||||
FROM envoyproxy/envoy:latest
|
||||
COPY envoy.yaml /etc/envoy/envoy.yaml
|
||||
EXPOSE 9901
|
||||
30
src/29-sockops/envoy/envoy.yaml
Normal file
30
src/29-sockops/envoy/envoy.yaml
Normal file
@@ -0,0 +1,30 @@
|
||||
admin:
|
||||
access_log_path: /tmp/admin_access.log
|
||||
address:
|
||||
socket_address:
|
||||
protocol: TCP
|
||||
address: 0.0.0.0
|
||||
port_value: 9901
|
||||
static_resources:
|
||||
listeners:
|
||||
- name: iperf3-listener
|
||||
address:
|
||||
socket_address:
|
||||
protocol: TCP
|
||||
address: 0.0.0.0
|
||||
port_value: 10000
|
||||
filter_chains:
|
||||
- filters:
|
||||
- name: envoy.tcp_proxy
|
||||
config:
|
||||
stat_prefix: iperf3-listener
|
||||
cluster: iperf3_server
|
||||
clusters:
|
||||
- name: iperf3_server
|
||||
connect_timeout: 1.0s
|
||||
type: static
|
||||
lb_policy: ROUND_ROBIN
|
||||
hosts:
|
||||
- socket_address:
|
||||
address: 127.0.0.1
|
||||
port_value: 5201
|
||||
20
src/29-sockops/load.sh
Executable file
20
src/29-sockops/load.sh
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/bin/bash
# Load and attach the sockops program, pin its map, and (unless an argument
# is given) load and attach the sk_msg redirect program to that map.
#
# Usage: load.sh [any-non-empty-argument-to-skip-bpf_redir]
set -x
set -e

# Mount the bpf filesystem only if it is not mounted yet, so repeated runs
# do not stack another mount on top of the existing one.
if ! grep -qs ' /sys/fs/bpf bpf ' /proc/mounts; then
    sudo mount -t bpf bpf /sys/fs/bpf/
fi

# Load the bpf_sockops program
sudo bpftool prog load bpf_sockops.o "/sys/fs/bpf/bpf_sockop"
sudo bpftool cgroup attach "/sys/fs/cgroup/unified/" sock_ops pinned "/sys/fs/bpf/bpf_sockop"

# Find the id of the map used by the program and pin it so bpf_redir can reuse it.
MAP_ID=$(sudo bpftool prog show pinned "/sys/fs/bpf/bpf_sockop" | grep -o -E 'map_ids [0-9]+' | awk '{print $2}')
if [ -z "$MAP_ID" ]; then
    # The pipeline's exit status is awk's, so an empty result must be checked explicitly.
    echo "load.sh: could not determine the map id of bpf_sockop" >&2
    exit 1
fi
sudo bpftool map pin id "$MAP_ID" "/sys/fs/bpf/sock_ops_map"

# Load the bpf_redir program
if [ -z "$1" ]
then
    sudo bpftool prog load bpf_redir.o "/sys/fs/bpf/bpf_redir" map name sock_ops_map pinned "/sys/fs/bpf/sock_ops_map"
    sudo bpftool prog attach pinned "/sys/fs/bpf/bpf_redir" msg_verdict pinned "/sys/fs/bpf/sock_ops_map"
fi
|
||||
BIN
src/29-sockops/merbridge.png
Normal file
BIN
src/29-sockops/merbridge.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 203 KiB |
2
src/29-sockops/trace.sh
Executable file
2
src/29-sockops/trace.sh
Executable file
@@ -0,0 +1,2 @@
|
||||
#!/bin/bash
# Stream the kernel trace pipe, where the programs' printk output appears.
TRACE_PIPE="/sys/kernel/debug/tracing/trace_pipe"
sudo cat "$TRACE_PIPE"
|
||||
13
src/29-sockops/unload.sh
Executable file
13
src/29-sockops/unload.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/bin/bash
# Detach both programs and remove their pins and the pinned map.
# There is no `set -e`: every step runs even if an earlier one fails.
BPF_FS="/sys/fs/bpf"
CGROUP_ROOT="/sys/fs/cgroup/unified/"

set -x

# Unload the bpf_redir program
sudo bpftool prog detach pinned "$BPF_FS/bpf_redir" msg_verdict pinned "$BPF_FS/sock_ops_map"
sudo rm "$BPF_FS/bpf_redir"

# Unload the bpf_sockops program
sudo bpftool cgroup detach "$CGROUP_ROOT" sock_ops pinned "$BPF_FS/bpf_sockop"
sudo rm "$BPF_FS/bpf_sockop"

# Delete the map
sudo rm "$BPF_FS/sock_ops_map"
|
||||
Reference in New Issue
Block a user