add socops code

This commit is contained in:
yunwei37
2023-05-30 23:57:36 +08:00
committed by 云微
parent eb292e2ab5
commit 899f332f14
12 changed files with 404 additions and 1 deletions

8
src/29-sockops/.gitignore vendored Normal file
View File

@@ -0,0 +1,8 @@
.vscode
package.json
*.o
*.skel.json
*.skel.yaml
package.yaml
ecli
ecc

View File

@@ -0,0 +1,76 @@
# eBPF sockops 示例
## 利用 eBPF 的 sockops 进行性能优化
网络连接本质上是 socket 之间的通讯eBPF 提供了一个 [bpf_msg_redirect_hash](https://man7.org/linux/man-pages/man7/bpf-helpers.7.html) 函数,用来将应用发出的包直接转发到对端的 socket可以极大地加速包在内核中的处理流程。
这里 sock_map 是记录 socket 规则的关键部分,即根据当前的数据包信息,从 sock_map 中挑选一个存在的 socket 连接来转发请求。所以需要先在 sockops 的 hook 处或者其它地方,将 socket 信息保存到 sock_map并提供一个规则 (一般为四元组) 根据 key 查找到 socket。
Merbridge 项目就是这样实现了用 eBPF 代替 iptables 为 Istio 进行加速。在使用 Merbridge (eBPF) 优化之后,出入口流量会直接跳过很多内核模块,明显提高性能,如下图所示:
![merbridge](merbridge.png)
## 运行样例
此示例程序从发送者的套接字(出口)重定向流量至接收者的套接字(入口),**跳过 TCP/IP 内核网络栈**。在这个示例中,我们假定发送者和接收者都在**同一台**机器上运行。
### 编译 eBPF 程序
```shell
# Compile the bpf_sockops program
clang -O2 -g -Wall -target bpf -c bpf_sockops.c -o bpf_sockops.o
clang -O2 -g -Wall -target bpf -c bpf_redir.c -o bpf_redir.o
```
### 加载 eBPF 程序
```shell
sudo ./load.sh
```
您可以使用 [bpftool utility](https://github.com/torvalds/linux/blob/master/tools/bpf/bpftool/Documentation/bpftool-prog.rst) 检查这两个 eBPF 程序是否已经加载。
```console
$ sudo bpftool prog show
63: sock_ops name bpf_sockmap tag 275467be1d69253d gpl
loaded_at 2019-01-24T13:07:17+0200 uid 0
xlated 1232B jited 750B memlock 4096B map_ids 58
64: sk_msg name bpf_redir tag bc78074aa9dd96f4 gpl
loaded_at 2019-01-24T13:07:17+0200 uid 0
xlated 304B jited 233B memlock 4096B map_ids 58
```
### 运行 [iperf3](https://iperf.fr/) 服务器
```shell
iperf3 -s -p 10000
```
### 运行 [iperf3](https://iperf.fr/) 客户端
```shell
iperf3 -c 127.0.0.1 -t 10 -l 64k -p 10000
```
### 收集追踪
```console
$ ./trace.sh
iperf3-9516 [001] .... 22500.634108: 0: <<< ipv4 op = 4, port 18583 --> 4135
iperf3-9516 [001] ..s1 22500.634137: 0: <<< ipv4 op = 5, port 4135 --> 18583
iperf3-9516 [001] .... 22500.634523: 0: <<< ipv4 op = 4, port 19095 --> 4135
iperf3-9516 [001] ..s1 22500.634536: 0: <<< ipv4 op = 5, port 4135 --> 19095
```
你应该可以看到 4 个用于套接字建立的事件。如果你没有看到任何事件,那么 eBPF 程序可能没有正确地附加上。
### 卸载 eBPF 程序
```shell
sudo ./unload.sh
```
## 参考资料和源代码来源
- <https://github.com/zachidan/ebpf-sockops>
- <https://github.com/merbridge/merbridge>

View File

@@ -0,0 +1,27 @@
#include <linux/bpf.h>
#include <sys/socket.h>
#include "bpf_sockops.h"
__section("sk_msg")
int bpf_redir(struct sk_msg_md *msg)
{
__u64 flags = BPF_F_INGRESS;
struct sock_key key = {};
sk_msg_extract4_key(msg, &key);
// See whether the source or destination IP is local host
if (key.sip4 == 16777343 || key.dip4 == 16777343) {
// See whether the source or destination port is 10000
if (key.sport == 4135 || key.dport == 4135) {
int len1 = (__u64)msg->data_end - (__u64)msg->data;
printk("<<< redir_proxy port %d --> %d (%d)\n", key.sport, key.dport, len1);
msg_redirect_hash(msg, &sock_ops_map, &key, flags);
}
}
return SK_PASS;
}
BPF_LICENSE("GPL");
int _version __section("version") = 1;

View File

@@ -0,0 +1,52 @@
#include <linux/bpf.h>
#include <linux/bpf_common.h>
#include <sys/socket.h>
#include "bpf_sockops.h"
static inline void bpf_sock_ops_ipv4(struct bpf_sock_ops *skops)
{
struct sock_key key = {};
sk_extract4_key(skops, &key);
if (key.dip4 == 16777343 || key.sip4 == 16777343 ) {
if (key.dport == 4135 || key.sport == 4135) {
int ret = sock_hash_update(skops, &sock_ops_map, &key, BPF_NOEXIST);
printk("<<< ipv4 op = %d, port %d --> %d\n", skops->op, key.sport, key.dport);
if (ret != 0)
printk("*** FAILED %d ***\n", ret);
}
}
}
static inline void bpf_sock_ops_ipv6(struct bpf_sock_ops *skops)
{
if (skops->remote_ip4)
bpf_sock_ops_ipv4(skops);
}
__section("sockops")
int bpf_sockmap(struct bpf_sock_ops *skops)
{
__u32 family, op;
family = skops->family;
op = skops->op;
//printk("<<< op %d, port = %d --> %d\n", op, skops->local_port, skops->remote_port);
switch (op) {
case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
if (family == AF_INET6)
bpf_sock_ops_ipv6(skops);
else if (family == AF_INET)
bpf_sock_ops_ipv4(skops);
break;
default:
break;
}
return 0;
}
BPF_LICENSE("GPL");
int _version __section("version") = 1;

View File

@@ -0,0 +1,168 @@
#include <linux/types.h>
#include <linux/swab.h>
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
# define __bpf_ntohs(x) __builtin_bswap16(x)
# define __bpf_htons(x) __builtin_bswap16(x)
# define __bpf_constant_ntohs(x) ___constant_swab16(x)
# define __bpf_constant_htons(x) ___constant_swab16(x)
# define __bpf_ntohl(x) __builtin_bswap32(x)
# define __bpf_htonl(x) __builtin_bswap32(x)
# define __bpf_constant_ntohl(x) ___constant_swab32(x)
# define __bpf_constant_htonl(x) ___constant_swab32(x)
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
# define __bpf_ntohs(x) (x)
# define __bpf_htons(x) (x)
# define __bpf_constant_ntohs(x) (x)
# define __bpf_constant_htons(x) (x)
# define __bpf_ntohl(x) (x)
# define __bpf_htonl(x) (x)
# define __bpf_constant_ntohl(x) (x)
# define __bpf_constant_htonl(x) (x)
#else
# error "Fix your compiler's __BYTE_ORDER__?!"
#endif
#define bpf_htons(x) \
(__builtin_constant_p(x) ? \
__bpf_constant_htons(x) : __bpf_htons(x))
#define bpf_ntohs(x) \
(__builtin_constant_p(x) ? \
__bpf_constant_ntohs(x) : __bpf_ntohs(x))
#define bpf_htonl(x) \
(__builtin_constant_p(x) ? \
__bpf_constant_htonl(x) : __bpf_htonl(x))
#define bpf_ntohl(x) \
(__builtin_constant_p(x) ? \
__bpf_constant_ntohl(x) : __bpf_ntohl(x))
/** Section helper macros. */
#ifndef __section
# define __section(NAME) \
__attribute__((section(NAME), used))
#endif
#ifndef __section_tail
# define __section_tail(ID, KEY) \
__section(__stringify(ID) "/" __stringify(KEY))
#endif
#ifndef __section_cls_entry
# define __section_cls_entry \
__section("classifier")
#endif
#ifndef __section_act_entry
# define __section_act_entry \
__section("action")
#endif
#ifndef __section_license
# define __section_license \
__section("license")
#endif
#ifndef __section_maps
# define __section_maps \
__section("maps")
#endif
/** Declaration helper macros. */
#ifndef BPF_LICENSE
# define BPF_LICENSE(NAME) \
char ____license[] __section_license = NAME
#endif
#ifndef BPF_FUNC
# define BPF_FUNC(NAME, ...) \
(*NAME)(__VA_ARGS__) = (void *)BPF_FUNC_##NAME
#endif
static int BPF_FUNC(sock_hash_update, struct bpf_sock_ops *skops, void *map, void *key, uint64_t flags);
static int BPF_FUNC(msg_redirect_hash, struct sk_msg_md *md, void *map, void *key, uint64_t flags);
static void BPF_FUNC(trace_printk, const char *fmt, int fmt_size, ...);
#ifndef printk
# define printk(fmt, ...) \
({ \
char ____fmt[] = fmt; \
trace_printk(____fmt, sizeof(____fmt), ##__VA_ARGS__); \
})
#endif
struct bpf_map_def {
__u32 type;
__u32 key_size;
__u32 value_size;
__u32 max_entries;
__u32 map_flags;
};
union v6addr {
struct {
__u32 p1;
__u32 p2;
__u32 p3;
__u32 p4;
};
__u8 addr[16];
};
struct sock_key {
union {
struct {
__u32 sip4;
__u32 pad1;
__u32 pad2;
__u32 pad3;
};
union v6addr sip6;
};
union {
struct {
__u32 dip4;
__u32 pad4;
__u32 pad5;
__u32 pad6;
};
union v6addr dip6;
};
__u8 family;
__u8 pad7;
__u16 pad8;
__u32 sport;
__u32 dport;
} __attribute__((packed));
struct bpf_map_def __section_maps sock_ops_map = {
.type = BPF_MAP_TYPE_SOCKHASH,
.key_size = sizeof(struct sock_key),
.value_size = sizeof(int),
.max_entries = 65535,
.map_flags = 0,
};
static inline void sk_extract4_key(struct bpf_sock_ops *ops,
struct sock_key *key)
{
key->dip4 = ops->remote_ip4;
key->sip4 = ops->local_ip4;
key->family = 1;
key->sport = (bpf_htonl(ops->local_port) >> 16);
key->dport = ops->remote_port >> 16;
}
static inline void sk_msg_extract4_key(struct sk_msg_md *msg,
struct sock_key *key)
{
key->sip4 = msg->remote_ip4;
key->dip4 = msg->local_ip4;
key->family = 1;
key->dport = (bpf_htonl(msg->local_port) >> 16);
key->sport = msg->remote_port >> 16;
}

View File

@@ -0,0 +1,3 @@
FROM envoyproxy/envoy:latest
COPY envoy.yaml /etc/envoy/envoy.yaml
EXPOSE 9901

View File

@@ -0,0 +1,30 @@
admin:
access_log_path: /tmp/admin_access.log
address:
socket_address:
protocol: TCP
address: 0.0.0.0
port_value: 9901
static_resources:
listeners:
- name: iperf3-listener
address:
socket_address:
protocol: TCP
address: 0.0.0.0
port_value: 10000
filter_chains:
- filters:
- name: envoy.tcp_proxy
config:
stat_prefix: iperf3-listener
cluster: iperf3_server
clusters:
- name: iperf3_server
connect_timeout: 1.0s
type: static
lb_policy: ROUND_ROBIN
hosts:
- socket_address:
address: 127.0.0.1
port_value: 5201

20
src/29-sockops/load.sh Executable file
View File

@@ -0,0 +1,20 @@
#!/bin/bash
set -x
set -e
# Mount bpf filesystem
sudo mount -t bpf bpf /sys/fs/bpf/
# Load the bpf_sockops program
sudo bpftool prog load bpf_sockops.o "/sys/fs/bpf/bpf_sockop"
sudo bpftool cgroup attach "/sys/fs/cgroup/unified/" sock_ops pinned "/sys/fs/bpf/bpf_sockop"
MAP_ID=$(sudo bpftool prog show pinned "/sys/fs/bpf/bpf_sockop" | grep -o -E 'map_ids [0-9]+' | awk '{print $2}')
sudo bpftool map pin id $MAP_ID "/sys/fs/bpf/sock_ops_map"
# Load the bpf_redir program
if [ -z $1 ]
then
sudo bpftool prog load bpf_redir.o "/sys/fs/bpf/bpf_redir" map name sock_ops_map pinned "/sys/fs/bpf/sock_ops_map"
sudo bpftool prog attach pinned "/sys/fs/bpf/bpf_redir" msg_verdict pinned "/sys/fs/bpf/sock_ops_map"
fi

Binary file not shown.

After

Width:  |  Height:  |  Size: 203 KiB

2
src/29-sockops/trace.sh Executable file
View File

@@ -0,0 +1,2 @@
#!/bin/bash
sudo cat /sys/kernel/debug/tracing/trace_pipe

13
src/29-sockops/unload.sh Executable file
View File

@@ -0,0 +1,13 @@
#!/bin/bash
set -x
# UnLoad the bpf_redir program
sudo bpftool prog detach pinned "/sys/fs/bpf/bpf_redir" msg_verdict pinned "/sys/fs/bpf/sock_ops_map"
sudo rm "/sys/fs/bpf/bpf_redir"
# UnLoad the bpf_sockops program
sudo bpftool cgroup detach "/sys/fs/cgroup/unified/" sock_ops pinned "/sys/fs/bpf/bpf_sockop"
sudo rm "/sys/fs/bpf/bpf_sockop"
# Delete the map
sudo rm "/sys/fs/bpf/sock_ops_map"