linted markdown documents

officeyutong
2023-01-22 00:39:51 +08:00
committed by 云微
parent 325793dab0
commit 7b37adae07
3 changed files with 122 additions and 116 deletions

View File

@@ -1,4 +1,4 @@
## eBPF Beginner's Practical Tutorial
# eBPF Beginner's Practical Tutorial
## Notes
@@ -8,7 +8,7 @@
Originally from:
https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpconnlat.bpf.c
<https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpconnlat.bpf.c>
## Compile and Run
@@ -30,10 +30,10 @@ TODO: support union in C
Demonstrations of tcpconnect, the Linux eBPF/bcc version.
This tool traces the kernel function performing active TCP connections
(e.g., via a connect() syscall; accept() calls are passive connections). Some example
output (IP addresses changed to protect the innocent):
```console
# ./tcpconnect
PID COMM IP SADDR DADDR DPORT
@@ -43,6 +43,7 @@ PID COMM IP SADDR DADDR DPORT
1991 telnet 6 ::1 ::1 23
2015 ssh 6 fe80::2000:bff:fe82:3ac fe80::2000:bff:fe82:3ac 22
```
This output shows four connections, one from a "telnet" process, two from
"curl", and one from "ssh". The output details shows the IP version, source
address, destination address, and destination port. This traces attempted
@@ -52,8 +53,8 @@ The overhead of this tool should be negligible, since it is only tracing the
kernel functions performing connect. It is not tracing every packet and then
filtering.
The -t option prints a timestamp column:
```console
# ./tcpconnect -t
TIME(s) PID COMM IP SADDR DADDR DPORT
@@ -64,6 +65,7 @@ TIME(s) PID COMM IP SADDR DADDR DPORT
90.928 2482 local_agent 4 10.103.219.236 10.102.64.230 7001
90.938 2482 local_agent 4 10.103.219.236 10.115.167.169 7101
```
The output shows some periodic connections (or attempts) from a "local_agent"
process to various other addresses. A few connections occur every minute.
@@ -74,6 +76,7 @@ in this column. Queries for 127.0.0.1 and ::1 are automatically associated with
"localhost". If the time between when the DNS response was received and a
connect call was traced exceeds 100ms, the tool will print the time delta
after the query name. See below for www.domain.com for an example.
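Conceptually, the correlation works like this (an illustrative C sketch of the logic just described, not bcc's actual Python implementation; every name in it is hypothetical): cache each DNS answer keyed by the resolved address, look that address up when a connect is traced, and append the time delta only when it exceeds the 100ms threshold.
```c
/* Illustrative sketch of the DNS-to-connect correlation described above
 * (hypothetical names; bcc's tcpconnect implements this in Python + BPF). */
#include <stdint.h>
#include <stdio.h>

struct dns_answer {
	char     query[128];  /* hostname from the DNS response */
	uint64_t resp_ts_ns;  /* when the response was seen */
};

/* ans: cached answer for the connect's destination address, or NULL */
void print_query_column(const struct dns_answer *ans, uint64_t connect_ts_ns)
{
	if (!ans) {
		puts("no preceding DNS query");  /* e.g. a literal-IP connect */
		return;
	}
	uint64_t delta_ms = (connect_ts_ns - ans->resp_ts_ns) / 1000000ULL;
	if (delta_ms > 100)  /* the 100 ms threshold mentioned above */
		printf("%s (%llums)\n", ans->query, (unsigned long long)delta_ms);
	else
		printf("%s\n", ans->query);
}
```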
```console
# ./tcpconnect -d
PID COMM IP SADDR DADDR DPORT QUERY
@@ -86,6 +89,7 @@ PID COMM IP SADDR DADDR DPORT QUERY
```
The -L option prints an LPORT column:
```console
# ./tcpconnect -L
PID COMM IP SADDR LPORT DADDR DPORT
@@ -95,6 +99,7 @@ PID COMM IP SADDR LPORT DADDR DPORT
```
The -U option prints a UID column:
```console
# ./tcpconnect -U
UID PID COMM IP SADDR DADDR DPORT
@@ -105,14 +110,17 @@ UID PID COMM IP SADDR DADDR DPORT
```
The -u option filters by UID:
```console
# ./tcpconnect -Uu 1000
UID PID COMM IP SADDR DADDR DPORT
1000 31338 telnet 6 ::1 ::1 23
1000 31338 telnet 4 127.0.0.1 127.0.0.1 23
```
To spot heavy outbound connections quickly, use the -c flag. It counts all
active connections per source IP and destination IP/port, as sketched below.
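Per-flow counting of this kind can be kept directly in a BPF hash map. The sketch below is illustrative only (hypothetical names, not tcpconnect's actual implementation): it keys on (saddr, daddr, dport) and bumps a counter on every traced connect.
```c
/* Illustrative per-flow connection counter (hypothetical names). */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>

struct flow_key_t {
	__u32 saddr;
	__u32 daddr;
	__u16 dport;   /* network byte order, as stored in the socket */
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 10240);
	__type(key, struct flow_key_t);
	__type(value, __u64);
} connect_count SEC(".maps");

/* called from a connect kprobe: bump the counter for this flow */
static __always_inline void count_connect(struct sock *sk)
{
	struct flow_key_t key = {};
	__u64 zero = 0, *val;

	key.saddr = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr);
	key.daddr = BPF_CORE_READ(sk, __sk_common.skc_daddr);
	key.dport = BPF_CORE_READ(sk, __sk_common.skc_dport);

	val = bpf_map_lookup_elem(&connect_count, &key);
	if (!val) {
		/* first sighting of this flow: initialize, then re-lookup */
		bpf_map_update_elem(&connect_count, &key, &zero, BPF_NOEXIST);
		val = bpf_map_lookup_elem(&connect_count, &key);
		if (!val)
			return;
	}
	__sync_fetch_and_add(val, 1);
}
```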
```console
# ./tcpconnect.py -c
Tracing connect ... Hit Ctrl-C to end
@@ -126,17 +134,18 @@ LADDR RADDR RPORT CONNECTS
The --cgroupmap option filters based on a cgroup set. It is meant to be used
with an externally created map.
```console
# ./tcpconnect --cgroupmap /sys/fs/bpf/test01
```
For more details, see docs/special_filtering.md
## eBPF Beginner's Practical Tutorial: Measuring TCP Connection Latency with libbpf-bootstrap
## Source
Adapted from https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpconnlat.bpf.c
Adapted from <https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpconnlat.bpf.c>
## Compile and Run
@@ -147,7 +156,8 @@ For more details, see docs/special_filtering.md
- ```sudo ./tcpconnlat```
## Result
```
```plain
root@yutong-VirtualBox:~/libbpf-bootstrap/examples/c# ./tcpconnlat
PID COMM IP SADDR DADDR DPORT LAT(ms)
222564 wget 4 192.168.88.15 110.242.68.3 80 25.29

View File

@@ -1,4 +1,6 @@
## eBPF Beginner's Practical Tutorial: Writing the eBPF Program tcpconnlat to Measure TCP Connection Latency
# eBPF Beginner's Practical Tutorial: Writing the eBPF Program tcpconnlat to Measure TCP Connection Latency
## Code Explanation
### Background
@@ -36,10 +38,9 @@ The whole TCP connection process is shown in the figure:
- the half-open connection queue, also called the SYN queue;
- the fully established connection queue, also called the accept queue.
When the server receives a SYN request from a client, the kernel stores the connection in the half-open (SYN) queue and replies with SYN+ACK. The client then returns an ACK. Once the server receives this third-handshake ACK, the kernel removes the connection from the SYN queue, creates a new fully established connection, and adds it to the accept queue, where it waits for the process to call accept() and take it out.
Our eBPF implementation lives in https://github.com/yunwei37/Eunomia/blob/master/bpftools/tcpconnlat/tcpconnlat.bpf.c:
Our eBPF implementation lives in <https://github.com/yunwei37/Eunomia/blob/master/bpftools/tcpconnlat/tcpconnlat.bpf.c>:
It mainly uses attach points such as trace_tcp_rcv_state_process and kprobe/tcp_v4_connect:
@@ -48,19 +49,19 @@ The whole TCP connection process is shown in the figure:
SEC("kprobe/tcp_v4_connect")
int BPF_KPROBE(tcp_v4_connect, struct sock *sk)
{
return trace_connect(sk);
return trace_connect(sk);
}
SEC("kprobe/tcp_v6_connect")
int BPF_KPROBE(tcp_v6_connect, struct sock *sk)
{
return trace_connect(sk);
return trace_connect(sk);
}
SEC("kprobe/tcp_rcv_state_process")
int BPF_KPROBE(tcp_rcv_state_process, struct sock *sk)
{
return handle_tcp_rcv_state_process(ctx, sk);
return handle_tcp_rcv_state_process(ctx, sk);
}
```
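With a libbpf-bootstrap style skeleton, the SEC("kprobe/...") programs above are all attached in a single call. Below is a minimal loader sketch; the skeleton header and function names are assumptions based on the usual `bpftool gen skeleton` output for tcpconnlat.bpf.c, not necessarily this project's exact code.
```c
#include <stdio.h>
#include "tcpconnlat.skel.h"  /* assumed skeleton header generated by bpftool */

int main(void)
{
	struct tcpconnlat_bpf *obj = tcpconnlat_bpf__open_and_load();

	if (!obj) {
		fprintf(stderr, "failed to open and load BPF object\n");
		return 1;
	}
	if (tcpconnlat_bpf__attach(obj)) {  /* attaches all three kprobes */
		fprintf(stderr, "failed to attach BPF programs\n");
		tcpconnlat_bpf__destroy(obj);
		return 1;
	}
	/* ... poll the perf buffer for events here ... */
	tcpconnlat_bpf__destroy(obj);
	return 0;
}
```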
@@ -68,25 +69,25 @@ int BPF_KPROBE(tcp_rcv_state_process, struct sock *sk)
```c
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 4096);
	__type(key, struct sock *);
	__type(value, struct piddata);
} start SEC(".maps");
static int trace_connect(struct sock *sk)
{
	u32 tgid = bpf_get_current_pid_tgid() >> 32;
	struct piddata piddata = {};

	if (targ_tgid && targ_tgid != tgid)
		return 0;

	bpf_get_current_comm(&piddata.comm, sizeof(piddata.comm));
	piddata.ts = bpf_ktime_get_ns();
	piddata.tgid = tgid;
	bpf_map_update_elem(&start, &sk, &piddata, 0);
	return 0;
}
```
@@ -95,48 +96,48 @@ static int trace_connect(struct sock *sk)
```c
static int handle_tcp_rcv_state_process(void *ctx, struct sock *sk)
{
	struct piddata *piddatap;
	struct event event = {};
	s64 delta;
	u64 ts;

	if (BPF_CORE_READ(sk, __sk_common.skc_state) != TCP_SYN_SENT)
		return 0;

	piddatap = bpf_map_lookup_elem(&start, &sk);
	if (!piddatap)
		return 0;

	ts = bpf_ktime_get_ns();
	delta = (s64)(ts - piddatap->ts);
	if (delta < 0)
		goto cleanup;

	event.delta_us = delta / 1000U;
	if (targ_min_us && event.delta_us < targ_min_us)
		goto cleanup;
	__builtin_memcpy(&event.comm, piddatap->comm,
			 sizeof(event.comm));
	event.ts_us = ts / 1000;
	event.tgid = piddatap->tgid;
	event.lport = BPF_CORE_READ(sk, __sk_common.skc_num);
	event.dport = BPF_CORE_READ(sk, __sk_common.skc_dport);
	event.af = BPF_CORE_READ(sk, __sk_common.skc_family);
	if (event.af == AF_INET) {
		event.saddr_v4 = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr);
		event.daddr_v4 = BPF_CORE_READ(sk, __sk_common.skc_daddr);
	} else {
		BPF_CORE_READ_INTO(&event.saddr_v6, sk,
				   __sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
		BPF_CORE_READ_INTO(&event.daddr_v6, sk,
				   __sk_common.skc_v6_daddr.in6_u.u6_addr32);
	}
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
			      &event, sizeof(event));
cleanup:
	bpf_map_delete_elem(&start, &sk);
	return 0;
}
```
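For context, the user-space side typically drains these events with libbpf's perf buffer API. The following is a minimal sketch, not the project's exact code; it assumes the modern perf_buffer__new() signature, an abbreviated `event` layout mirroring the kernel struct above, and a caller that passes in the fd of the `events` map.
```c
#include <stdio.h>
#include <linux/types.h>
#include <bpf/libbpf.h>

struct event {                 /* abbreviated; assumed to match the BPF side */
	char  comm[16];
	__u64 delta_us;
	__u64 ts_us;
	__u32 tgid;
	/* address/port fields omitted for brevity */
};

static void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
{
	const struct event *e = data;

	/* print PID, comm and latency in ms (delta_us comes from the kernel side) */
	printf("%-7u %-16s %8.2f\n", e->tgid, e->comm, e->delta_us / 1000.0);
}

int consume_events(int events_map_fd)
{
	/* 16 pages of perf buffer per CPU, 100 ms poll timeout */
	struct perf_buffer *pb = perf_buffer__new(events_map_fd, 16,
						  handle_event, NULL, NULL, NULL);
	if (!pb)
		return 1;
	while (perf_buffer__poll(pb, 100) >= 0)
		;
	perf_buffer__free(pb);
	return 0;
}
```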
@@ -162,7 +163,7 @@ PID COMM IP SRC DEST PORT LAT(ms) CONTAINER
Running the following query produces a chart of the latency statistics (dividing the rate of the histogram sum by the rate of the histogram count gives the average latency over a 5-minute window):
```
```plain
rate(eunomia_observed_tcpconnlat_v4_histogram_sum[5m])
/
rate(eunomia_observed_tcpconnlat_v4_histogram_count[5m])
@@ -178,9 +179,9 @@ PID COMM IP SRC DEST PORT LAT(ms) CONTAINER
> `Eunomia` is a lightweight, high-performance, eBPF-based cloud-native monitoring tool developed in C/C++. It aims to help users understand container behavior and monitor suspicious container security events, striving to provide a lightweight open-source monitoring solution that covers the whole container lifecycle. It uses `Linux` `eBPF` technology to trace your system and applications at runtime, and analyzes the collected events to detect suspicious behavior patterns. It currently provides performance profiling, container-cluster network visualization, container security awareness and alerting, one-click deployment, and persistent-storage monitoring, with a rich set of eBPF attach points. Its core exporter/CLI is a binary of only about 4 MB that can start on any supported Linux kernel.
Project home: https://github.com/yunwei37/Eunomia
Project home: <https://github.com/yunwei37/Eunomia>
### References
1. http://kerneltravel.net/blog/2020/tcpconnlat/
2. https://network.51cto.com/article/640631.html
1. <http://kerneltravel.net/blog/2020/tcpconnlat/>
2. <https://network.51cto.com/article/640631.html>

View File

@@ -1,10 +1,10 @@
## eBPF Beginner's Practical Tutorial
# eBPF Beginner's Practical Tutorial
## origin
Originally from:
https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpconnlat.bpf.c
<https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpconnlat.bpf.c>
## Compile and Run
@@ -13,6 +13,7 @@ Compile:
```shell
docker run -it -v `pwd`/:/src/ yunwei37/ebpm:latest
```
Run:
```shell
@@ -23,9 +24,9 @@ sudo ./ecli run package.json
Demonstrations of tcpstates, the Linux BPF/bcc version.
tcpstates prints TCP state change information, including the duration in each
state as milliseconds. For example, a single TCP session:
```console
# tcpstates
SKADDR C-PID C-COMM LADDR LPORT RADDR RPORT OLDSTATE -> NEWSTATE MS
@@ -36,6 +37,7 @@ ffff9fd7e8192000 0 swapper/5 100.66.100.185 63446 52.33.159.26 80 FI
ffff9fd7e8192000 0 swapper/5 100.66.100.185 63446 52.33.159.26 80 FIN_WAIT2 -> CLOSE 0.006
^C
```
This showed that the most time, 176.042 milliseconds, was spent in the
ESTABLISHED state, which then transitioned to FIN_WAIT1.
@@ -49,7 +51,7 @@ process context. If that's not the case, they may show kernel details.
## Source
Adapted from https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpstates.bpf.c
Adapted from <https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpstates.bpf.c>
## Compile and Run
@@ -60,7 +62,8 @@ process context. If that's not the case, they may show kernel details.
- ```sudo ./tcpstates```
## Result
```
```plain
root@yutong-VirtualBox:~/libbpf-bootstrap/examples/c# ./tcpstates
SKADDR PID COMM LADDR LPORT RADDR RPORT OLDSTATE -> NEWSTATE MS
ffff9bf61bb62bc0 164978 node 192.168.88.15 0 52.178.17.2 443 CLOSE -> SYN_SENT 0.000
@@ -87,73 +90,65 @@ int handle_set_state(struct trace_event_raw_inet_sock_set_state *ctx)
Attach an eBPF tracing function at the point where the socket changes state.
```c
	if (ctx->protocol != IPPROTO_TCP)
		return 0;

	if (target_family && target_family != family)
		return 0;

	if (filter_by_sport && !bpf_map_lookup_elem(&sports, &sport))
		return 0;

	if (filter_by_dport && !bpf_map_lookup_elem(&dports, &dport))
		return 0;
```
When the tracing function is invoked, it first checks whether the socket whose state changed matches our filter conditions; if not, nothing is recorded.
```c
	tsp = bpf_map_lookup_elem(&timestamps, &sk);
	ts = bpf_ktime_get_ns();
	if (!tsp)
		delta_us = 0;
	else
		delta_us = (ts - *tsp) / 1000;

	event.skaddr = (__u64)sk;
	event.ts_us = ts / 1000;
	event.delta_us = delta_us;
	event.pid = bpf_get_current_pid_tgid() >> 32;
	event.oldstate = ctx->oldstate;
	event.newstate = ctx->newstate;
	event.family = family;
	event.sport = sport;
	event.dport = dport;
	bpf_get_current_comm(&event.task, sizeof(event.task));

	if (family == AF_INET) {
		bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_rcv_saddr);
		bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_daddr);
	} else { /* family == AF_INET6 */
		bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
		bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
	}
```
Populate the event struct with the state-change information.
- ```libbpf```'s CO-RE support is used here.
```c
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event));
```
Send the event struct to the user-space program.
```c
	if (ctx->newstate == TCP_CLOSE)
		bpf_map_delete_elem(&timestamps, &sk);
	else
		bpf_map_update_elem(&timestamps, &sk, &ts, BPF_ANY);
```
Depending on the TCP connection's new state, either update its recorded timestamp or delete the record so it is no longer tracked.
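The `timestamps` map itself is not shown in this excerpt; given how it is keyed and updated above, its declaration presumably looks like this (a sketch; the capacity is an assumption):
```c
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 10240);   /* assumed capacity */
	__type(key, struct sock *);
	__type(value, __u64);         /* ns timestamp of the last state change */
} timestamps SEC(".maps");
```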
@@ -213,4 +208,4 @@ static void handle_lost_events(void* ctx, int cpu, __u64 lost_cnt) {
}
```
When an event is received, the corresponding handler is invoked and the output is printed.
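The body of handle_lost_events is elided by the diff above; in the usual libbpf-tools pattern it simply reports the drop, roughly like this (a sketch, not necessarily this project's exact code):
```c
#include <stdio.h>
#include <linux/types.h>

static void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
{
	/* the perf buffer overflowed: report how many samples were dropped */
	fprintf(stderr, "lost %llu events on CPU #%d\n",
		(unsigned long long)lost_cnt, cpu);
}
```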