From 325793dab0f711c73a0b454c40eb3eb4ad63dd9b Mon Sep 17 00:00:00 2001 From: officeyutong <9004058+Officeyutong@users.noreply.github.com> Date: Sun, 22 Jan 2023 00:35:48 +0800 Subject: [PATCH] move things to README.md --- 13-tcpconnlat/README.md | 28 +++ 13-tcpconnlat/tcpconnlat-libbpf-bootstrap.md | 27 --- 14-tcpstates/README.md | 173 ++++++++++++++++++- 14-tcpstates/tcpstates-libbpf-bootstrap.md | 169 ------------------ 4 files changed, 197 insertions(+), 200 deletions(-) delete mode 100644 13-tcpconnlat/tcpconnlat-libbpf-bootstrap.md delete mode 100644 14-tcpstates/tcpstates-libbpf-bootstrap.md diff --git a/13-tcpconnlat/README.md b/13-tcpconnlat/README.md index 989e6b6..d952d5e 100644 --- a/13-tcpconnlat/README.md +++ b/13-tcpconnlat/README.md @@ -131,3 +131,31 @@ with an externally created map. ``` For more details, see docs/special_filtering.md + +## eBPF入门实践教程:使用 libbpf-bootstrap 开发程序统计 TCP 连接延时 + +## 来源 + +修改自 https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpconnlat.bpf.c + +## 编译运行 + +- ```git clone https://github.com/libbpf/libbpf-bootstrap libbpf-bootstrap-cloned``` +- 将 [libbpf-bootstrap](libbpf-bootstrap)目录下的文件复制到 ```libbpf-bootstrap-cloned/examples/c```下 +- 修改 ```libbpf-bootstrap-cloned/examples/c/Makefile``` ,在其 ```APPS``` 项后添加 ```tcpconnlat``` +- 在 ```libbpf-bootstrap-cloned/examples/c``` 下运行 ```make tcpconnlat``` +- ```sudo ./tcpconnlat``` + +## 效果 +``` +root@yutong-VirtualBox:~/libbpf-bootstrap/examples/c# ./tcpconnlat +PID COMM IP SADDR DADDR DPORT LAT(ms) +222564 wget 4 192.168.88.15 110.242.68.3 80 25.29 +222684 wget 4 192.168.88.15 167.179.101.42 443 246.76 +222726 ssh 4 192.168.88.15 167.179.101.42 22 241.17 +222774 ssh 4 192.168.88.15 1.15.149.151 22 25.31 +``` + +对于输出的详细解释,详见 [README.md](README.md) + +对于源代码的详解,具体见 [tcpconnlat.md](tcpconnlat.md) diff --git a/13-tcpconnlat/tcpconnlat-libbpf-bootstrap.md b/13-tcpconnlat/tcpconnlat-libbpf-bootstrap.md deleted file mode 100644 index e0c3fb0..0000000 --- 
a/13-tcpconnlat/tcpconnlat-libbpf-bootstrap.md +++ /dev/null @@ -1,27 +0,0 @@ -## eBPF入门实践教程:使用 libbpf-bootstrap 开发程序统计 TCP 连接延时 - -## 来源 - -修改自 https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpconnlat.bpf.c - -## 编译运行 - -- ```git clone https://github.com/libbpf/libbpf-bootstrap libbpf-bootstrap-cloned``` -- 将 [libbpf-bootstrap](libbpf-bootstrap)目录下的文件复制到 ```libbpf-bootstrap-cloned/examples/c```下 -- 修改 ```libbpf-bootstrap-cloned/examples/c/Makefile``` ,在其 ```APPS``` 项后添加 ```tcpconnlat``` -- 在 ```libbpf-bootstrap-cloned/examples/c``` 下运行 ```make tcpconnlat``` -- ```sudo ./tcpconnlat``` - -## 效果 -``` -root@yutong-VirtualBox:~/libbpf-bootstrap/examples/c# ./tcpconnlat -PID COMM IP SADDR DADDR DPORT LAT(ms) -222564 wget 4 192.168.88.15 110.242.68.3 80 25.29 -222684 wget 4 192.168.88.15 167.179.101.42 443 246.76 -222726 ssh 4 192.168.88.15 167.179.101.42 22 241.17 -222774 ssh 4 192.168.88.15 1.15.149.151 22 25.31 -``` - -对于输出的详细解释,详见 [README.md](README.md) - -对于源代码的详解,具体见 [tcpconnlat.md](tcpconnlat.md) diff --git a/14-tcpstates/README.md b/14-tcpstates/README.md index 7d08e81..bba8df5 100644 --- a/14-tcpstates/README.md +++ b/14-tcpstates/README.md @@ -1,9 +1,5 @@ ## eBPF 入门实践教程: -## 备注 - -对于使用 `libbpf-bootstrap` 的开发,具体见 [tcpstates-libbpf-bootstrap.md](tcpstates-libbpf-bootstrap.md) - ## origin origin from: @@ -49,3 +45,172 @@ process ID and command name: these may show the process that owns the TCP session, depending on whether the state change executes synchronously in process context. If that's not the case, they may show kernel details. 
+## eBPF入门实践教程:使用 libbpf-bootstrap 开发程序追踪 TCP 连接状态 + +## 来源 + +修改自 https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpstates.bpf.c + +## 编译运行 + +- ```git clone https://github.com/libbpf/libbpf-bootstrap libbpf-bootstrap-cloned``` +- 将 [libbpf-bootstrap](libbpf-bootstrap)目录下的文件复制到 ```libbpf-bootstrap-cloned/examples/c```下 +- 修改 ```libbpf-bootstrap-cloned/examples/c/Makefile``` ,在其 ```APPS``` 项后添加 ```tcpstates``` +- 在 ```libbpf-bootstrap-cloned/examples/c``` 下运行 ```make tcpstates``` +- ```sudo ./tcpstates``` + +## 效果 +``` +root@yutong-VirtualBox:~/libbpf-bootstrap/examples/c# ./tcpstates +SKADDR PID COMM LADDR LPORT RADDR RPORT OLDSTATE -> NEWSTATE MS +ffff9bf61bb62bc0 164978 node 192.168.88.15 0 52.178.17.2 443 CLOSE -> SYN_SENT 0.000 +ffff9bf61bb62bc0 0 swapper/0 192.168.88.15 41596 52.178.17.2 443 SYN_SENT -> ESTABLISHED 225.794 +ffff9bf61bb62bc0 0 swapper/0 192.168.88.15 41596 52.178.17.2 443 ESTABLISHED -> CLOSE_WAIT 901.454 +ffff9bf61bb62bc0 164978 node 192.168.88.15 41596 52.178.17.2 443 CLOSE_WAIT -> LAST_ACK 0.793 +ffff9bf61bb62bc0 164978 node 192.168.88.15 41596 52.178.17.2 443 LAST_ACK -> LAST_ACK 0.086 +ffff9bf61bb62bc0 228759 kworker/u6 192.168.88.15 41596 52.178.17.2 443 LAST_ACK -> CLOSE 0.193 +ffff9bf6d8ee88c0 229832 redis-serv 0.0.0.0 6379 0.0.0.0 0 CLOSE -> LISTEN 0.000 +ffff9bf6d8ee88c0 229832 redis-serv 0.0.0.0 6379 0.0.0.0 0 LISTEN -> CLOSE 1.763 +ffff9bf7109d6900 88750 node 127.0.0.1 39755 127.0.0.1 50966 ESTABLISHED -> FIN_WAIT1 0.000 +``` + +对于输出的详细解释,详见 [README.md](README.md) + +## ```tcpstates.bpf.c``` 的解释 + +```tcpstates``` 是一个追踪当前系统上的TCP套接字的TCP状态的程序,主要通过跟踪内核跟踪点 ```inet_sock_set_state``` 来实现。统计数据通过 ```perf_event```向用户态传输。 + +```c +SEC("tracepoint/sock/inet_sock_set_state") +int handle_set_state(struct trace_event_raw_inet_sock_set_state *ctx) +``` + +在套接字改变状态处附加一个eBPF跟踪函数。 + + + +```c + if (ctx->protocol != IPPROTO_TCP) + return 0; + + if (target_family && target_family != family) + return 0; + + if (filter_by_sport && 
!bpf_map_lookup_elem(&sports, &sport)) + return 0; + + if (filter_by_dport && !bpf_map_lookup_elem(&dports, &dport)) + return 0; +``` + +跟踪函数被调用后,先判断当前改变状态的套接字是否满足我们需要的过滤条件,如果不满足则不进行记录。 + + + +```c + tsp = bpf_map_lookup_elem(&timestamps, &sk); + ts = bpf_ktime_get_ns(); + if (!tsp) + delta_us = 0; + else + delta_us = (ts - *tsp) / 1000; + + event.skaddr = (__u64)sk; + event.ts_us = ts / 1000; + event.delta_us = delta_us; + event.pid = bpf_get_current_pid_tgid() >> 32; + event.oldstate = ctx->oldstate; + event.newstate = ctx->newstate; + event.family = family; + event.sport = sport; + event.dport = dport; + bpf_get_current_comm(&event.task, sizeof(event.task)); + + if (family == AF_INET) { + bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_rcv_saddr); + bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_daddr); + } else { /* family == AF_INET6 */ + bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); + bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32); + } +``` + +使用状态改变相关填充event结构体。 + +- 此处使用了```libbpf``` 的 CO-RE 支持。 + + + +```c + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event)); +``` + +将事件结构体发送至用户态程序。 + + + +```c + if (ctx->newstate == TCP_CLOSE) + bpf_map_delete_elem(&timestamps, &sk); + else + bpf_map_update_elem(&timestamps, &sk, &ts, BPF_ANY); +``` + +根据这个TCP链接的新状态,决定是更新下时间戳记录还是不再记录它的时间戳。 + +### 对于用户态程序 + +```c + while (!exiting) { + err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS); + if (err < 0 && err != -EINTR) { + warn("error polling perf buffer: %s\n", strerror(-err)); + goto cleanup; + } + /* reset err to return 0 if exiting */ + err = 0; + } +``` + +不停轮询内核程序所发过来的 ```perf event```。 + +```c +static void handle_event(void* ctx, int cpu, void* data, __u32 data_sz) { + char ts[32], saddr[26], daddr[26]; + struct event* e = data; + struct tm* tm; + int family; + time_t t; + + if 
(emit_timestamp) { + time(&t); + tm = localtime(&t); + strftime(ts, sizeof(ts), "%H:%M:%S", tm); + printf("%8s ", ts); + } + + inet_ntop(e->family, &e->saddr, saddr, sizeof(saddr)); + inet_ntop(e->family, &e->daddr, daddr, sizeof(daddr)); + if (wide_output) { + family = e->family == AF_INET ? 4 : 6; + printf( + "%-16llx %-7d %-16s %-2d %-26s %-5d %-26s %-5d %-11s -> %-11s " + "%.3f\n", + e->skaddr, e->pid, e->task, family, saddr, e->sport, daddr, + e->dport, tcp_states[e->oldstate], tcp_states[e->newstate], + (double)e->delta_us / 1000); + } else { + printf( + "%-16llx %-7d %-10.10s %-15s %-5d %-15s %-5d %-11s -> %-11s %.3f\n", + e->skaddr, e->pid, e->task, saddr, e->sport, daddr, e->dport, + tcp_states[e->oldstate], tcp_states[e->newstate], + (double)e->delta_us / 1000); + } +} + +static void handle_lost_events(void* ctx, int cpu, __u64 lost_cnt) { + warn("lost %llu events on CPU #%d\n", lost_cnt, cpu); +} +``` + +收到事件后所调用对应的处理函数并进行输出打印。 \ No newline at end of file diff --git a/14-tcpstates/tcpstates-libbpf-bootstrap.md b/14-tcpstates/tcpstates-libbpf-bootstrap.md deleted file mode 100644 index 72222a2..0000000 --- a/14-tcpstates/tcpstates-libbpf-bootstrap.md +++ /dev/null @@ -1,169 +0,0 @@ -## eBPF入门实践教程:使用 libbpf-bootstrap 开发程序统计 TCP 连接延时 - -## 来源 - -修改自 https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpstates.bpf.c - -## 编译运行 - -- ```git clone https://github.com/libbpf/libbpf-bootstrap libbpf-bootstrap-cloned``` -- 将 [libbpf-bootstrap](libbpf-bootstrap)目录下的文件复制到 ```libbpf-bootstrap-cloned/examples/c```下 -- 修改 ```libbpf-bootstrap-cloned/examples/c/Makefile``` ,在其 ```APPS``` 项后添加 ```tcpstates``` -- 在 ```libbpf-bootstrap-cloned/examples/c``` 下运行 ```make tcpstates``` -- ```sudo ./tcpstates``` - -## 效果 -``` -root@yutong-VirtualBox:~/libbpf-bootstrap/examples/c# ./tcpstates -SKADDR PID COMM LADDR LPORT RADDR RPORT OLDSTATE -> NEWSTATE MS -ffff9bf61bb62bc0 164978 node 192.168.88.15 0 52.178.17.2 443 CLOSE -> SYN_SENT 0.000 -ffff9bf61bb62bc0 0 swapper/0 
192.168.88.15 41596 52.178.17.2 443 SYN_SENT -> ESTABLISHED 225.794 -ffff9bf61bb62bc0 0 swapper/0 192.168.88.15 41596 52.178.17.2 443 ESTABLISHED -> CLOSE_WAIT 901.454 -ffff9bf61bb62bc0 164978 node 192.168.88.15 41596 52.178.17.2 443 CLOSE_WAIT -> LAST_ACK 0.793 -ffff9bf61bb62bc0 164978 node 192.168.88.15 41596 52.178.17.2 443 LAST_ACK -> LAST_ACK 0.086 -ffff9bf61bb62bc0 228759 kworker/u6 192.168.88.15 41596 52.178.17.2 443 LAST_ACK -> CLOSE 0.193 -ffff9bf6d8ee88c0 229832 redis-serv 0.0.0.0 6379 0.0.0.0 0 CLOSE -> LISTEN 0.000 -ffff9bf6d8ee88c0 229832 redis-serv 0.0.0.0 6379 0.0.0.0 0 LISTEN -> CLOSE 1.763 -ffff9bf7109d6900 88750 node 127.0.0.1 39755 127.0.0.1 50966 ESTABLISHED -> FIN_WAIT1 0.000 -``` - -对于输出的详细解释,详见 [README.md](README.md) - -## ```tcpstates.bpf.c``` 的解释 - -```tcpstates``` 是一个追踪当前系统上的TCP套接字的TCP状态的程序,主要通过跟踪内核跟踪点 ```inet_sock_set_state``` 来实现。统计数据通过 ```perf_event```向用户态传输。 - -```c -SEC("tracepoint/sock/inet_sock_set_state") -int handle_set_state(struct trace_event_raw_inet_sock_set_state *ctx) -``` - -在套接字改变状态处附加一个eBPF跟踪函数。 - - - -```c - if (ctx->protocol != IPPROTO_TCP) - return 0; - - if (target_family && target_family != family) - return 0; - - if (filter_by_sport && !bpf_map_lookup_elem(&sports, &sport)) - return 0; - - if (filter_by_dport && !bpf_map_lookup_elem(&dports, &dport)) - return 0; -``` - -跟踪函数被调用后,先判断当前改变状态的套接字是否满足我们需要的过滤条件,如果不满足则不进行记录。 - - - -```c - tsp = bpf_map_lookup_elem(&timestamps, &sk); - ts = bpf_ktime_get_ns(); - if (!tsp) - delta_us = 0; - else - delta_us = (ts - *tsp) / 1000; - - event.skaddr = (__u64)sk; - event.ts_us = ts / 1000; - event.delta_us = delta_us; - event.pid = bpf_get_current_pid_tgid() >> 32; - event.oldstate = ctx->oldstate; - event.newstate = ctx->newstate; - event.family = family; - event.sport = sport; - event.dport = dport; - bpf_get_current_comm(&event.task, sizeof(event.task)); - - if (family == AF_INET) { - bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_rcv_saddr); - 
bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_daddr); - } else { /* family == AF_INET6 */ - bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); - bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32); - } -``` - -使用状态改变相关填充event结构体。 - -- 此处使用了```libbpf``` 的 CO-RE 支持。 - - - -```c - bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event)); -``` - -将事件结构体发送至用户态程序。 - - - -```c - if (ctx->newstate == TCP_CLOSE) - bpf_map_delete_elem(&timestamps, &sk); - else - bpf_map_update_elem(&timestamps, &sk, &ts, BPF_ANY); -``` - -根据这个TCP链接的新状态,决定是更新下时间戳记录还是不再记录它的时间戳。 - -### 对于用户态程序 - -```c - while (!exiting) { - err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS); - if (err < 0 && err != -EINTR) { - warn("error polling perf buffer: %s\n", strerror(-err)); - goto cleanup; - } - /* reset err to return 0 if exiting */ - err = 0; - } -``` - -不停轮询内核程序所发过来的 ```perf event```。 - -```c -static void handle_event(void* ctx, int cpu, void* data, __u32 data_sz) { - char ts[32], saddr[26], daddr[26]; - struct event* e = data; - struct tm* tm; - int family; - time_t t; - - if (emit_timestamp) { - time(&t); - tm = localtime(&t); - strftime(ts, sizeof(ts), "%H:%M:%S", tm); - printf("%8s ", ts); - } - - inet_ntop(e->family, &e->saddr, saddr, sizeof(saddr)); - inet_ntop(e->family, &e->daddr, daddr, sizeof(daddr)); - if (wide_output) { - family = e->family == AF_INET ? 
4 : 6; - printf( - "%-16llx %-7d %-16s %-2d %-26s %-5d %-26s %-5d %-11s -> %-11s " - "%.3f\n", - e->skaddr, e->pid, e->task, family, saddr, e->sport, daddr, - e->dport, tcp_states[e->oldstate], tcp_states[e->newstate], - (double)e->delta_us / 1000); - } else { - printf( - "%-16llx %-7d %-10.10s %-15s %-5d %-15s %-5d %-11s -> %-11s %.3f\n", - e->skaddr, e->pid, e->task, saddr, e->sport, daddr, e->dport, - tcp_states[e->oldstate], tcp_states[e->newstate], - (double)e->delta_us / 1000); - } -} - -static void handle_lost_events(void* ctx, int cpu, __u64 lost_cnt) { - warn("lost %llu events on CPU #%d\n", lost_cnt, cpu); -} -``` - -收到事件后所调用对应的处理函数并进行输出打印。 \ No newline at end of file