From 7b37adae0787cbfffa859b7eb12f0705803e978f Mon Sep 17 00:00:00 2001 From: officeyutong <9004058+Officeyutong@users.noreply.github.com> Date: Sun, 22 Jan 2023 00:39:51 +0800 Subject: [PATCH] linted markdown documents --- 13-tcpconnlat/README.md | 24 +++++--- 13-tcpconnlat/tcpconnlat.md | 119 ++++++++++++++++++------------------ 14-tcpstates/README.md | 95 ++++++++++++++-------------- 3 files changed, 122 insertions(+), 116 deletions(-) diff --git a/13-tcpconnlat/README.md b/13-tcpconnlat/README.md index d952d5e..dd4937e 100644 --- a/13-tcpconnlat/README.md +++ b/13-tcpconnlat/README.md @@ -1,4 +1,4 @@ -## eBPF 入门实践教程: +# eBPF 入门实践教程 ## 备注 @@ -8,7 +8,7 @@ origin from: -https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpconnlat.bpf.c +<https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpconnlat.bpf.c> ## Compile and Run @@ -30,10 +30,10 @@ TODO: support union in C Demonstrations of tcpconnect, the Linux eBPF/bcc version. - This tool traces the kernel function performing active TCP connections (eg, via a connect() syscall; accept() are passive connections). Some example output (IP addresses changed to protect the innocent): + ```console # ./tcpconnect PID COMM IP SADDR DADDR DPORT @@ -43,6 +43,7 @@ PID COMM IP SADDR DADDR DPORT 1991 telnet 6 ::1 ::1 23 2015 ssh 6 fe80::2000:bff:fe82:3ac fe80::2000:bff:fe82:3ac 22 ``` + This output shows four connections, one from a "telnet" process, two from "curl", and one from "ssh". The output details shows the IP version, source address, destination address, and destination port. This traces attempted @@ -52,8 +53,8 @@ The overhead of this tool should be negligible, since it is only tracing the kernel functions performing connect. It is not tracing every packet and then filtering. 
- The -t option prints a timestamp column: + ```console # ./tcpconnect -t TIME(s) PID COMM IP SADDR DADDR DPORT @@ -64,6 +65,7 @@ TIME(s) PID COMM IP SADDR DADDR DPORT 90.928 2482 local_agent 4 10.103.219.236 10.102.64.230 7001 90.938 2482 local_agent 4 10.103.219.236 10.115.167.169 7101 ``` + The output shows some periodic connections (or attempts) from a "local_agent" process to various other addresses. A few connections occur every minute. @@ -74,6 +76,7 @@ in this column. Queries for 127.0.0.1 and ::1 are automatically associated with "localhost". If the time between when the DNS response was received and a connect call was traced exceeds 100ms, the tool will print the time delta after the query name. See below for www.domain.com for an example. + ```console # ./tcpconnect -d PID COMM IP SADDR DADDR DPORT QUERY @@ -86,6 +89,7 @@ PID COMM IP SADDR DADDR DPORT QUERY ``` The -L option prints a LPORT column: + ```console # ./tcpconnect -L PID COMM IP SADDR LPORT DADDR DPORT @@ -95,6 +99,7 @@ PID COMM IP SADDR LPORT DADDR DPORT ``` The -U option prints a UID column: + ```console # ./tcpconnect -U UID PID COMM IP SADDR DADDR DPORT @@ -105,14 +110,17 @@ UID PID COMM IP SADDR DADDR DPORT ``` The -u option filtering UID: + ```console # ./tcpconnect -Uu 1000 UID PID COMM IP SADDR DADDR DPORT 1000 31338 telnet 6 ::1 ::1 23 1000 31338 telnet 4 127.0.0.1 127.0.0.1 23 ``` + To spot heavy outbound connections quickly one can use the -c flag. It will count all active connections per source ip and destination ip/port. + ```console # ./tcpconnect.py -c Tracing connect ... Hit Ctrl-C to end @@ -126,17 +134,18 @@ LADDR RADDR RPORT CONNECTS The --cgroupmap option filters based on a cgroup set. It is meant to be used with an externally created map. 
+ ```console # ./tcpconnect --cgroupmap /sys/fs/bpf/test01 ``` -For more details, see docs/special_filtering.md +For more details, see docs/special_filtering.md ## eBPF入门实践教程:使用 libbpf-bootstrap 开发程序统计 TCP 连接延时 ## 来源 -修改自 https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpconnlat.bpf.c +修改自 <https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpconnlat.bpf.c> ## 编译运行 @@ -147,7 +156,8 @@ For more details, see docs/special_filtering.md - ```sudo ./tcpconnlat``` ## 效果 -``` + +```plain root@yutong-VirtualBox:~/libbpf-bootstrap/examples/c# ./tcpconnlat PID COMM IP SADDR DADDR DPORT LAT(ms) 222564 wget 4 192.168.88.15 110.242.68.3 80 25.29 diff --git a/13-tcpconnlat/tcpconnlat.md b/13-tcpconnlat/tcpconnlat.md index 9f19bf1..309500b 100644 --- a/13-tcpconnlat/tcpconnlat.md +++ b/13-tcpconnlat/tcpconnlat.md @@ -1,4 +1,6 @@ -## eBPF 入门实践教程:编写 eBPF 程序 tcpconnlat 测量 tcp 连接延时 +# eBPF 入门实践教程:编写 eBPF 程序 tcpconnlat 测量 tcp 连接延时 + +## 代码解释 ### 背景 @@ -36,10 +38,9 @@ tcp 连接的整个过程如图所示: - 半连接队列,也称 SYN 队列; - 全连接队列,也称 accepet 队列; - 服务端收到客户端发起的 SYN 请求后,内核会把该连接存储到半连接队列,并向客户端响应 SYN+ACK,接着客户端会返回 ACK,服务端收到第三次握手的 ACK 后,内核会把连接从半连接队列移除,然后创建新的完全的连接,并将其添加到 accept 队列,等待进程调用 accept 函数时把连接取出来。 -我们的 ebpf 代码实现在 https://github.com/yunwei37/Eunomia/blob/master/bpftools/tcpconnlat/tcpconnlat.bpf.c 中: +我们的 ebpf 代码实现在 <https://github.com/yunwei37/Eunomia/blob/master/bpftools/tcpconnlat/tcpconnlat.bpf.c> 中: 它主要使用了 trace_tcp_rcv_state_process 和 kprobe/tcp_v4_connect 这样的跟踪点: @@ -48,19 +49,19 @@ tcp 连接的整个过程如图所示: SEC("kprobe/tcp_v4_connect") int BPF_KPROBE(tcp_v4_connect, struct sock *sk) { - return trace_connect(sk); + return trace_connect(sk); } SEC("kprobe/tcp_v6_connect") int BPF_KPROBE(tcp_v6_connect, struct sock *sk) { - return trace_connect(sk); + return trace_connect(sk); } SEC("kprobe/tcp_rcv_state_process") int BPF_KPROBE(tcp_rcv_state_process, struct sock *sk) { - return handle_tcp_rcv_state_process(ctx, sk); + return handle_tcp_rcv_state_process(ctx, sk); } ``` @@ -68,25 +69,25 @@ int BPF_KPROBE(tcp_rcv_state_process, struct sock *sk) ```c struct { - __uint(type, BPF_MAP_TYPE_HASH); - __uint(max_entries, 4096); - __type(key, struct sock *); - 
__type(value, struct piddata); + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 4096); + __type(key, struct sock *); + __type(value, struct piddata); } start SEC(".maps"); static int trace_connect(struct sock *sk) { - u32 tgid = bpf_get_current_pid_tgid() >> 32; - struct piddata piddata = {}; + u32 tgid = bpf_get_current_pid_tgid() >> 32; + struct piddata piddata = {}; - if (targ_tgid && targ_tgid != tgid) - return 0; + if (targ_tgid && targ_tgid != tgid) + return 0; - bpf_get_current_comm(&piddata.comm, sizeof(piddata.comm)); - piddata.ts = bpf_ktime_get_ns(); - piddata.tgid = tgid; - bpf_map_update_elem(&start, &sk, &piddata, 0); - return 0; + bpf_get_current_comm(&piddata.comm, sizeof(piddata.comm)); + piddata.ts = bpf_ktime_get_ns(); + piddata.tgid = tgid; + bpf_map_update_elem(&start, &sk, &piddata, 0); + return 0; } ``` @@ -95,48 +96,48 @@ static int trace_connect(struct sock *sk) ```c static int handle_tcp_rcv_state_process(void *ctx, struct sock *sk) { - struct piddata *piddatap; - struct event event = {}; - s64 delta; - u64 ts; + struct piddata *piddatap; + struct event event = {}; + s64 delta; + u64 ts; - if (BPF_CORE_READ(sk, __sk_common.skc_state) != TCP_SYN_SENT) - return 0; + if (BPF_CORE_READ(sk, __sk_common.skc_state) != TCP_SYN_SENT) + return 0; - piddatap = bpf_map_lookup_elem(&start, &sk); - if (!piddatap) - return 0; + piddatap = bpf_map_lookup_elem(&start, &sk); + if (!piddatap) + return 0; - ts = bpf_ktime_get_ns(); - delta = (s64)(ts - piddatap->ts); - if (delta < 0) - goto cleanup; + ts = bpf_ktime_get_ns(); + delta = (s64)(ts - piddatap->ts); + if (delta < 0) + goto cleanup; - event.delta_us = delta / 1000U; - if (targ_min_us && event.delta_us < targ_min_us) - goto cleanup; - __builtin_memcpy(&event.comm, piddatap->comm, - sizeof(event.comm)); - event.ts_us = ts / 1000; - event.tgid = piddatap->tgid; - event.lport = BPF_CORE_READ(sk, __sk_common.skc_num); - event.dport = BPF_CORE_READ(sk, __sk_common.skc_dport); - event.af = 
BPF_CORE_READ(sk, __sk_common.skc_family); - if (event.af == AF_INET) { - event.saddr_v4 = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); - event.daddr_v4 = BPF_CORE_READ(sk, __sk_common.skc_daddr); - } else { - BPF_CORE_READ_INTO(&event.saddr_v6, sk, - __sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); - BPF_CORE_READ_INTO(&event.daddr_v6, sk, - __sk_common.skc_v6_daddr.in6_u.u6_addr32); - } - bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, - &event, sizeof(event)); + event.delta_us = delta / 1000U; + if (targ_min_us && event.delta_us < targ_min_us) + goto cleanup; + __builtin_memcpy(&event.comm, piddatap->comm, + sizeof(event.comm)); + event.ts_us = ts / 1000; + event.tgid = piddatap->tgid; + event.lport = BPF_CORE_READ(sk, __sk_common.skc_num); + event.dport = BPF_CORE_READ(sk, __sk_common.skc_dport); + event.af = BPF_CORE_READ(sk, __sk_common.skc_family); + if (event.af == AF_INET) { + event.saddr_v4 = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); + event.daddr_v4 = BPF_CORE_READ(sk, __sk_common.skc_daddr); + } else { + BPF_CORE_READ_INTO(&event.saddr_v6, sk, + __sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); + BPF_CORE_READ_INTO(&event.daddr_v6, sk, + __sk_common.skc_v6_daddr.in6_u.u6_addr32); + } + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, + &event, sizeof(event)); cleanup: - bpf_map_delete_elem(&start, &sk); - return 0; + bpf_map_delete_elem(&start, &sk); + return 0; } ``` @@ -162,7 +163,7 @@ PID COMM IP SRC DEST PORT LAT(ms) CONATINER 使用下述查询命令即可看到延时的统计图表: -``` +```plain rate(eunomia_observed_tcpconnlat_v4_histogram_sum[5m]) / rate(eunomia_observed_tcpconnlat_v4_histogram_count[5m]) @@ -178,9 +179,9 @@ PID COMM IP SRC DEST PORT LAT(ms) CONATINER > `Eunomia` 是一个使用 C/C++ 开发的基于 eBPF的轻量级,高性能云原生监控工具,旨在帮助用户了解容器的各项行为、监控可疑的容器安全事件,力求提供覆盖容器全生命周期的轻量级开源监控解决方案。它使用 `Linux` `eBPF` 技术在运行时跟踪您的系统和应用程序,并分析收集的事件以检测可疑的行为模式。目前,它包含性能分析、容器集群网络可视化分析*、容器安全感知告警、一键部署、持久化存储监控等功能,提供了多样化的 ebpf 追踪点。其核心导出器/命令行工具最小仅需要约 4MB 大小的二进制程序,即可在支持的 Linux 内核上启动。 
-项目地址:https://github.com/yunwei37/Eunomia +项目地址:<https://github.com/yunwei37/Eunomia> ### 参考资料 -1. http://kerneltravel.net/blog/2020/tcpconnlat/ -2. https://network.51cto.com/article/640631.html \ No newline at end of file +1. <http://kerneltravel.net/blog/2020/tcpconnlat/> +2. <https://network.51cto.com/article/640631.html> diff --git a/14-tcpstates/README.md b/14-tcpstates/README.md index bba8df5..dfe6ef5 100644 --- a/14-tcpstates/README.md +++ b/14-tcpstates/README.md @@ -1,10 +1,10 @@ -## eBPF 入门实践教程: +# eBPF 入门实践教程 ## origin origin from: -https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpconnlat.bpf.c +<https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpconnlat.bpf.c> ## Compile and Run @@ -13,6 +13,7 @@ Compile: ```shell docker run -it -v `pwd`/:/src/ yunwei37/ebpm:latest ``` + Run: ```shell @@ -23,9 +24,9 @@ sudo ./ecli run package.json Demonstrations of tcpstates, the Linux BPF/bcc version. - tcpstates prints TCP state change information, including the duration in each state as milliseconds. For example, a single TCP session: + ```console # tcpstates SKADDR C-PID C-COMM LADDR LPORT RADDR RPORT OLDSTATE -> NEWSTATE MS @@ -36,6 +37,7 @@ ffff9fd7e8192000 0 swapper/5 100.66.100.185 63446 52.33.159.26 80 FI ffff9fd7e8192000 0 swapper/5 100.66.100.185 63446 52.33.159.26 80 FIN_WAIT2 -> CLOSE 0.006 ^C ``` + This showed that the most time was spent in the ESTABLISHED state (which then transitioned to FIN_WAIT1), which was 176.042 milliseconds. @@ -49,7 +51,7 @@ process context. If that's not the case, they may show kernel details. ## 来源 -修改自 https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpstates.bpf.c +修改自 <https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpstates.bpf.c> ## 编译运行 @@ -60,7 +62,8 @@ process context. If that's not the case, they may show kernel details. 
- ```sudo ./tcpstates``` ## 效果 -``` + +```plain root@yutong-VirtualBox:~/libbpf-bootstrap/examples/c# ./tcpstates SKADDR PID COMM LADDR LPORT RADDR RPORT OLDSTATE -> NEWSTATE MS ffff9bf61bb62bc0 164978 node 192.168.88.15 0 52.178.17.2 443 CLOSE -> SYN_SENT 0.000 @@ -87,73 +90,65 @@ int handle_set_state(struct trace_event_raw_inet_sock_set_state *ctx) 在套接字改变状态处附加一个eBPF跟踪函数。 - - ```c - if (ctx->protocol != IPPROTO_TCP) - return 0; + if (ctx->protocol != IPPROTO_TCP) + return 0; - if (target_family && target_family != family) - return 0; + if (target_family && target_family != family) + return 0; - if (filter_by_sport && !bpf_map_lookup_elem(&sports, &sport)) - return 0; + if (filter_by_sport && !bpf_map_lookup_elem(&sports, &sport)) + return 0; - if (filter_by_dport && !bpf_map_lookup_elem(&dports, &dport)) - return 0; + if (filter_by_dport && !bpf_map_lookup_elem(&dports, &dport)) + return 0; ``` 跟踪函数被调用后,先判断当前改变状态的套接字是否满足我们需要的过滤条件,如果不满足则不进行记录。 - - ```c - tsp = bpf_map_lookup_elem(&timestamps, &sk); - ts = bpf_ktime_get_ns(); - if (!tsp) - delta_us = 0; - else - delta_us = (ts - *tsp) / 1000; + tsp = bpf_map_lookup_elem(&timestamps, &sk); + ts = bpf_ktime_get_ns(); + if (!tsp) + delta_us = 0; + else + delta_us = (ts - *tsp) / 1000; - event.skaddr = (__u64)sk; - event.ts_us = ts / 1000; - event.delta_us = delta_us; - event.pid = bpf_get_current_pid_tgid() >> 32; - event.oldstate = ctx->oldstate; - event.newstate = ctx->newstate; - event.family = family; - event.sport = sport; - event.dport = dport; - bpf_get_current_comm(&event.task, sizeof(event.task)); + event.skaddr = (__u64)sk; + event.ts_us = ts / 1000; + event.delta_us = delta_us; + event.pid = bpf_get_current_pid_tgid() >> 32; + event.oldstate = ctx->oldstate; + event.newstate = ctx->newstate; + event.family = family; + event.sport = sport; + event.dport = dport; + bpf_get_current_comm(&event.task, sizeof(event.task)); - if (family == AF_INET) { - bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), 
&sk->__sk_common.skc_rcv_saddr); - bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_daddr); - } else { /* family == AF_INET6 */ - bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); - bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32); - } + if (family == AF_INET) { + bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_rcv_saddr); + bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_daddr); + } else { /* family == AF_INET6 */ + bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); + bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32); + } ``` 使用状态改变相关填充event结构体。 - 此处使用了```libbpf``` 的 CO-RE 支持。 - - ```c - bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event)); + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event)); ``` 将事件结构体发送至用户态程序。 - - ```c - if (ctx->newstate == TCP_CLOSE) - bpf_map_delete_elem(&timestamps, &sk); - else - bpf_map_update_elem(&timestamps, &sk, &ts, BPF_ANY); + if (ctx->newstate == TCP_CLOSE) + bpf_map_delete_elem(&timestamps, &sk); + else + bpf_map_update_elem(&timestamps, &sk, &ts, BPF_ANY); ``` 根据这个TCP链接的新状态,决定是更新下时间戳记录还是不再记录它的时间戳。 @@ -213,4 +208,4 @@ static void handle_lost_events(void* ctx, int cpu, __u64 lost_cnt) { } ``` -收到事件后所调用对应的处理函数并进行输出打印。 \ No newline at end of file +收到事件后所调用对应的处理函数并进行输出打印。