From 68a638ef8a531131c6980ac46a5d7cb76d15c5d6 Mon Sep 17 00:00:00 2001 From: officeyutong <9004058+Officeyutong@users.noreply.github.com> Date: Sat, 21 Jan 2023 21:13:21 +0800 Subject: [PATCH] update tutorial 14 --- 14-tcpstates/README.md | 4 + 14-tcpstates/libbpf-bootstrap/tcpstates.bpf.c | 102 ++++ 14-tcpstates/libbpf-bootstrap/tcpstates.c | 546 ++++++++++++++++++ 14-tcpstates/libbpf-bootstrap/tcpstates.h | 23 + 14-tcpstates/tcpstates-libbpf-bootstrap.md | 169 ++++++ 5 files changed, 844 insertions(+) create mode 100644 14-tcpstates/libbpf-bootstrap/tcpstates.bpf.c create mode 100644 14-tcpstates/libbpf-bootstrap/tcpstates.c create mode 100644 14-tcpstates/libbpf-bootstrap/tcpstates.h create mode 100644 14-tcpstates/tcpstates-libbpf-bootstrap.md diff --git a/14-tcpstates/README.md b/14-tcpstates/README.md index 9c67bd2..7d08e81 100644 --- a/14-tcpstates/README.md +++ b/14-tcpstates/README.md @@ -1,5 +1,9 @@ ## eBPF 入门实践教程: +## 备注 + +对于使用 `libbpf-bootstrap` 的开发,具体见 [tcpstates-libbpf-bootstrap.md](tcpstates-libbpf-bootstrap.md) + ## origin origin from: diff --git a/14-tcpstates/libbpf-bootstrap/tcpstates.bpf.c b/14-tcpstates/libbpf-bootstrap/tcpstates.bpf.c new file mode 100644 index 0000000..0f9ed24 --- /dev/null +++ b/14-tcpstates/libbpf-bootstrap/tcpstates.bpf.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2021 Hengqi Chen */ +#include +#include +#include +#include +#include "tcpstates.h" + +#define MAX_ENTRIES 10240 +#define AF_INET 2 +#define AF_INET6 10 + +const volatile bool filter_by_sport = false; +const volatile bool filter_by_dport = false; +const volatile short target_family = 0; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u16); + __type(value, __u16); +} sports SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u16); + __type(value, __u16); +} dports SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, struct sock *); + __type(value, __u64); +} timestamps SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} events SEC(".maps"); + +SEC("tracepoint/sock/inet_sock_set_state") +int handle_set_state(struct trace_event_raw_inet_sock_set_state *ctx) +{ + struct sock *sk = (struct sock *)ctx->skaddr; + __u16 family = ctx->family; + __u16 sport = ctx->sport; + __u16 dport = ctx->dport; + __u64 *tsp, delta_us, ts; + struct event event = {}; + + if (ctx->protocol != IPPROTO_TCP) + return 0; + + if (target_family && target_family != family) + return 0; + + if (filter_by_sport && !bpf_map_lookup_elem(&sports, &sport)) + return 0; + + if (filter_by_dport && !bpf_map_lookup_elem(&dports, &dport)) + return 0; + + tsp = bpf_map_lookup_elem(×tamps, &sk); + ts = bpf_ktime_get_ns(); + if (!tsp) + delta_us = 0; + else + delta_us = (ts - *tsp) / 1000; + + event.skaddr = (__u64)sk; + event.ts_us = ts / 1000; + event.delta_us = delta_us; + event.pid = bpf_get_current_pid_tgid() >> 32; + event.oldstate = ctx->oldstate; + event.newstate = ctx->newstate; + event.family = family; + event.sport = sport; + event.dport = dport; + bpf_get_current_comm(&event.task, sizeof(event.task)); + + if (family == AF_INET) { + bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_rcv_saddr); + bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_daddr); + } else { /* family == AF_INET6 */ + bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); + bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32); + } + + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event)); + + if (ctx->newstate == TCP_CLOSE) + bpf_map_delete_elem(×tamps, &sk); + else + bpf_map_update_elem(×tamps, &sk, &ts, BPF_ANY); + + return 0; +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/14-tcpstates/libbpf-bootstrap/tcpstates.c b/14-tcpstates/libbpf-bootstrap/tcpstates.c new file mode 100644 index 0000000..ecdeb67 --- /dev/null +++ b/14-tcpstates/libbpf-bootstrap/tcpstates.c @@ -0,0 +1,546 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * tcpstates Trace TCP session state changes with durations. + * Copyright (c) 2021 Hengqi Chen + * + * Based on tcpstates(8) from BCC by Brendan Gregg. + * 18-Dec-2021 Hengqi Chen Created this. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +// #include "btf_helpers.h" +#include "tcpstates.h" +#include "tcpstates.skel.h" +// #include "trace_helpers.h" + +#define PERF_BUFFER_PAGES 16 +#define PERF_POLL_TIMEOUT_MS 100 +#define warn(...) fprintf(stderr, __VA_ARGS__) + +static volatile sig_atomic_t exiting = 0; + +static bool emit_timestamp = false; +static short target_family = 0; +static char* target_sports = NULL; +static char* target_dports = NULL; +static bool wide_output = false; +static bool verbose = false; +static const char* tcp_states[] = { + [1] = "ESTABLISHED", [2] = "SYN_SENT", [3] = "SYN_RECV", + [4] = "FIN_WAIT1", [5] = "FIN_WAIT2", [6] = "TIME_WAIT", + [7] = "CLOSE", [8] = "CLOSE_WAIT", [9] = "LAST_ACK", + [10] = "LISTEN", [11] = "CLOSING", [12] = "NEW_SYN_RECV", + [13] = "UNKNOWN", +}; + +const char* argp_program_version = "tcpstates 1.0"; +const char* argp_program_bug_address = + "https://github.com/iovisor/bcc/tree/master/libbpf-tools"; +const char argp_program_doc[] = + "Trace TCP session state changes and durations.\n" + "\n" + "USAGE: tcpstates [-4] [-6] [-T] [-L lport] [-D dport]\n" + "\n" + "EXAMPLES:\n" + " tcpstates # trace all TCP state changes\n" + " tcpstates -T # include timestamps\n" + " tcpstates -L 80 # only trace local port 80\n" + " tcpstates -D 80 # only trace remote port 80\n"; + +static const struct argp_option opts[] = { + {"verbose", 'v', NULL, 0, "Verbose debug output"}, + {"timestamp", 'T', NULL, 0, "Include timestamp on output"}, + {"ipv4", '4', NULL, 0, "Trace IPv4 family only"}, + {"ipv6", '6', NULL, 0, "Trace IPv6 family only"}, + {"wide", 'w', NULL, 0, "Wide column output (fits IPv6 addresses)"}, + {"localport", 'L', "LPORT", 0, + "Comma-separated list of local ports to trace."}, + {"remoteport", 'D', "DPORT", 0, + "Comma-separated list of remote ports to trace."}, + {NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"}, + {}, +}; + +static error_t parse_arg(int key, char* arg, struct argp_state* state) { + long port_num; + char* port; + + switch (key) { + case 'v': + verbose = true; + break; + case 'T': + emit_timestamp = true; + break; + case '4': + target_family = AF_INET; + break; + case '6': + target_family = AF_INET6; + break; + case 'w': + wide_output = true; + break; + case 'L': + if (!arg) { + warn("No ports specified\n"); + argp_usage(state); + } + target_sports = strdup(arg); + port = strtok(arg, ","); + while (port) { + port_num = strtol(port, NULL, 10); + if (errno || port_num <= 0 || port_num > 65536) { + warn("Invalid ports: %s\n", arg); + argp_usage(state); + } + port = strtok(NULL, ","); + } + break; + case 'D': + if (!arg) { + warn("No ports specified\n"); + argp_usage(state); + } + target_dports = strdup(arg); + port = strtok(arg, ","); + while (port) { + port_num = strtol(port, NULL, 10); + if (errno || port_num <= 0 || port_num > 65536) { + warn("Invalid ports: %s\n", arg); + argp_usage(state); + } + port = strtok(NULL, ","); + } + break; + case 'h': + argp_state_help(state, stderr, ARGP_HELP_STD_HELP); + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +static int libbpf_print_fn(enum libbpf_print_level level, + const char* format, + va_list args) { + if (level == LIBBPF_DEBUG && !verbose) + return 0; + + return vfprintf(stderr, format, args); +} + +static void sig_int(int signo) { + exiting = 1; +} + +static void handle_event(void* ctx, int cpu, void* data, __u32 data_sz) { + char ts[32], saddr[26], daddr[26]; + struct event* e = data; + struct tm* tm; + int family; + time_t t; + + if (emit_timestamp) { + time(&t); + tm = localtime(&t); + strftime(ts, sizeof(ts), "%H:%M:%S", tm); + printf("%8s ", ts); + } + + inet_ntop(e->family, &e->saddr, saddr, sizeof(saddr)); + inet_ntop(e->family, &e->daddr, daddr, sizeof(daddr)); + if (wide_output) { + family = e->family == AF_INET ? 4 : 6; + printf( + "%-16llx %-7d %-16s %-2d %-26s %-5d %-26s %-5d %-11s -> %-11s " + "%.3f\n", + e->skaddr, e->pid, e->task, family, saddr, e->sport, daddr, + e->dport, tcp_states[e->oldstate], tcp_states[e->newstate], + (double)e->delta_us / 1000); + } else { + printf( + "%-16llx %-7d %-10.10s %-15s %-5d %-15s %-5d %-11s -> %-11s %.3f\n", + e->skaddr, e->pid, e->task, saddr, e->sport, daddr, e->dport, + tcp_states[e->oldstate], tcp_states[e->newstate], + (double)e->delta_us / 1000); + } +} + +static void handle_lost_events(void* ctx, int cpu, __u64 lost_cnt) { + warn("lost %llu events on CPU #%d\n", lost_cnt, cpu); +} + +extern unsigned char _binary_min_core_btfs_tar_gz_start[] __attribute__((weak)); +extern unsigned char _binary_min_core_btfs_tar_gz_end[] __attribute__((weak)); + + +/* tar header from + * https://github.com/tklauser/libtar/blob/v1.2.20/lib/libtar.h#L39-L60 */ +struct tar_header { + char name[100]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char chksum[8]; + char typeflag; + char linkname[100]; + char magic[6]; + char version[2]; + char uname[32]; + char gname[32]; + char devmajor[8]; + char devminor[8]; + char prefix[155]; + char padding[12]; +}; + +static char* tar_file_start(struct tar_header* tar, + const char* name, + int* length) { + while (tar->name[0]) { + sscanf(tar->size, "%o", length); + if (!strcmp(tar->name, name)) + return (char*)(tar + 1); + tar += 1 + (*length + 511) / 512; + } + return NULL; +} +#define FIELD_LEN 65 +#define ID_FMT "ID=%64s" +#define VERSION_FMT "VERSION_ID=\"%64s" + +struct os_info { + char id[FIELD_LEN]; + char version[FIELD_LEN]; + char arch[FIELD_LEN]; + char kernel_release[FIELD_LEN]; +}; + +static struct os_info* get_os_info() { + struct os_info* info = NULL; + struct utsname u; + size_t len = 0; + ssize_t read; + char* line = NULL; + FILE* f; + + if (uname(&u) == -1) + return NULL; + + f = fopen("/etc/os-release", "r"); + if (!f) + return NULL; + + info = calloc(1, sizeof(*info)); + if (!info) + goto out; + + strncpy(info->kernel_release, u.release, FIELD_LEN); + strncpy(info->arch, u.machine, FIELD_LEN); + + while ((read = getline(&line, &len, f)) != -1) { + if (sscanf(line, ID_FMT, info->id) == 1) + continue; + + if (sscanf(line, VERSION_FMT, info->version) == 1) { + /* remove '"' suffix */ + info->version[strlen(info->version) - 1] = 0; + continue; + } + } + +out: + free(line); + fclose(f); + + return info; +} +#define INITIAL_BUF_SIZE (1024 * 1024 * 4) /* 4MB */ + +/* adapted from https://zlib.net/zlib_how.html */ +static int inflate_gz(unsigned char* src, + int src_size, + unsigned char** dst, + int* dst_size) { + size_t size = INITIAL_BUF_SIZE; + size_t next_size = size; + z_stream strm; + void* tmp; + int ret; + + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + + ret = inflateInit2(&strm, 16 + MAX_WBITS); + if (ret != Z_OK) + return -EINVAL; + + *dst = malloc(size); + if (!*dst) + return -ENOMEM; + + strm.next_in = src; + strm.avail_in = src_size; + + /* run inflate() on input until it returns Z_STREAM_END */ + do { + strm.next_out = *dst + strm.total_out; + strm.avail_out = next_size; + ret = inflate(&strm, Z_NO_FLUSH); + if (ret != Z_OK && ret != Z_STREAM_END) + goto out_err; + /* we need more space */ + if (strm.avail_out == 0) { + next_size = size; + size *= 2; + tmp = realloc(*dst, size); + if (!tmp) { + ret = -ENOMEM; + goto out_err; + } + *dst = tmp; + } + } while (ret != Z_STREAM_END); + + *dst_size = strm.total_out; + + /* clean up and return */ + ret = inflateEnd(&strm); + if (ret != Z_OK) { + ret = -EINVAL; + goto out_err; + } + return 0; + +out_err: + free(*dst); + *dst = NULL; + return ret; +} +struct btf *btf__load_vmlinux_btf(void); +void btf__free(struct btf *btf); +static bool vmlinux_btf_exists(void) { + struct btf* btf; + int err; + + btf = btf__load_vmlinux_btf(); + err = libbpf_get_error(btf); + if (err) + return false; + + btf__free(btf); + return true; +} + +static int ensure_core_btf(struct bpf_object_open_opts* opts) { + char name_fmt[] = "./%s/%s/%s/%s.btf"; + char btf_path[] = "/tmp/bcc-libbpf-tools.btf.XXXXXX"; + struct os_info* info = NULL; + unsigned char* dst_buf = NULL; + char* file_start; + int dst_size = 0; + char name[100]; + FILE* dst = NULL; + int ret; + + /* do nothing if the system provides BTF */ + if (vmlinux_btf_exists()) + return 0; + + /* compiled without min core btfs */ + if (!_binary_min_core_btfs_tar_gz_start) + return -EOPNOTSUPP; + + info = get_os_info(); + if (!info) + return -errno; + + ret = mkstemp(btf_path); + if (ret < 0) { + ret = -errno; + goto out; + } + + dst = fdopen(ret, "wb"); + if (!dst) { + ret = -errno; + goto out; + } + + ret = snprintf(name, sizeof(name), name_fmt, info->id, info->version, + info->arch, info->kernel_release); + if (ret < 0 || ret == sizeof(name)) { + ret = -EINVAL; + goto out; + } + + ret = inflate_gz( + _binary_min_core_btfs_tar_gz_start, + _binary_min_core_btfs_tar_gz_end - _binary_min_core_btfs_tar_gz_start, + &dst_buf, &dst_size); + if (ret < 0) + goto out; + + ret = 0; + file_start = tar_file_start((struct tar_header*)dst_buf, name, &dst_size); + if (!file_start) { + ret = -EINVAL; + goto out; + } + + if (fwrite(file_start, 1, dst_size, dst) != dst_size) { + ret = -ferror(dst); + goto out; + } + + opts->btf_custom_path = strdup(btf_path); + if (!opts->btf_custom_path) + ret = -ENOMEM; + +out: + free(info); + fclose(dst); + free(dst_buf); + + return ret; +} + +static void cleanup_core_btf(struct bpf_object_open_opts* opts) { + if (!opts) + return; + + if (!opts->btf_custom_path) + return; + + unlink(opts->btf_custom_path); + free((void*)opts->btf_custom_path); +} + +int main(int argc, char** argv) { + LIBBPF_OPTS(bpf_object_open_opts, open_opts); + static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, + }; + struct perf_buffer* pb = NULL; + struct tcpstates_bpf* obj; + int err, port_map_fd; + short port_num; + char* port; + + err = argp_parse(&argp, argc, argv, 0, NULL, NULL); + if (err) + return err; + + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + libbpf_set_print(libbpf_print_fn); + + err = ensure_core_btf(&open_opts); + if (err) { + warn("failed to fetch necessary BTF for CO-RE: %s\n", strerror(-err)); + return 1; + } + + obj = tcpstates_bpf__open_opts(&open_opts); + if (!obj) { + warn("failed to open BPF object\n"); + return 1; + } + + obj->rodata->filter_by_sport = target_sports != NULL; + obj->rodata->filter_by_dport = target_dports != NULL; + obj->rodata->target_family = target_family; + + err = tcpstates_bpf__load(obj); + if (err) { + warn("failed to load BPF object: %d\n", err); + goto cleanup; + } + + if (target_sports) { + port_map_fd = bpf_map__fd(obj->maps.sports); + port = strtok(target_sports, ","); + while (port) { + port_num = strtol(port, NULL, 10); + bpf_map_update_elem(port_map_fd, &port_num, &port_num, BPF_ANY); + port = strtok(NULL, ","); + } + } + if (target_dports) { + port_map_fd = bpf_map__fd(obj->maps.dports); + port = strtok(target_dports, ","); + while (port) { + port_num = strtol(port, NULL, 10); + bpf_map_update_elem(port_map_fd, &port_num, &port_num, BPF_ANY); + port = strtok(NULL, ","); + } + } + + err = tcpstates_bpf__attach(obj); + if (err) { + warn("failed to attach BPF programs: %d\n", err); + goto cleanup; + } + + pb = perf_buffer__new(bpf_map__fd(obj->maps.events), PERF_BUFFER_PAGES, + handle_event, handle_lost_events, NULL, NULL); + if (!pb) { + err = -errno; + warn("failed to open perf buffer: %d\n", err); + goto cleanup; + } + + if (signal(SIGINT, sig_int) == SIG_ERR) { + warn("can't set signal handler: %s\n", strerror(errno)); + err = 1; + goto cleanup; + } + + if (emit_timestamp) + printf("%-8s ", "TIME(s)"); + if (wide_output) + printf( + "%-16s %-7s %-16s %-2s %-26s %-5s %-26s %-5s %-11s -> %-11s %s\n", + "SKADDR", "PID", "COMM", "IP", "LADDR", "LPORT", "RADDR", "RPORT", + "OLDSTATE", "NEWSTATE", "MS"); + else + printf("%-16s %-7s %-10s %-15s %-5s %-15s %-5s %-11s -> %-11s %s\n", + "SKADDR", "PID", "COMM", "LADDR", "LPORT", "RADDR", "RPORT", + "OLDSTATE", "NEWSTATE", "MS"); + + while (!exiting) { + err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS); + if (err < 0 && err != -EINTR) { + warn("error polling perf buffer: %s\n", strerror(-err)); + goto cleanup; + } + /* reset err to return 0 if exiting */ + err = 0; + } + +cleanup: + perf_buffer__free(pb); + tcpstates_bpf__destroy(obj); + cleanup_core_btf(&open_opts); + + return err != 0; +} diff --git a/14-tcpstates/libbpf-bootstrap/tcpstates.h b/14-tcpstates/libbpf-bootstrap/tcpstates.h new file mode 100644 index 0000000..31f2b61 --- /dev/null +++ b/14-tcpstates/libbpf-bootstrap/tcpstates.h @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2021 Hengqi Chen */ +#ifndef __TCPSTATES_H +#define __TCPSTATES_H + +#define TASK_COMM_LEN 16 + +struct event { + unsigned __int128 saddr; + unsigned __int128 daddr; + __u64 skaddr; + __u64 ts_us; + __u64 delta_us; + __u32 pid; + int oldstate; + int newstate; + __u16 family; + __u16 sport; + __u16 dport; + char task[TASK_COMM_LEN]; +}; + +#endif /* __TCPSTATES_H */ diff --git a/14-tcpstates/tcpstates-libbpf-bootstrap.md b/14-tcpstates/tcpstates-libbpf-bootstrap.md new file mode 100644 index 0000000..72222a2 --- /dev/null +++ b/14-tcpstates/tcpstates-libbpf-bootstrap.md @@ -0,0 +1,169 @@ +## eBPF入门实践教程:使用 libbpf-bootstrap 开发程序统计 TCP 连接延时 + +## 来源 + +修改自 https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpstates.bpf.c + +## 编译运行 + +- ```git clone https://github.com/libbpf/libbpf-bootstrap libbpf-bootstrap-cloned``` +- 将 [libbpf-bootstrap](libbpf-bootstrap)目录下的文件复制到 ```libbpf-bootstrap-cloned/examples/c```下 +- 修改 ```libbpf-bootstrap-cloned/examples/c/Makefile``` ,在其 ```APPS``` 项后添加 ```tcpstates``` +- 在 ```libbpf-bootstrap-cloned/examples/c``` 下运行 ```make tcpstates``` +- ```sudo ./tcpstates``` + +## 效果 +``` +root@yutong-VirtualBox:~/libbpf-bootstrap/examples/c# ./tcpstates +SKADDR PID COMM LADDR LPORT RADDR RPORT OLDSTATE -> NEWSTATE MS +ffff9bf61bb62bc0 164978 node 192.168.88.15 0 52.178.17.2 443 CLOSE -> SYN_SENT 0.000 +ffff9bf61bb62bc0 0 swapper/0 192.168.88.15 41596 52.178.17.2 443 SYN_SENT -> ESTABLISHED 225.794 +ffff9bf61bb62bc0 0 swapper/0 192.168.88.15 41596 52.178.17.2 443 ESTABLISHED -> CLOSE_WAIT 901.454 +ffff9bf61bb62bc0 164978 node 192.168.88.15 41596 52.178.17.2 443 CLOSE_WAIT -> LAST_ACK 0.793 +ffff9bf61bb62bc0 164978 node 192.168.88.15 41596 52.178.17.2 443 LAST_ACK -> LAST_ACK 0.086 +ffff9bf61bb62bc0 228759 kworker/u6 192.168.88.15 41596 52.178.17.2 443 LAST_ACK -> CLOSE 0.193 +ffff9bf6d8ee88c0 229832 redis-serv 0.0.0.0 6379 0.0.0.0 0 CLOSE -> LISTEN 0.000 +ffff9bf6d8ee88c0 229832 redis-serv 0.0.0.0 6379 0.0.0.0 0 LISTEN -> CLOSE 1.763 +ffff9bf7109d6900 88750 node 127.0.0.1 39755 127.0.0.1 50966 ESTABLISHED -> FIN_WAIT1 0.000 +``` + +对于输出的详细解释,详见 [README.md](README.md) + +## ```tcpstates.bpf.c``` 的解释 + +```tcpstates``` 是一个追踪当前系统上的TCP套接字的TCP状态的程序,主要通过跟踪内核跟踪点 ```inet_sock_set_state``` 来实现。统计数据通过 ```perf_event```向用户态传输。 + +```c +SEC("tracepoint/sock/inet_sock_set_state") +int handle_set_state(struct trace_event_raw_inet_sock_set_state *ctx) +``` + +在套接字改变状态处附加一个eBPF跟踪函数。 + + + +```c + if (ctx->protocol != IPPROTO_TCP) + return 0; + + if (target_family && target_family != family) + return 0; + + if (filter_by_sport && !bpf_map_lookup_elem(&sports, &sport)) + return 0; + + if (filter_by_dport && !bpf_map_lookup_elem(&dports, &dport)) + return 0; +``` + +跟踪函数被调用后,先判断当前改变状态的套接字是否满足我们需要的过滤条件,如果不满足则不进行记录。 + + + +```c + tsp = bpf_map_lookup_elem(×tamps, &sk); + ts = bpf_ktime_get_ns(); + if (!tsp) + delta_us = 0; + else + delta_us = (ts - *tsp) / 1000; + + event.skaddr = (__u64)sk; + event.ts_us = ts / 1000; + event.delta_us = delta_us; + event.pid = bpf_get_current_pid_tgid() >> 32; + event.oldstate = ctx->oldstate; + event.newstate = ctx->newstate; + event.family = family; + event.sport = sport; + event.dport = dport; + bpf_get_current_comm(&event.task, sizeof(event.task)); + + if (family == AF_INET) { + bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_rcv_saddr); + bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_daddr); + } else { /* family == AF_INET6 */ + bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); + bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32); + } +``` + +使用状态改变相关填充event结构体。 + +- 此处使用了```libbpf``` 的 CO-RE 支持。 + + + +```c + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event)); +``` + +将事件结构体发送至用户态程序。 + + + +```c + if (ctx->newstate == TCP_CLOSE) + bpf_map_delete_elem(×tamps, &sk); + else + bpf_map_update_elem(×tamps, &sk, &ts, BPF_ANY); +``` + +根据这个TCP链接的新状态,决定是更新下时间戳记录还是不再记录它的时间戳。 + +### 对于用户态程序 + +```c + while (!exiting) { + err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS); + if (err < 0 && err != -EINTR) { + warn("error polling perf buffer: %s\n", strerror(-err)); + goto cleanup; + } + /* reset err to return 0 if exiting */ + err = 0; + } +``` + +不停轮询内核程序所发过来的 ```perf event```。 + +```c +static void handle_event(void* ctx, int cpu, void* data, __u32 data_sz) { + char ts[32], saddr[26], daddr[26]; + struct event* e = data; + struct tm* tm; + int family; + time_t t; + + if (emit_timestamp) { + time(&t); + tm = localtime(&t); + strftime(ts, sizeof(ts), "%H:%M:%S", tm); + printf("%8s ", ts); + } + + inet_ntop(e->family, &e->saddr, saddr, sizeof(saddr)); + inet_ntop(e->family, &e->daddr, daddr, sizeof(daddr)); + if (wide_output) { + family = e->family == AF_INET ? 4 : 6; + printf( + "%-16llx %-7d %-16s %-2d %-26s %-5d %-26s %-5d %-11s -> %-11s " + "%.3f\n", + e->skaddr, e->pid, e->task, family, saddr, e->sport, daddr, + e->dport, tcp_states[e->oldstate], tcp_states[e->newstate], + (double)e->delta_us / 1000); + } else { + printf( + "%-16llx %-7d %-10.10s %-15s %-5d %-15s %-5d %-11s -> %-11s %.3f\n", + e->skaddr, e->pid, e->task, saddr, e->sport, daddr, e->dport, + tcp_states[e->oldstate], tcp_states[e->newstate], + (double)e->delta_us / 1000); + } +} + +static void handle_lost_events(void* ctx, int cpu, __u64 lost_cnt) { + warn("lost %llu events on CPU #%d\n", lost_cnt, cpu); +} +``` + +收到事件后所调用对应的处理函数并进行输出打印。 \ No newline at end of file