mirror of
https://github.com/eunomia-bpf/bpf-developer-tutorial.git
synced 2026-02-03 02:04:30 +08:00
update tutorial 14
This commit is contained in:
@@ -1,5 +1,9 @@
|
||||
## eBPF 入门实践教程:
|
||||
|
||||
## 备注
|
||||
|
||||
对于使用 `libbpf-bootstrap` 的开发,具体见 [tcpstates-libbpf-bootstrap.md](tcpstates-libbpf-bootstrap.md)
|
||||
|
||||
## origin
|
||||
|
||||
origin from:
|
||||
|
||||
102
14-tcpstates/libbpf-bootstrap/tcpstates.bpf.c
Normal file
102
14-tcpstates/libbpf-bootstrap/tcpstates.bpf.c
Normal file
@@ -0,0 +1,102 @@
|
||||
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
|
||||
/* Copyright (c) 2021 Hengqi Chen */
|
||||
#include <vmlinux.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include "tcpstates.h"
|
||||
|
||||
#define MAX_ENTRIES 10240
|
||||
#define AF_INET 2
|
||||
#define AF_INET6 10
|
||||
|
||||
const volatile bool filter_by_sport = false;
|
||||
const volatile bool filter_by_dport = false;
|
||||
const volatile short target_family = 0;
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__uint(max_entries, MAX_ENTRIES);
|
||||
__type(key, __u16);
|
||||
__type(value, __u16);
|
||||
} sports SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__uint(max_entries, MAX_ENTRIES);
|
||||
__type(key, __u16);
|
||||
__type(value, __u16);
|
||||
} dports SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__uint(max_entries, MAX_ENTRIES);
|
||||
__type(key, struct sock *);
|
||||
__type(value, __u64);
|
||||
} timestamps SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(__u32));
|
||||
__uint(value_size, sizeof(__u32));
|
||||
} events SEC(".maps");
|
||||
|
||||
SEC("tracepoint/sock/inet_sock_set_state")
|
||||
int handle_set_state(struct trace_event_raw_inet_sock_set_state *ctx)
|
||||
{
|
||||
struct sock *sk = (struct sock *)ctx->skaddr;
|
||||
__u16 family = ctx->family;
|
||||
__u16 sport = ctx->sport;
|
||||
__u16 dport = ctx->dport;
|
||||
__u64 *tsp, delta_us, ts;
|
||||
struct event event = {};
|
||||
|
||||
if (ctx->protocol != IPPROTO_TCP)
|
||||
return 0;
|
||||
|
||||
if (target_family && target_family != family)
|
||||
return 0;
|
||||
|
||||
if (filter_by_sport && !bpf_map_lookup_elem(&sports, &sport))
|
||||
return 0;
|
||||
|
||||
if (filter_by_dport && !bpf_map_lookup_elem(&dports, &dport))
|
||||
return 0;
|
||||
|
||||
tsp = bpf_map_lookup_elem(×tamps, &sk);
|
||||
ts = bpf_ktime_get_ns();
|
||||
if (!tsp)
|
||||
delta_us = 0;
|
||||
else
|
||||
delta_us = (ts - *tsp) / 1000;
|
||||
|
||||
event.skaddr = (__u64)sk;
|
||||
event.ts_us = ts / 1000;
|
||||
event.delta_us = delta_us;
|
||||
event.pid = bpf_get_current_pid_tgid() >> 32;
|
||||
event.oldstate = ctx->oldstate;
|
||||
event.newstate = ctx->newstate;
|
||||
event.family = family;
|
||||
event.sport = sport;
|
||||
event.dport = dport;
|
||||
bpf_get_current_comm(&event.task, sizeof(event.task));
|
||||
|
||||
if (family == AF_INET) {
|
||||
bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_rcv_saddr);
|
||||
bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_daddr);
|
||||
} else { /* family == AF_INET6 */
|
||||
bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
|
||||
bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
|
||||
}
|
||||
|
||||
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event));
|
||||
|
||||
if (ctx->newstate == TCP_CLOSE)
|
||||
bpf_map_delete_elem(×tamps, &sk);
|
||||
else
|
||||
bpf_map_update_elem(×tamps, &sk, &ts, BPF_ANY);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
char LICENSE[] SEC("license") = "Dual BSD/GPL";
|
||||
546
14-tcpstates/libbpf-bootstrap/tcpstates.c
Normal file
546
14-tcpstates/libbpf-bootstrap/tcpstates.c
Normal file
@@ -0,0 +1,546 @@
|
||||
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
|
||||
|
||||
/*
|
||||
* tcpstates Trace TCP session state changes with durations.
|
||||
* Copyright (c) 2021 Hengqi Chen
|
||||
*
|
||||
* Based on tcpstates(8) from BCC by Brendan Gregg.
|
||||
* 18-Dec-2021 Hengqi Chen Created this.
|
||||
*/
|
||||
#include <argp.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <bpf/bpf.h>
|
||||
#include <bpf/libbpf.h>
|
||||
#include <errno.h>
|
||||
#include <signal.h>
|
||||
#include <string.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/utsname.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <zlib.h>
|
||||
// #include "btf_helpers.h"
|
||||
#include "tcpstates.h"
|
||||
#include "tcpstates.skel.h"
|
||||
// #include "trace_helpers.h"
|
||||
|
||||
#define PERF_BUFFER_PAGES 16
|
||||
#define PERF_POLL_TIMEOUT_MS 100
|
||||
#define warn(...) fprintf(stderr, __VA_ARGS__)
|
||||
|
||||
static volatile sig_atomic_t exiting = 0;
|
||||
|
||||
static bool emit_timestamp = false;
|
||||
static short target_family = 0;
|
||||
static char* target_sports = NULL;
|
||||
static char* target_dports = NULL;
|
||||
static bool wide_output = false;
|
||||
static bool verbose = false;
|
||||
static const char* tcp_states[] = {
|
||||
[1] = "ESTABLISHED", [2] = "SYN_SENT", [3] = "SYN_RECV",
|
||||
[4] = "FIN_WAIT1", [5] = "FIN_WAIT2", [6] = "TIME_WAIT",
|
||||
[7] = "CLOSE", [8] = "CLOSE_WAIT", [9] = "LAST_ACK",
|
||||
[10] = "LISTEN", [11] = "CLOSING", [12] = "NEW_SYN_RECV",
|
||||
[13] = "UNKNOWN",
|
||||
};
|
||||
|
||||
const char* argp_program_version = "tcpstates 1.0";
|
||||
const char* argp_program_bug_address =
|
||||
"https://github.com/iovisor/bcc/tree/master/libbpf-tools";
|
||||
const char argp_program_doc[] =
|
||||
"Trace TCP session state changes and durations.\n"
|
||||
"\n"
|
||||
"USAGE: tcpstates [-4] [-6] [-T] [-L lport] [-D dport]\n"
|
||||
"\n"
|
||||
"EXAMPLES:\n"
|
||||
" tcpstates # trace all TCP state changes\n"
|
||||
" tcpstates -T # include timestamps\n"
|
||||
" tcpstates -L 80 # only trace local port 80\n"
|
||||
" tcpstates -D 80 # only trace remote port 80\n";
|
||||
|
||||
static const struct argp_option opts[] = {
|
||||
{"verbose", 'v', NULL, 0, "Verbose debug output"},
|
||||
{"timestamp", 'T', NULL, 0, "Include timestamp on output"},
|
||||
{"ipv4", '4', NULL, 0, "Trace IPv4 family only"},
|
||||
{"ipv6", '6', NULL, 0, "Trace IPv6 family only"},
|
||||
{"wide", 'w', NULL, 0, "Wide column output (fits IPv6 addresses)"},
|
||||
{"localport", 'L', "LPORT", 0,
|
||||
"Comma-separated list of local ports to trace."},
|
||||
{"remoteport", 'D', "DPORT", 0,
|
||||
"Comma-separated list of remote ports to trace."},
|
||||
{NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"},
|
||||
{},
|
||||
};
|
||||
|
||||
static error_t parse_arg(int key, char* arg, struct argp_state* state) {
|
||||
long port_num;
|
||||
char* port;
|
||||
|
||||
switch (key) {
|
||||
case 'v':
|
||||
verbose = true;
|
||||
break;
|
||||
case 'T':
|
||||
emit_timestamp = true;
|
||||
break;
|
||||
case '4':
|
||||
target_family = AF_INET;
|
||||
break;
|
||||
case '6':
|
||||
target_family = AF_INET6;
|
||||
break;
|
||||
case 'w':
|
||||
wide_output = true;
|
||||
break;
|
||||
case 'L':
|
||||
if (!arg) {
|
||||
warn("No ports specified\n");
|
||||
argp_usage(state);
|
||||
}
|
||||
target_sports = strdup(arg);
|
||||
port = strtok(arg, ",");
|
||||
while (port) {
|
||||
port_num = strtol(port, NULL, 10);
|
||||
if (errno || port_num <= 0 || port_num > 65536) {
|
||||
warn("Invalid ports: %s\n", arg);
|
||||
argp_usage(state);
|
||||
}
|
||||
port = strtok(NULL, ",");
|
||||
}
|
||||
break;
|
||||
case 'D':
|
||||
if (!arg) {
|
||||
warn("No ports specified\n");
|
||||
argp_usage(state);
|
||||
}
|
||||
target_dports = strdup(arg);
|
||||
port = strtok(arg, ",");
|
||||
while (port) {
|
||||
port_num = strtol(port, NULL, 10);
|
||||
if (errno || port_num <= 0 || port_num > 65536) {
|
||||
warn("Invalid ports: %s\n", arg);
|
||||
argp_usage(state);
|
||||
}
|
||||
port = strtok(NULL, ",");
|
||||
}
|
||||
break;
|
||||
case 'h':
|
||||
argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
|
||||
break;
|
||||
default:
|
||||
return ARGP_ERR_UNKNOWN;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int libbpf_print_fn(enum libbpf_print_level level,
|
||||
const char* format,
|
||||
va_list args) {
|
||||
if (level == LIBBPF_DEBUG && !verbose)
|
||||
return 0;
|
||||
|
||||
return vfprintf(stderr, format, args);
|
||||
}
|
||||
|
||||
static void sig_int(int signo) {
|
||||
exiting = 1;
|
||||
}
|
||||
|
||||
static void handle_event(void* ctx, int cpu, void* data, __u32 data_sz) {
|
||||
char ts[32], saddr[26], daddr[26];
|
||||
struct event* e = data;
|
||||
struct tm* tm;
|
||||
int family;
|
||||
time_t t;
|
||||
|
||||
if (emit_timestamp) {
|
||||
time(&t);
|
||||
tm = localtime(&t);
|
||||
strftime(ts, sizeof(ts), "%H:%M:%S", tm);
|
||||
printf("%8s ", ts);
|
||||
}
|
||||
|
||||
inet_ntop(e->family, &e->saddr, saddr, sizeof(saddr));
|
||||
inet_ntop(e->family, &e->daddr, daddr, sizeof(daddr));
|
||||
if (wide_output) {
|
||||
family = e->family == AF_INET ? 4 : 6;
|
||||
printf(
|
||||
"%-16llx %-7d %-16s %-2d %-26s %-5d %-26s %-5d %-11s -> %-11s "
|
||||
"%.3f\n",
|
||||
e->skaddr, e->pid, e->task, family, saddr, e->sport, daddr,
|
||||
e->dport, tcp_states[e->oldstate], tcp_states[e->newstate],
|
||||
(double)e->delta_us / 1000);
|
||||
} else {
|
||||
printf(
|
||||
"%-16llx %-7d %-10.10s %-15s %-5d %-15s %-5d %-11s -> %-11s %.3f\n",
|
||||
e->skaddr, e->pid, e->task, saddr, e->sport, daddr, e->dport,
|
||||
tcp_states[e->oldstate], tcp_states[e->newstate],
|
||||
(double)e->delta_us / 1000);
|
||||
}
|
||||
}
|
||||
|
||||
static void handle_lost_events(void* ctx, int cpu, __u64 lost_cnt) {
|
||||
warn("lost %llu events on CPU #%d\n", lost_cnt, cpu);
|
||||
}
|
||||
|
||||
extern unsigned char _binary_min_core_btfs_tar_gz_start[] __attribute__((weak));
|
||||
extern unsigned char _binary_min_core_btfs_tar_gz_end[] __attribute__((weak));
|
||||
|
||||
|
||||
/* tar header from
|
||||
* https://github.com/tklauser/libtar/blob/v1.2.20/lib/libtar.h#L39-L60 */
|
||||
struct tar_header {
|
||||
char name[100];
|
||||
char mode[8];
|
||||
char uid[8];
|
||||
char gid[8];
|
||||
char size[12];
|
||||
char mtime[12];
|
||||
char chksum[8];
|
||||
char typeflag;
|
||||
char linkname[100];
|
||||
char magic[6];
|
||||
char version[2];
|
||||
char uname[32];
|
||||
char gname[32];
|
||||
char devmajor[8];
|
||||
char devminor[8];
|
||||
char prefix[155];
|
||||
char padding[12];
|
||||
};
|
||||
|
||||
static char* tar_file_start(struct tar_header* tar,
|
||||
const char* name,
|
||||
int* length) {
|
||||
while (tar->name[0]) {
|
||||
sscanf(tar->size, "%o", length);
|
||||
if (!strcmp(tar->name, name))
|
||||
return (char*)(tar + 1);
|
||||
tar += 1 + (*length + 511) / 512;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
#define FIELD_LEN 65
|
||||
#define ID_FMT "ID=%64s"
|
||||
#define VERSION_FMT "VERSION_ID=\"%64s"
|
||||
|
||||
struct os_info {
|
||||
char id[FIELD_LEN];
|
||||
char version[FIELD_LEN];
|
||||
char arch[FIELD_LEN];
|
||||
char kernel_release[FIELD_LEN];
|
||||
};
|
||||
|
||||
static struct os_info* get_os_info() {
|
||||
struct os_info* info = NULL;
|
||||
struct utsname u;
|
||||
size_t len = 0;
|
||||
ssize_t read;
|
||||
char* line = NULL;
|
||||
FILE* f;
|
||||
|
||||
if (uname(&u) == -1)
|
||||
return NULL;
|
||||
|
||||
f = fopen("/etc/os-release", "r");
|
||||
if (!f)
|
||||
return NULL;
|
||||
|
||||
info = calloc(1, sizeof(*info));
|
||||
if (!info)
|
||||
goto out;
|
||||
|
||||
strncpy(info->kernel_release, u.release, FIELD_LEN);
|
||||
strncpy(info->arch, u.machine, FIELD_LEN);
|
||||
|
||||
while ((read = getline(&line, &len, f)) != -1) {
|
||||
if (sscanf(line, ID_FMT, info->id) == 1)
|
||||
continue;
|
||||
|
||||
if (sscanf(line, VERSION_FMT, info->version) == 1) {
|
||||
/* remove '"' suffix */
|
||||
info->version[strlen(info->version) - 1] = 0;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
free(line);
|
||||
fclose(f);
|
||||
|
||||
return info;
|
||||
}
|
||||
#define INITIAL_BUF_SIZE (1024 * 1024 * 4) /* 4MB */
|
||||
|
||||
/* adapted from https://zlib.net/zlib_how.html */
|
||||
static int inflate_gz(unsigned char* src,
|
||||
int src_size,
|
||||
unsigned char** dst,
|
||||
int* dst_size) {
|
||||
size_t size = INITIAL_BUF_SIZE;
|
||||
size_t next_size = size;
|
||||
z_stream strm;
|
||||
void* tmp;
|
||||
int ret;
|
||||
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
strm.avail_in = 0;
|
||||
strm.next_in = Z_NULL;
|
||||
|
||||
ret = inflateInit2(&strm, 16 + MAX_WBITS);
|
||||
if (ret != Z_OK)
|
||||
return -EINVAL;
|
||||
|
||||
*dst = malloc(size);
|
||||
if (!*dst)
|
||||
return -ENOMEM;
|
||||
|
||||
strm.next_in = src;
|
||||
strm.avail_in = src_size;
|
||||
|
||||
/* run inflate() on input until it returns Z_STREAM_END */
|
||||
do {
|
||||
strm.next_out = *dst + strm.total_out;
|
||||
strm.avail_out = next_size;
|
||||
ret = inflate(&strm, Z_NO_FLUSH);
|
||||
if (ret != Z_OK && ret != Z_STREAM_END)
|
||||
goto out_err;
|
||||
/* we need more space */
|
||||
if (strm.avail_out == 0) {
|
||||
next_size = size;
|
||||
size *= 2;
|
||||
tmp = realloc(*dst, size);
|
||||
if (!tmp) {
|
||||
ret = -ENOMEM;
|
||||
goto out_err;
|
||||
}
|
||||
*dst = tmp;
|
||||
}
|
||||
} while (ret != Z_STREAM_END);
|
||||
|
||||
*dst_size = strm.total_out;
|
||||
|
||||
/* clean up and return */
|
||||
ret = inflateEnd(&strm);
|
||||
if (ret != Z_OK) {
|
||||
ret = -EINVAL;
|
||||
goto out_err;
|
||||
}
|
||||
return 0;
|
||||
|
||||
out_err:
|
||||
free(*dst);
|
||||
*dst = NULL;
|
||||
return ret;
|
||||
}
|
||||
struct btf *btf__load_vmlinux_btf(void);
|
||||
void btf__free(struct btf *btf);
|
||||
static bool vmlinux_btf_exists(void) {
|
||||
struct btf* btf;
|
||||
int err;
|
||||
|
||||
btf = btf__load_vmlinux_btf();
|
||||
err = libbpf_get_error(btf);
|
||||
if (err)
|
||||
return false;
|
||||
|
||||
btf__free(btf);
|
||||
return true;
|
||||
}
|
||||
|
||||
static int ensure_core_btf(struct bpf_object_open_opts* opts) {
|
||||
char name_fmt[] = "./%s/%s/%s/%s.btf";
|
||||
char btf_path[] = "/tmp/bcc-libbpf-tools.btf.XXXXXX";
|
||||
struct os_info* info = NULL;
|
||||
unsigned char* dst_buf = NULL;
|
||||
char* file_start;
|
||||
int dst_size = 0;
|
||||
char name[100];
|
||||
FILE* dst = NULL;
|
||||
int ret;
|
||||
|
||||
/* do nothing if the system provides BTF */
|
||||
if (vmlinux_btf_exists())
|
||||
return 0;
|
||||
|
||||
/* compiled without min core btfs */
|
||||
if (!_binary_min_core_btfs_tar_gz_start)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
info = get_os_info();
|
||||
if (!info)
|
||||
return -errno;
|
||||
|
||||
ret = mkstemp(btf_path);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
goto out;
|
||||
}
|
||||
|
||||
dst = fdopen(ret, "wb");
|
||||
if (!dst) {
|
||||
ret = -errno;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = snprintf(name, sizeof(name), name_fmt, info->id, info->version,
|
||||
info->arch, info->kernel_release);
|
||||
if (ret < 0 || ret == sizeof(name)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = inflate_gz(
|
||||
_binary_min_core_btfs_tar_gz_start,
|
||||
_binary_min_core_btfs_tar_gz_end - _binary_min_core_btfs_tar_gz_start,
|
||||
&dst_buf, &dst_size);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = 0;
|
||||
file_start = tar_file_start((struct tar_header*)dst_buf, name, &dst_size);
|
||||
if (!file_start) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (fwrite(file_start, 1, dst_size, dst) != dst_size) {
|
||||
ret = -ferror(dst);
|
||||
goto out;
|
||||
}
|
||||
|
||||
opts->btf_custom_path = strdup(btf_path);
|
||||
if (!opts->btf_custom_path)
|
||||
ret = -ENOMEM;
|
||||
|
||||
out:
|
||||
free(info);
|
||||
fclose(dst);
|
||||
free(dst_buf);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void cleanup_core_btf(struct bpf_object_open_opts* opts) {
|
||||
if (!opts)
|
||||
return;
|
||||
|
||||
if (!opts->btf_custom_path)
|
||||
return;
|
||||
|
||||
unlink(opts->btf_custom_path);
|
||||
free((void*)opts->btf_custom_path);
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
LIBBPF_OPTS(bpf_object_open_opts, open_opts);
|
||||
static const struct argp argp = {
|
||||
.options = opts,
|
||||
.parser = parse_arg,
|
||||
.doc = argp_program_doc,
|
||||
};
|
||||
struct perf_buffer* pb = NULL;
|
||||
struct tcpstates_bpf* obj;
|
||||
int err, port_map_fd;
|
||||
short port_num;
|
||||
char* port;
|
||||
|
||||
err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
|
||||
libbpf_set_print(libbpf_print_fn);
|
||||
|
||||
err = ensure_core_btf(&open_opts);
|
||||
if (err) {
|
||||
warn("failed to fetch necessary BTF for CO-RE: %s\n", strerror(-err));
|
||||
return 1;
|
||||
}
|
||||
|
||||
obj = tcpstates_bpf__open_opts(&open_opts);
|
||||
if (!obj) {
|
||||
warn("failed to open BPF object\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
obj->rodata->filter_by_sport = target_sports != NULL;
|
||||
obj->rodata->filter_by_dport = target_dports != NULL;
|
||||
obj->rodata->target_family = target_family;
|
||||
|
||||
err = tcpstates_bpf__load(obj);
|
||||
if (err) {
|
||||
warn("failed to load BPF object: %d\n", err);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (target_sports) {
|
||||
port_map_fd = bpf_map__fd(obj->maps.sports);
|
||||
port = strtok(target_sports, ",");
|
||||
while (port) {
|
||||
port_num = strtol(port, NULL, 10);
|
||||
bpf_map_update_elem(port_map_fd, &port_num, &port_num, BPF_ANY);
|
||||
port = strtok(NULL, ",");
|
||||
}
|
||||
}
|
||||
if (target_dports) {
|
||||
port_map_fd = bpf_map__fd(obj->maps.dports);
|
||||
port = strtok(target_dports, ",");
|
||||
while (port) {
|
||||
port_num = strtol(port, NULL, 10);
|
||||
bpf_map_update_elem(port_map_fd, &port_num, &port_num, BPF_ANY);
|
||||
port = strtok(NULL, ",");
|
||||
}
|
||||
}
|
||||
|
||||
err = tcpstates_bpf__attach(obj);
|
||||
if (err) {
|
||||
warn("failed to attach BPF programs: %d\n", err);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
pb = perf_buffer__new(bpf_map__fd(obj->maps.events), PERF_BUFFER_PAGES,
|
||||
handle_event, handle_lost_events, NULL, NULL);
|
||||
if (!pb) {
|
||||
err = -errno;
|
||||
warn("failed to open perf buffer: %d\n", err);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (signal(SIGINT, sig_int) == SIG_ERR) {
|
||||
warn("can't set signal handler: %s\n", strerror(errno));
|
||||
err = 1;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (emit_timestamp)
|
||||
printf("%-8s ", "TIME(s)");
|
||||
if (wide_output)
|
||||
printf(
|
||||
"%-16s %-7s %-16s %-2s %-26s %-5s %-26s %-5s %-11s -> %-11s %s\n",
|
||||
"SKADDR", "PID", "COMM", "IP", "LADDR", "LPORT", "RADDR", "RPORT",
|
||||
"OLDSTATE", "NEWSTATE", "MS");
|
||||
else
|
||||
printf("%-16s %-7s %-10s %-15s %-5s %-15s %-5s %-11s -> %-11s %s\n",
|
||||
"SKADDR", "PID", "COMM", "LADDR", "LPORT", "RADDR", "RPORT",
|
||||
"OLDSTATE", "NEWSTATE", "MS");
|
||||
|
||||
while (!exiting) {
|
||||
err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS);
|
||||
if (err < 0 && err != -EINTR) {
|
||||
warn("error polling perf buffer: %s\n", strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
/* reset err to return 0 if exiting */
|
||||
err = 0;
|
||||
}
|
||||
|
||||
cleanup:
|
||||
perf_buffer__free(pb);
|
||||
tcpstates_bpf__destroy(obj);
|
||||
cleanup_core_btf(&open_opts);
|
||||
|
||||
return err != 0;
|
||||
}
|
||||
23
14-tcpstates/libbpf-bootstrap/tcpstates.h
Normal file
23
14-tcpstates/libbpf-bootstrap/tcpstates.h
Normal file
@@ -0,0 +1,23 @@
|
||||
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
|
||||
/* Copyright (c) 2021 Hengqi Chen */
|
||||
#ifndef __TCPSTATES_H
|
||||
#define __TCPSTATES_H
|
||||
|
||||
#define TASK_COMM_LEN 16
|
||||
|
||||
struct event {
|
||||
unsigned __int128 saddr;
|
||||
unsigned __int128 daddr;
|
||||
__u64 skaddr;
|
||||
__u64 ts_us;
|
||||
__u64 delta_us;
|
||||
__u32 pid;
|
||||
int oldstate;
|
||||
int newstate;
|
||||
__u16 family;
|
||||
__u16 sport;
|
||||
__u16 dport;
|
||||
char task[TASK_COMM_LEN];
|
||||
};
|
||||
|
||||
#endif /* __TCPSTATES_H */
|
||||
169
14-tcpstates/tcpstates-libbpf-bootstrap.md
Normal file
169
14-tcpstates/tcpstates-libbpf-bootstrap.md
Normal file
@@ -0,0 +1,169 @@
|
||||
## eBPF入门实践教程:使用 libbpf-bootstrap 开发程序统计 TCP 连接延时
|
||||
|
||||
## 来源
|
||||
|
||||
修改自 https://github.com/iovisor/bcc/blob/master/libbpf-tools/tcpstates.bpf.c
|
||||
|
||||
## 编译运行
|
||||
|
||||
- ```git clone https://github.com/libbpf/libbpf-bootstrap libbpf-bootstrap-cloned```
|
||||
- 将 [libbpf-bootstrap](libbpf-bootstrap)目录下的文件复制到 ```libbpf-bootstrap-cloned/examples/c```下
|
||||
- 修改 ```libbpf-bootstrap-cloned/examples/c/Makefile``` ,在其 ```APPS``` 项后添加 ```tcpstates```
|
||||
- 在 ```libbpf-bootstrap-cloned/examples/c``` 下运行 ```make tcpstates```
|
||||
- ```sudo ./tcpstates```
|
||||
|
||||
## 效果
|
||||
```
|
||||
root@yutong-VirtualBox:~/libbpf-bootstrap/examples/c# ./tcpstates
|
||||
SKADDR PID COMM LADDR LPORT RADDR RPORT OLDSTATE -> NEWSTATE MS
|
||||
ffff9bf61bb62bc0 164978 node 192.168.88.15 0 52.178.17.2 443 CLOSE -> SYN_SENT 0.000
|
||||
ffff9bf61bb62bc0 0 swapper/0 192.168.88.15 41596 52.178.17.2 443 SYN_SENT -> ESTABLISHED 225.794
|
||||
ffff9bf61bb62bc0 0 swapper/0 192.168.88.15 41596 52.178.17.2 443 ESTABLISHED -> CLOSE_WAIT 901.454
|
||||
ffff9bf61bb62bc0 164978 node 192.168.88.15 41596 52.178.17.2 443 CLOSE_WAIT -> LAST_ACK 0.793
|
||||
ffff9bf61bb62bc0 164978 node 192.168.88.15 41596 52.178.17.2 443 LAST_ACK -> LAST_ACK 0.086
|
||||
ffff9bf61bb62bc0 228759 kworker/u6 192.168.88.15 41596 52.178.17.2 443 LAST_ACK -> CLOSE 0.193
|
||||
ffff9bf6d8ee88c0 229832 redis-serv 0.0.0.0 6379 0.0.0.0 0 CLOSE -> LISTEN 0.000
|
||||
ffff9bf6d8ee88c0 229832 redis-serv 0.0.0.0 6379 0.0.0.0 0 LISTEN -> CLOSE 1.763
|
||||
ffff9bf7109d6900 88750 node 127.0.0.1 39755 127.0.0.1 50966 ESTABLISHED -> FIN_WAIT1 0.000
|
||||
```
|
||||
|
||||
对于输出的详细解释,详见 [README.md](README.md)
|
||||
|
||||
## ```tcpstates.bpf.c``` 的解释
|
||||
|
||||
```tcpstates``` 是一个追踪当前系统上的TCP套接字的TCP状态的程序,主要通过跟踪内核跟踪点 ```inet_sock_set_state``` 来实现。统计数据通过 ```perf_event```向用户态传输。
|
||||
|
||||
```c
|
||||
SEC("tracepoint/sock/inet_sock_set_state")
|
||||
int handle_set_state(struct trace_event_raw_inet_sock_set_state *ctx)
|
||||
```
|
||||
|
||||
在套接字改变状态处附加一个eBPF跟踪函数。
|
||||
|
||||
|
||||
|
||||
```c
|
||||
if (ctx->protocol != IPPROTO_TCP)
|
||||
return 0;
|
||||
|
||||
if (target_family && target_family != family)
|
||||
return 0;
|
||||
|
||||
if (filter_by_sport && !bpf_map_lookup_elem(&sports, &sport))
|
||||
return 0;
|
||||
|
||||
if (filter_by_dport && !bpf_map_lookup_elem(&dports, &dport))
|
||||
return 0;
|
||||
```
|
||||
|
||||
跟踪函数被调用后,先判断当前改变状态的套接字是否满足我们需要的过滤条件,如果不满足则不进行记录。
|
||||
|
||||
|
||||
|
||||
```c
|
||||
tsp = bpf_map_lookup_elem(×tamps, &sk);
|
||||
ts = bpf_ktime_get_ns();
|
||||
if (!tsp)
|
||||
delta_us = 0;
|
||||
else
|
||||
delta_us = (ts - *tsp) / 1000;
|
||||
|
||||
event.skaddr = (__u64)sk;
|
||||
event.ts_us = ts / 1000;
|
||||
event.delta_us = delta_us;
|
||||
event.pid = bpf_get_current_pid_tgid() >> 32;
|
||||
event.oldstate = ctx->oldstate;
|
||||
event.newstate = ctx->newstate;
|
||||
event.family = family;
|
||||
event.sport = sport;
|
||||
event.dport = dport;
|
||||
bpf_get_current_comm(&event.task, sizeof(event.task));
|
||||
|
||||
if (family == AF_INET) {
|
||||
bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_rcv_saddr);
|
||||
bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_daddr);
|
||||
} else { /* family == AF_INET6 */
|
||||
bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
|
||||
bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
|
||||
}
|
||||
```
|
||||
|
||||
使用状态改变相关填充event结构体。
|
||||
|
||||
- 此处使用了```libbpf``` 的 CO-RE 支持。
|
||||
|
||||
|
||||
|
||||
```c
|
||||
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event));
|
||||
```
|
||||
|
||||
将事件结构体发送至用户态程序。
|
||||
|
||||
|
||||
|
||||
```c
|
||||
if (ctx->newstate == TCP_CLOSE)
|
||||
bpf_map_delete_elem(×tamps, &sk);
|
||||
else
|
||||
bpf_map_update_elem(×tamps, &sk, &ts, BPF_ANY);
|
||||
```
|
||||
|
||||
根据这个TCP链接的新状态,决定是更新下时间戳记录还是不再记录它的时间戳。
|
||||
|
||||
### 对于用户态程序
|
||||
|
||||
```c
|
||||
while (!exiting) {
|
||||
err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS);
|
||||
if (err < 0 && err != -EINTR) {
|
||||
warn("error polling perf buffer: %s\n", strerror(-err));
|
||||
goto cleanup;
|
||||
}
|
||||
/* reset err to return 0 if exiting */
|
||||
err = 0;
|
||||
}
|
||||
```
|
||||
|
||||
不停轮询内核程序所发过来的 ```perf event```。
|
||||
|
||||
```c
|
||||
static void handle_event(void* ctx, int cpu, void* data, __u32 data_sz) {
|
||||
char ts[32], saddr[26], daddr[26];
|
||||
struct event* e = data;
|
||||
struct tm* tm;
|
||||
int family;
|
||||
time_t t;
|
||||
|
||||
if (emit_timestamp) {
|
||||
time(&t);
|
||||
tm = localtime(&t);
|
||||
strftime(ts, sizeof(ts), "%H:%M:%S", tm);
|
||||
printf("%8s ", ts);
|
||||
}
|
||||
|
||||
inet_ntop(e->family, &e->saddr, saddr, sizeof(saddr));
|
||||
inet_ntop(e->family, &e->daddr, daddr, sizeof(daddr));
|
||||
if (wide_output) {
|
||||
family = e->family == AF_INET ? 4 : 6;
|
||||
printf(
|
||||
"%-16llx %-7d %-16s %-2d %-26s %-5d %-26s %-5d %-11s -> %-11s "
|
||||
"%.3f\n",
|
||||
e->skaddr, e->pid, e->task, family, saddr, e->sport, daddr,
|
||||
e->dport, tcp_states[e->oldstate], tcp_states[e->newstate],
|
||||
(double)e->delta_us / 1000);
|
||||
} else {
|
||||
printf(
|
||||
"%-16llx %-7d %-10.10s %-15s %-5d %-15s %-5d %-11s -> %-11s %.3f\n",
|
||||
e->skaddr, e->pid, e->task, saddr, e->sport, daddr, e->dport,
|
||||
tcp_states[e->oldstate], tcp_states[e->newstate],
|
||||
(double)e->delta_us / 1000);
|
||||
}
|
||||
}
|
||||
|
||||
static void handle_lost_events(void* ctx, int cpu, __u64 lost_cnt) {
|
||||
warn("lost %llu events on CPU #%d\n", lost_cnt, cpu);
|
||||
}
|
||||
```
|
||||
|
||||
收到事件后所调用对应的处理函数并进行输出打印。
|
||||
Reference in New Issue
Block a user