From d2cd2e5348375989a6a662ec3e45682d6f205798 Mon Sep 17 00:00:00 2001 From: yunwei37 <1067852565@qq.com> Date: Sun, 7 May 2023 03:49:30 +0800 Subject: [PATCH] update part of tcpstates --- src/14-tcpstates/.gitignore | 3 +- src/14-tcpstates/Makefile | 141 ++++++++++++++++++ src/14-tcpstates/README.md | 127 ++++++++++++++-- .../libbpf-bootstrap/tcpstates.bpf.c | 102 ------------- src/14-tcpstates/tcpstates.bpf.c | 27 ++-- src/14-tcpstates/tcpstates.bpf.h | 24 --- .../{libbpf-bootstrap => }/tcpstates.c | 0 .../{libbpf-bootstrap => }/tcpstates.h | 0 8 files changed, 268 insertions(+), 156 deletions(-) create mode 100644 src/14-tcpstates/Makefile delete mode 100644 src/14-tcpstates/libbpf-bootstrap/tcpstates.bpf.c delete mode 100644 src/14-tcpstates/tcpstates.bpf.h rename src/14-tcpstates/{libbpf-bootstrap => }/tcpstates.c (100%) rename src/14-tcpstates/{libbpf-bootstrap => }/tcpstates.h (100%) diff --git a/src/14-tcpstates/.gitignore b/src/14-tcpstates/.gitignore index c610807..5912c0b 100644 --- a/src/14-tcpstates/.gitignore +++ b/src/14-tcpstates/.gitignore @@ -2,4 +2,5 @@ package.json eunomia-exporter ecli - \ No newline at end of file +tcpstates +.output diff --git a/src/14-tcpstates/Makefile b/src/14-tcpstates/Makefile new file mode 100644 index 0000000..862b2bd --- /dev/null +++ b/src/14-tcpstates/Makefile @@ -0,0 +1,141 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +OUTPUT := .output +CLANG ?= clang +LIBBPF_SRC := $(abspath ../../libbpf/src) +BPFTOOL_SRC := $(abspath ../../bpftool/src) +LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a) +BPFTOOL_OUTPUT ?= $(abspath $(OUTPUT)/bpftool) +BPFTOOL ?= $(BPFTOOL_OUTPUT)/bootstrap/bpftool +LIBBLAZESYM_SRC := $(abspath ../../blazesym/) +LIBBLAZESYM_OBJ := $(abspath $(OUTPUT)/libblazesym.a) +LIBBLAZESYM_HEADER := $(abspath $(OUTPUT)/blazesym.h) +ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \ + | sed 's/arm.*/arm/' \ + | sed 's/aarch64/arm64/' \ + | sed 's/ppc64le/powerpc/' \ + | sed 's/mips.*/mips/' \ + | sed 
's/riscv64/riscv/' \ + | sed 's/loongarch64/loongarch/') +VMLINUX := ../../vmlinux/$(ARCH)/vmlinux.h +# Use our own libbpf API headers and Linux UAPI headers distributed with +# libbpf to avoid dependency on system-wide headers, which could be missing or +# outdated +INCLUDES := -I$(OUTPUT) -I../../libbpf/include/uapi -I$(dir $(VMLINUX)) +CFLAGS := -g -Wall +ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS) + +APPS = tcpstates # minimal minimal_legacy uprobe kprobe fentry usdt sockfilter tc ksyscall + +CARGO ?= $(shell which cargo) +ifeq ($(strip $(CARGO)),) +BZS_APPS := +else +BZS_APPS := # profile +APPS += $(BZS_APPS) +# Required by libblazesym +ALL_LDFLAGS += -lrt -ldl -lpthread -lm +endif + +# Get Clang's default includes on this system. We'll explicitly add these dirs +# to the includes list when compiling with `-target bpf` because otherwise some +# architecture-specific dirs will be "missing" on some architectures/distros - +# headers such as asm/types.h, asm/byteorder.h, asm/socket.h, asm/sockios.h, +# sys/cdefs.h etc. might be missing. +# +# Use '-idirafter': Don't interfere with include mechanics except where the +# build would have failed anyways. 
+CLANG_BPF_SYS_INCLUDES ?= $(shell $(CLANG) -v -E - </dev/null 2>&1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') + +ifeq ($(V),1) + Q = + msg = +else + Q = @ + msg = @printf ' %-8s %s%s\n' \ + "$(1)" \ + "$(patsubst $(abspath $(OUTPUT))/%,%,$(2))" \ + "$(if $(3), $(3))"; + MAKEFLAGS += --no-print-directory +endif + +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +$(call allow-override,CC,$(CROSS_COMPILE)cc) +$(call allow-override,LD,$(CROSS_COMPILE)ld) + +.PHONY: all +all: $(APPS) + +.PHONY: clean +clean: + $(call msg,CLEAN) + $(Q)rm -rf $(OUTPUT) $(APPS) + +$(OUTPUT) $(OUTPUT)/libbpf $(BPFTOOL_OUTPUT): + $(call msg,MKDIR,$@) + $(Q)mkdir -p $@ + +# Build libbpf +$(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf + $(call msg,LIB,$@) + $(Q)$(MAKE) -C $(LIBBPF_SRC) BUILD_STATIC_ONLY=1 \ + OBJDIR=$(dir $@)/libbpf DESTDIR=$(dir $@) \ + INCLUDEDIR= LIBDIR= UAPIDIR= \ + install + +# Build bpftool +$(BPFTOOL): | $(BPFTOOL_OUTPUT) + $(call msg,BPFTOOL,$@) + $(Q)$(MAKE) ARCH= CROSS_COMPILE= OUTPUT=$(BPFTOOL_OUTPUT)/ -C $(BPFTOOL_SRC) bootstrap + + +$(LIBBLAZESYM_SRC)/target/release/libblazesym.a:: + $(Q)cd $(LIBBLAZESYM_SRC) && $(CARGO) build --features=cheader,dont-generate-test-files --release + +$(LIBBLAZESYM_OBJ): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT) + $(call msg,LIB, $@) + $(Q)cp $(LIBBLAZESYM_SRC)/target/release/libblazesym.a $@ + +$(LIBBLAZESYM_HEADER): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT) + $(call msg,LIB,$@) + $(Q)cp $(LIBBLAZESYM_SRC)/target/release/blazesym.h $@ + +# Build BPF code +$(OUTPUT)/%.bpf.o: %.bpf.c $(LIBBPF_OBJ) $(wildcard %.h) $(VMLINUX) | $(OUTPUT) $(BPFTOOL) + $(call msg,BPF,$@) + $(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) \ + $(INCLUDES) $(CLANG_BPF_SYS_INCLUDES) \ + -c $(filter %.c,$^) -o $(patsubst 
%.bpf.o,%.tmp.bpf.o,$@) + $(Q)$(BPFTOOL) gen object $@ $(patsubst %.bpf.o,%.tmp.bpf.o,$@) + +# Generate BPF skeletons +$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(OUTPUT) $(BPFTOOL) + $(call msg,GEN-SKEL,$@) + $(Q)$(BPFTOOL) gen skeleton $< > $@ + +# Build user-space code +$(patsubst %,$(OUTPUT)/%.o,$(APPS)): %.o: %.skel.h + +$(OUTPUT)/%.o: %.c $(wildcard %.h) | $(OUTPUT) + $(call msg,CC,$@) + $(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@ + +$(patsubst %,$(OUTPUT)/%.o,$(BZS_APPS)): $(LIBBLAZESYM_HEADER) + +$(BZS_APPS): $(LIBBLAZESYM_OBJ) + +# Build application binary +$(APPS): %: $(OUTPUT)/%.o $(LIBBPF_OBJ) | $(OUTPUT) + $(call msg,BINARY,$@) + $(Q)$(CC) $(CFLAGS) $^ $(ALL_LDFLAGS) -lelf -lz -o $@ + +# delete failed targets +.DELETE_ON_ERROR: + +# keep intermediate (.skel.h, .bpf.o, etc) targets +.SECONDARY: diff --git a/src/14-tcpstates/README.md b/src/14-tcpstates/README.md index 82cc424..6ebdb8a 100644 --- a/src/14-tcpstates/README.md +++ b/src/14-tcpstates/README.md @@ -1,5 +1,112 @@ # eBPF入门实践教程:使用 libbpf-bootstrap 开发程序统计 TCP 连接延时 +## 内核态代码 + +```c +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2021 Hengqi Chen */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> +#include "tcpstates.h" + +#define MAX_ENTRIES 10240 +#define AF_INET 2 +#define AF_INET6 10 + +const volatile bool filter_by_sport = false; +const volatile bool filter_by_dport = false; +const volatile short target_family = 0; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u16); + __type(value, __u16); +} sports SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u16); + __type(value, __u16); +} dports SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, struct sock *); + __type(value, __u64); +} timestamps SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, 
sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} events SEC(".maps"); + +SEC("tracepoint/sock/inet_sock_set_state") +int handle_set_state(struct trace_event_raw_inet_sock_set_state *ctx) +{ + struct sock *sk = (struct sock *)ctx->skaddr; + __u16 family = ctx->family; + __u16 sport = ctx->sport; + __u16 dport = ctx->dport; + __u64 *tsp, delta_us, ts; + struct event event = {}; + + if (ctx->protocol != IPPROTO_TCP) + return 0; + + if (target_family && target_family != family) + return 0; + + if (filter_by_sport && !bpf_map_lookup_elem(&sports, &sport)) + return 0; + + if (filter_by_dport && !bpf_map_lookup_elem(&dports, &dport)) + return 0; + + tsp = bpf_map_lookup_elem(&timestamps, &sk); + ts = bpf_ktime_get_ns(); + if (!tsp) + delta_us = 0; + else + delta_us = (ts - *tsp) / 1000; + + event.skaddr = (__u64)sk; + event.ts_us = ts / 1000; + event.delta_us = delta_us; + event.pid = bpf_get_current_pid_tgid() >> 32; + event.oldstate = ctx->oldstate; + event.newstate = ctx->newstate; + event.family = family; + event.sport = sport; + event.dport = dport; + bpf_get_current_comm(&event.task, sizeof(event.task)); + + if (family == AF_INET) { + bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_rcv_saddr); + bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_daddr); + } else { /* family == AF_INET6 */ + bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); + bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32); + } + + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event)); + + if (ctx->newstate == TCP_CLOSE) + bpf_map_delete_elem(&timestamps, &sk); + else + bpf_map_update_elem(&timestamps, &sk, &ts, BPF_ANY); + + return 0; +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; +``` + ```tcpstates``` 是一个追踪当前系统上的TCP套接字的TCP状态的程序,主要通过跟踪内核跟踪点 ```inet_sock_set_state``` 来实现。统计数据通过 ```perf_event```向用户态传输。 ```c 
@@ -131,16 +238,14 @@ static void handle_lost_events(void* ctx, int cpu, __u64 lost_cnt) { ## 编译运行 -- ```git clone https://github.com/libbpf/libbpf-bootstrap libbpf-bootstrap-cloned``` -- 将 [libbpf-bootstrap](libbpf-bootstrap)目录下的文件复制到 ```libbpf-bootstrap-cloned/examples/c```下 -- 修改 ```libbpf-bootstrap-cloned/examples/c/Makefile``` ,在其 ```APPS``` 项后添加 ```tcpstates``` -- 在 ```libbpf-bootstrap-cloned/examples/c``` 下运行 ```make tcpstates``` -- ```sudo ./tcpstates``` - -## 效果 - -```plain -root@yutong-VirtualBox:~/libbpf-bootstrap/examples/c# ./tcpstates +```console +$ make +... + BPF .output/tcpstates.bpf.o + GEN-SKEL .output/tcpstates.skel.h + CC .output/tcpstates.o + BINARY tcpstates +$ sudo ./tcpstates SKADDR PID COMM LADDR LPORT RADDR RPORT OLDSTATE -> NEWSTATE MS ffff9bf61bb62bc0 164978 node 192.168.88.15 0 52.178.17.2 443 CLOSE -> SYN_SENT 0.000 ffff9bf61bb62bc0 0 swapper/0 192.168.88.15 41596 52.178.17.2 443 SYN_SENT -> ESTABLISHED 225.794 @@ -153,8 +258,6 @@ ffff9bf6d8ee88c0 229832 redis-serv 0.0.0.0 6379 0.0.0.0 0 ffff9bf7109d6900 88750 node 127.0.0.1 39755 127.0.0.1 50966 ESTABLISHED -> FIN_WAIT1 0.000 ``` -对于输出的详细解释,详见 [README.md](README.md) - ## 总结 这里的代码修改自 diff --git a/src/14-tcpstates/libbpf-bootstrap/tcpstates.bpf.c b/src/14-tcpstates/libbpf-bootstrap/tcpstates.bpf.c deleted file mode 100644 index 0f9ed24..0000000 --- a/src/14-tcpstates/libbpf-bootstrap/tcpstates.bpf.c +++ /dev/null @@ -1,102 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -/* Copyright (c) 2021 Hengqi Chen */ -#include <vmlinux.h> -#include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> -#include <bpf/bpf_core_read.h> -#include "tcpstates.h" - -#define MAX_ENTRIES 10240 -#define AF_INET 2 -#define AF_INET6 10 - -const volatile bool filter_by_sport = false; -const volatile bool filter_by_dport = false; -const volatile short target_family = 0; - -struct { - __uint(type, BPF_MAP_TYPE_HASH); - __uint(max_entries, MAX_ENTRIES); - __type(key, __u16); - __type(value, __u16); -} sports SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_HASH); - 
__uint(max_entries, MAX_ENTRIES); - __type(key, __u16); - __type(value, __u16); -} dports SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_HASH); - __uint(max_entries, MAX_ENTRIES); - __type(key, struct sock *); - __type(value, __u64); -} timestamps SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); - __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(__u32)); -} events SEC(".maps"); - -SEC("tracepoint/sock/inet_sock_set_state") -int handle_set_state(struct trace_event_raw_inet_sock_set_state *ctx) -{ - struct sock *sk = (struct sock *)ctx->skaddr; - __u16 family = ctx->family; - __u16 sport = ctx->sport; - __u16 dport = ctx->dport; - __u64 *tsp, delta_us, ts; - struct event event = {}; - - if (ctx->protocol != IPPROTO_TCP) - return 0; - - if (target_family && target_family != family) - return 0; - - if (filter_by_sport && !bpf_map_lookup_elem(&sports, &sport)) - return 0; - - if (filter_by_dport && !bpf_map_lookup_elem(&dports, &dport)) - return 0; - - tsp = bpf_map_lookup_elem(&timestamps, &sk); - ts = bpf_ktime_get_ns(); - if (!tsp) - delta_us = 0; - else - delta_us = (ts - *tsp) / 1000; - - event.skaddr = (__u64)sk; - event.ts_us = ts / 1000; - event.delta_us = delta_us; - event.pid = bpf_get_current_pid_tgid() >> 32; - event.oldstate = ctx->oldstate; - event.newstate = ctx->newstate; - event.family = family; - event.sport = sport; - event.dport = dport; - bpf_get_current_comm(&event.task, sizeof(event.task)); - - if (family == AF_INET) { - bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_rcv_saddr); - bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_daddr); - } else { /* family == AF_INET6 */ - bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); - bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32); - } - - bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, 
&event, sizeof(event)); - - if (ctx->newstate == TCP_CLOSE) - bpf_map_delete_elem(&timestamps, &sk); - else - bpf_map_update_elem(&timestamps, &sk, &ts, BPF_ANY); - - return 0; -} - -char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/src/14-tcpstates/tcpstates.bpf.c b/src/14-tcpstates/tcpstates.bpf.c index b479ca4..0f9ed24 100644 --- a/src/14-tcpstates/tcpstates.bpf.c +++ b/src/14-tcpstates/tcpstates.bpf.c @@ -4,42 +4,38 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> -#include "tcpstates.bpf.h" +#include "tcpstates.h" -#define MAX_ENTRIES 10240 -#define AF_INET 2 -#define AF_INET6 10 +#define MAX_ENTRIES 10240 +#define AF_INET 2 +#define AF_INET6 10 const volatile bool filter_by_sport = false; const volatile bool filter_by_dport = false; const volatile short target_family = 0; -struct -{ +struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, MAX_ENTRIES); __type(key, __u16); __type(value, __u16); } sports SEC(".maps"); -struct -{ +struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, MAX_ENTRIES); __type(key, __u16); __type(value, __u16); } dports SEC(".maps"); -struct -{ +struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, MAX_ENTRIES); __type(key, struct sock *); __type(value, __u64); } timestamps SEC(".maps"); -struct -{ +struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); __uint(key_size, sizeof(__u32)); __uint(value_size, sizeof(__u32)); @@ -85,13 +81,10 @@ int handle_set_state(struct trace_event_raw_inet_sock_set_state *ctx) event.dport = dport; bpf_get_current_comm(&event.task, sizeof(event.task)); - if (family == AF_INET) - { + if (family == AF_INET) { bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_rcv_saddr); bpf_probe_read_kernel(&event.daddr, sizeof(event.daddr), &sk->__sk_common.skc_daddr); - } - else - { /* family == AF_INET6 */ + } else { /* family == AF_INET6 */ bpf_probe_read_kernel(&event.saddr, sizeof(event.saddr), &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); bpf_probe_read_kernel(&event.daddr, 
sizeof(event.daddr), &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32); } diff --git a/src/14-tcpstates/tcpstates.bpf.h b/src/14-tcpstates/tcpstates.bpf.h deleted file mode 100644 index 9084301..0000000 --- a/src/14-tcpstates/tcpstates.bpf.h +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -/* Copyright (c) 2021 Hengqi Chen */ -#ifndef __TCPSTATES_H -#define __TCPSTATES_H - -#define TASK_COMM_LEN 16 - -struct event -{ - unsigned __int128 saddr; - unsigned __int128 daddr; - __u64 skaddr; - __u64 ts_us; - __u64 delta_us; - __u32 pid; - int oldstate; - int newstate; - __u16 family; - __u16 sport; - __u16 dport; - char task[TASK_COMM_LEN]; -}; - -#endif /* __TCPSTATES_H */ diff --git a/src/14-tcpstates/libbpf-bootstrap/tcpstates.c b/src/14-tcpstates/tcpstates.c similarity index 100% rename from src/14-tcpstates/libbpf-bootstrap/tcpstates.c rename to src/14-tcpstates/tcpstates.c diff --git a/src/14-tcpstates/libbpf-bootstrap/tcpstates.h b/src/14-tcpstates/tcpstates.h similarity index 100% rename from src/14-tcpstates/libbpf-bootstrap/tcpstates.h rename to src/14-tcpstates/tcpstates.h