From 2a05e13578d7e0a041b2a78733361e5ad2e43b39 Mon Sep 17 00:00:00 2001
From: yunwei37 <1067852565@qq.com>
Date: Sun, 7 May 2023 00:59:20 +0800
Subject: [PATCH] add submodule from libbpf-bootstrap

---
 .gitmodules              |   9 ++
 blazesym                 |   1 +
 src/12-profile/Makefile  | 142 ++++++++++++++++++++++
 src/12-profile/README.md | 107 +++++++++--------
 src/12-profile/profile.c | 244 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 453 insertions(+), 50 deletions(-)
 create mode 100644 .gitmodules
 create mode 160000 blazesym
 create mode 100644 src/12-profile/Makefile
 create mode 100644 src/12-profile/profile.c

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..e4cbc0c
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,9 @@
+[submodule "libbpf"]
+	path = libbpf
+	url = https://github.com/libbpf/libbpf.git
+[submodule "bpftool"]
+	path = bpftool
+	url = https://github.com/libbpf/bpftool
+[submodule "blazesym"]
+	path = blazesym
+	url = https://github.com/libbpf/blazesym
diff --git a/blazesym b/blazesym
new file mode 160000
index 0000000..35abf6e
--- /dev/null
+++ b/blazesym
@@ -0,0 +1 @@
+Subproject commit 35abf6e1cd6a035f7b476eeb8fb5cd49af31cd9b
diff --git a/src/12-profile/Makefile b/src/12-profile/Makefile
new file mode 100644
index 0000000..518bea7
--- /dev/null
+++ b/src/12-profile/Makefile
@@ -0,0 +1,142 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+OUTPUT := .output
+CLANG ?= clang
+LIBBPF_SRC := $(abspath ../../libbpf/src)
+BPFTOOL_SRC := $(abspath ../../bpftool/src)
+LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a)
+BPFTOOL_OUTPUT ?= $(abspath $(OUTPUT)/bpftool)
+BPFTOOL ?= $(BPFTOOL_OUTPUT)/bootstrap/bpftool
+# blazesym submodule checkout, and where its library and C header are staged
+LIBBLAZESYM_SRC := $(abspath ../../blazesym)
+LIBBLAZESYM_OBJ := $(abspath $(OUTPUT)/libblazesym.a)
+LIBBLAZESYM_HEADER := $(abspath $(OUTPUT)/blazesym.h)
+ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \
+			 | sed 's/arm.*/arm/' \
+			 | sed 's/aarch64/arm64/' \
+			 | sed 's/ppc64le/powerpc/' \
+			 | sed 's/mips.*/mips/' \
+			 | sed 's/riscv64/riscv/' \
+			 | sed 's/loongarch64/loongarch/')
+VMLINUX := ../../vmlinux/$(ARCH)/vmlinux.h
+# Use our own libbpf API headers and Linux UAPI headers distributed with
+# libbpf to avoid dependency on system-wide headers, which could be missing or
+# outdated
+INCLUDES := -I$(OUTPUT) -I../../libbpf/include/uapi -I$(dir $(VMLINUX))
+CFLAGS := -g -Wall
+ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS)
+# Examples built from this directory; profile is appended below if cargo exists
+APPS =
+
+CARGO ?= $(shell which cargo)
+ifeq ($(strip $(CARGO)),)
+BZS_APPS :=
+else
+BZS_APPS := profile
+APPS += $(BZS_APPS)
+# Required by libblazesym
+ALL_LDFLAGS += -lrt -ldl -lpthread -lm
+endif
+
+# Get Clang's default includes on this system. We'll explicitly add these dirs
+# to the includes list when compiling with `-target bpf` because otherwise some
+# architecture-specific dirs will be "missing" on some architectures/distros -
+# headers such as asm/types.h, asm/byteorder.h, asm/socket.h, asm/sockios.h,
+# sys/cdefs.h etc. might be missing.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+CLANG_BPF_SYS_INCLUDES ?= $(shell $(CLANG) -v -E - </dev/null 2>&1 \
+	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+
+ifeq ($(V),1)
+  Q =
+  msg =
+else
+  Q = @
+  msg = @printf ' %-8s %s%s\n' \
+		"$(1)" \
+		"$(patsubst $(abspath $(OUTPUT))/%,%,$(2))" \
+		"$(if $(3), $(3))";
+  MAKEFLAGS += --no-print-directory
+endif
+
+define allow-override
+  $(if $(or $(findstring environment,$(origin $(1))),\
+            $(findstring command line,$(origin $(1)))),,\
+    $(eval $(1) = $(2)))
+endef
+
+$(call allow-override,CC,$(CROSS_COMPILE)cc)
+$(call allow-override,LD,$(CROSS_COMPILE)ld)
+
+.PHONY: all
+all: $(APPS)
+
+.PHONY: clean
+clean:
+	$(call msg,CLEAN)
+	$(Q)rm -rf $(OUTPUT) $(APPS)
+
+$(OUTPUT) $(OUTPUT)/libbpf $(BPFTOOL_OUTPUT):
+	$(call msg,MKDIR,$@)
+	$(Q)mkdir -p $@
+
+# Build libbpf
+$(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf
+	$(call msg,LIB,$@)
+	$(Q)$(MAKE) -C $(LIBBPF_SRC) BUILD_STATIC_ONLY=1 \
+		    OBJDIR=$(dir $@)/libbpf
DESTDIR=$(dir $@) \
+		    INCLUDEDIR= LIBDIR= UAPIDIR= \
+		    install
+
+# Build bpftool
+$(BPFTOOL): | $(BPFTOOL_OUTPUT)
+	$(call msg,BPFTOOL,$@)
+	$(Q)$(MAKE) ARCH= CROSS_COMPILE= OUTPUT=$(BPFTOOL_OUTPUT)/ -C $(BPFTOOL_SRC) bootstrap
+
+
+$(LIBBLAZESYM_SRC)/target/release/libblazesym.a::
+	$(Q)cd $(LIBBLAZESYM_SRC) && $(CARGO) build --features=cheader,dont-generate-test-files --release
+
+$(LIBBLAZESYM_OBJ): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT)
+	$(call msg,LIB, $@)
+	$(Q)cp $(LIBBLAZESYM_SRC)/target/release/libblazesym.a $@
+
+$(LIBBLAZESYM_HEADER): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT)
+	$(call msg,LIB,$@)
+	$(Q)cp $(LIBBLAZESYM_SRC)/target/release/blazesym.h $@
+
+# Build BPF code (any header in this directory is a prerequisite)
+$(OUTPUT)/%.bpf.o: %.bpf.c $(LIBBPF_OBJ) $(wildcard *.h) $(VMLINUX) | $(OUTPUT) $(BPFTOOL)
+	$(call msg,BPF,$@)
+	$(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) \
+		     $(INCLUDES) $(CLANG_BPF_SYS_INCLUDES) \
+		     -c $(filter %.c,$^) -o $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
+	$(Q)$(BPFTOOL) gen object $@ $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
+
+# Generate BPF skeletons
+$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(OUTPUT) $(BPFTOOL)
+	$(call msg,GEN-SKEL,$@)
+	$(Q)$(BPFTOOL) gen skeleton $< > $@
+
+# Build user-space code
+$(patsubst %,$(OUTPUT)/%.o,$(APPS)): %.o: %.skel.h
+
+$(OUTPUT)/%.o: %.c $(wildcard *.h) | $(OUTPUT)
+	$(call msg,CC,$@)
+	$(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
+
+$(patsubst %,$(OUTPUT)/%.o,$(BZS_APPS)): $(LIBBLAZESYM_HEADER)
+
+$(BZS_APPS): $(LIBBLAZESYM_OBJ)
+
+# Build application binary
+$(APPS): %: $(OUTPUT)/%.o $(LIBBPF_OBJ) | $(OUTPUT)
+	$(call msg,BINARY,$@)
+	$(Q)$(CC) $(CFLAGS) $^ $(ALL_LDFLAGS) -lelf -lz -o $@
+
+# delete failed targets
+.DELETE_ON_ERROR:
+
+# keep intermediate (.skel.h, .bpf.o, etc) targets
+.SECONDARY:
diff --git a/src/12-profile/README.md b/src/12-profile/README.md
index f48a4f4..4147cd6 100644
--- a/src/12-profile/README.md
+++ b/src/12-profile/README.md
@@ -1,83 +1,89 @@
-## eBPF
入门实践教程:编写 eBPF 程序 profile 进行性能分析 +# eBPF 入门实践教程:编写 eBPF 程序 profile 进行性能分析 -### 背景 +## 背景 `profile` 是一款用户追踪程序执行调用流程的工具,类似于perf中的 -g 指令。但是相较于perf而言, `profile`的功能更为细化,它可以选择用户需要追踪的层面,比如在用户态层面进行追踪,或是在内核态进行追踪。 -### 实现原理 +## 实现原理 -`profile` 的实现依赖于linux中的perf_event。在注入ebpf程序前,`profile` 工具会先将 perf_event +`profile` 的实现依赖于linux中的perf_event。在注入ebpf程序前,`profile` 工具会先将 perf_event 注册好。 + ```c static int open_and_attach_perf_event(int freq, struct bpf_program *prog, - struct bpf_link *links[]) + struct bpf_link *links[]) { - struct perf_event_attr attr = { - .type = PERF_TYPE_SOFTWARE, - .freq = env.freq, - .sample_freq = env.sample_freq, - .config = PERF_COUNT_SW_CPU_CLOCK, - }; - int i, fd; + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .freq = env.freq, + .sample_freq = env.sample_freq, + .config = PERF_COUNT_SW_CPU_CLOCK, + }; + int i, fd; - for (i = 0; i < nr_cpus; i++) { - if (env.cpu != -1 && env.cpu != i) - continue; + for (i = 0; i < nr_cpus; i++) { + if (env.cpu != -1 && env.cpu != i) + continue; - fd = syscall(__NR_perf_event_open, &attr, -1, i, -1, 0); - if (fd < 0) { - /* Ignore CPU that is offline */ - if (errno == ENODEV) - continue; - fprintf(stderr, "failed to init perf sampling: %s\n", - strerror(errno)); - return -1; - } - links[i] = bpf_program__attach_perf_event(prog, fd); - if (!links[i]) { - fprintf(stderr, "failed to attach perf event on cpu: " - "%d\n", i); - links[i] = NULL; - close(fd); - return -1; - } - } + fd = syscall(__NR_perf_event_open, &attr, -1, i, -1, 0); + if (fd < 0) { + /* Ignore CPU that is offline */ + if (errno == ENODEV) + continue; + fprintf(stderr, "failed to init perf sampling: %s\n", + strerror(errno)); + return -1; + } + links[i] = bpf_program__attach_perf_event(prog, fd); + if (!links[i]) { + fprintf(stderr, "failed to attach perf event on cpu: " + "%d\n", i); + links[i] = NULL; + close(fd); + return -1; + } + } - return 0; + return 0; } ``` + 其ebpf程序实现逻辑是对程序的堆栈进行定时采样,从而捕获程序的执行流程。 + ```c SEC("perf_event") int 
profile(void *ctx) { - int pid = bpf_get_current_pid_tgid() >> 32; - int cpu_id = bpf_get_smp_processor_id(); - struct stacktrace_event *event; - int cp; + int pid = bpf_get_current_pid_tgid() >> 32; + int cpu_id = bpf_get_smp_processor_id(); + struct stacktrace_event *event; + int cp; - event = bpf_ringbuf_reserve(&events, sizeof(*event), 0); - if (!event) - return 1; + event = bpf_ringbuf_reserve(&events, sizeof(*event), 0); + if (!event) + return 1; - event->pid = pid; - event->cpu_id = cpu_id; + event->pid = pid; + event->cpu_id = cpu_id; - if (bpf_get_current_comm(event->comm, sizeof(event->comm))) - event->comm[0] = 0; + if (bpf_get_current_comm(event->comm, sizeof(event->comm))) + event->comm[0] = 0; - event->kstack_sz = bpf_get_stack(ctx, event->kstack, sizeof(event->kstack), 0); + event->kstack_sz = bpf_get_stack(ctx, event->kstack, sizeof(event->kstack), 0); - event->ustack_sz = bpf_get_stack(ctx, event->ustack, sizeof(event->ustack), BPF_F_USER_STACK); + event->ustack_sz = bpf_get_stack(ctx, event->ustack, sizeof(event->ustack), BPF_F_USER_STACK); - bpf_ringbuf_submit(event, 0); + bpf_ringbuf_submit(event, 0); - return 0; + return 0; } ``` + 通过这种方式,它可以根据用户指令,简单的决定追踪用户态层面的执行流程或是内核态层面的执行流程。 -### 编译运行 + +## 编译运行 + ```console $ git clone https://github.com/libbpf/libbpf-bootstrap.git --recurse-submodules $ cd examples/c @@ -105,4 +111,5 @@ Userspace: ``` ### 总结 + `profile` 实现了对程序执行流程的分析,在debug等操作中可以极大的帮助开发者提高效率。
diff --git a/src/12-profile/profile.c b/src/12-profile/profile.c
new file mode 100644
index 0000000..dc65a32
--- /dev/null
+++ b/src/12-profile/profile.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2022 Facebook */
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <linux/perf_event.h>
+#include <bpf/libbpf.h>
+
+#include "profile.skel.h"
+#include "profile.h"
+#include "blazesym.h"
+
+/*
+ * This function is from libbpf, but it is not a public API and can only be
+ * used for demonstration.
We can use this here because we statically link
+ * against the libbpf built from submodule during build.
+ */
+extern int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
+
+/* Issue the raw perf_event_open syscall and pass its result straight through. */
+static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
+			    int cpu, int group_fd, unsigned long flags)
+{
+	return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
+}
+
+static struct blazesym *symbolizer; /* created in main(), used by show_stack_trace() */
+
+static void show_stack_trace(__u64 *stack, int stack_sz, pid_t pid)
+{
+	const struct blazesym_result *result;
+	const struct blazesym_csym *sym;
+	sym_src_cfg src;
+	int i, j;
+
+	if (pid) { /* non-zero pid: symbolize against that process */
+		src.src_type = SRC_T_PROCESS;
+		src.params.process.pid = pid;
+	} else { /* pid 0 is what event_handler() passes for kernel stacks */
+		src.src_type = SRC_T_KERNEL;
+		src.params.kernel.kallsyms = NULL;
+		src.params.kernel.kernel_image = NULL;
+	}
+
+	result = blazesym_symbolize(symbolizer, &src, 1, (const uint64_t *)stack, stack_sz);
+
+	for (i = 0; i < stack_sz; i++) {
+		if (!result || result->size <= i || !result->entries[i].size) { /* unresolved */
+			printf(" %d [<%016llx>]\n", i, stack[i]);
+			continue;
+		}
+
+		if (result->entries[i].size == 1) {
+			sym = &result->entries[i].syms[0];
+			if (sym->path && sym->path[0]) {
+				printf(" %d [<%016llx>] %s+0x%llx %s:%ld\n",
+				       i, stack[i], sym->symbol,
+				       stack[i] - sym->start_address,
+				       sym->path, sym->line_no);
+			} else {
+				printf(" %d [<%016llx>] %s+0x%llx\n",
+				       i, stack[i], sym->symbol,
+				       stack[i] - sym->start_address);
+			}
+			continue;
+		}
+
+		printf(" %d [<%016llx>]\n", i, stack[i]);
+		for (j = 0; j < result->entries[i].size; j++) {
+			sym = &result->entries[i].syms[j];
+			if (sym->path && sym->path[0]) {
+				printf(" %s+0x%llx %s:%ld\n",
+				       sym->symbol, stack[i] - sym->start_address,
+				       sym->path, sym->line_no);
+			} else {
+				printf(" %s+0x%llx\n", sym->symbol,
+				       stack[i] - sym->start_address);
+			}
+		}
+	}
+
+	blazesym_result_free(result);
+}
+
+/* Receive events from the ring buffer. */
+static int event_handler(void *_ctx, void *data, size_t size)
+{
+	struct stacktrace_event *event = data;
+
+	if (event->kstack_sz <= 0 && event->ustack_sz <= 0)
+		return 1;
+
+	printf("COMM: %s (pid=%d) @ CPU %d\n", event->comm, event->pid, event->cpu_id);
+
+	if (event->kstack_sz > 0) {
+		printf("Kernel:\n");
+		show_stack_trace(event->kstack, event->kstack_sz / sizeof(__u64), 0);
+	} else {
+		printf("No Kernel Stack\n");
+	}
+
+	if (event->ustack_sz > 0) {
+		printf("Userspace:\n");
+		show_stack_trace(event->ustack, event->ustack_sz / sizeof(__u64), event->pid);
+	} else {
+		printf("No Userspace Stack\n");
+	}
+
+	printf("\n");
+	return 0;
+}
+
+static void show_help(const char *progname)
+{
+	printf("Usage: %s [-f <frequency>] [-h]\n", progname);
+}
+
+int main(int argc, char * const argv[])
+{
+	const char *online_cpus_file = "/sys/devices/system/cpu/online";
+	int freq = 1, pid = -1, cpu;
+	struct profile_bpf *skel = NULL;
+	struct perf_event_attr attr;
+	struct bpf_link **links = NULL;
+	struct ring_buffer *ring_buf = NULL;
+	int num_cpus, num_online_cpus;
+	int *pefds = NULL, pefd;
+	int argp, i, err = 0;
+	bool *online_mask = NULL;
+
+	while ((argp = getopt(argc, argv, "hf:")) != -1) {
+		switch (argp) {
+		case 'f':
+			freq = atoi(optarg);
+			if (freq < 1)
+				freq = 1;
+			break;
+
+		case 'h':
+		default:
+			show_help(argv[0]);
+			return 1;
+		}
+	}
+
+	err = parse_cpu_mask_file(online_cpus_file, &online_mask, &num_online_cpus);
+	if (err) {
+		fprintf(stderr, "Fail to get online CPU numbers: %d\n", err);
+		goto cleanup;
+	}
+
+	num_cpus = libbpf_num_possible_cpus();
+	if (num_cpus <= 0) {
+		fprintf(stderr, "Fail to get the number of processors\n");
+		err = -1;
+		goto cleanup;
+	}
+
+	skel = profile_bpf__open_and_load();
+	if (!skel) {
+		fprintf(stderr, "Fail to open and load BPF skeleton\n");
+		err = -1;
+		goto cleanup;
+	}
+
+	symbolizer = blazesym_new();
+	if (!symbolizer) {
+		fprintf(stderr, "Fail to create a symbolizer\n");
+		err = -1;
+		goto cleanup;
+	}
+
+	/* Prepare ring buffer to receive events from the BPF program. */
+	ring_buf = ring_buffer__new(bpf_map__fd(skel->maps.events), event_handler, NULL, NULL);
+	if (!ring_buf) {
+		err = -1;
+		goto cleanup;
+	}
+
+	pefds = malloc(num_cpus * sizeof(int));
+	links = calloc(num_cpus, sizeof(struct bpf_link *));
+	for (i = 0; pefds && i < num_cpus; i++)
+		pefds[i] = -1; /* mark every slot unused before any jump to cleanup */
+	if (!pefds || !links) {
+		fprintf(stderr, "Fail to allocate per-CPU fd/link arrays\n");
+		err = -1;
+		goto cleanup;
+	}
+
+	memset(&attr, 0, sizeof(attr));
+	attr.type = PERF_TYPE_HARDWARE;
+	attr.size = sizeof(attr);
+	attr.config = PERF_COUNT_HW_CPU_CYCLES;
+	attr.sample_freq = freq;
+	attr.freq = 1;
+
+	for (cpu = 0; cpu < num_cpus; cpu++) {
+		/* skip offline/not present CPUs */
+		if (cpu >= num_online_cpus || !online_mask[cpu])
+			continue;
+
+		/* Set up performance monitoring on a CPU/Core */
+		pefd = perf_event_open(&attr, pid, cpu, -1, PERF_FLAG_FD_CLOEXEC);
+		if (pefd < 0) {
+			fprintf(stderr, "Fail to set up performance monitor on a CPU/Core\n");
+			err = -1;
+			goto cleanup;
+		}
+		pefds[cpu] = pefd;
+
+		/* Attach a BPF program on a CPU */
+		links[cpu] = bpf_program__attach_perf_event(skel->progs.profile, pefd);
+		if (!links[cpu]) {
+			err = -1;
+			goto cleanup;
+		}
+	}
+
+	/* Wait and receive stack traces */
+	while (ring_buffer__poll(ring_buf, -1) >= 0) {
+	}
+
+cleanup:
+	if (links) {
+		for (cpu = 0; cpu < num_cpus; cpu++)
+			bpf_link__destroy(links[cpu]);
+		free(links);
+	}
+	if (pefds) {
+		for (i = 0; i < num_cpus; i++)
+			if (pefds[i] >= 0)
+				close(pefds[i]);
+		free(pefds);
+	}
+	ring_buffer__free(ring_buf);
+	profile_bpf__destroy(skel);
+	blazesym_free(symbolizer);
+	free(online_mask);
+	return -err;
+}