add submodule from libbpf-bootstrap

2026-02-03 18:24:27 +08:00 · 2023-05-07 00:59:20 +08:00
parent 973a051e46
commit 2a05e13578
5 changed files with 449 additions and 50 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -0,0 +1,9 @@
+[submodule "libbpf"]
+	path = libbpf
+	url = https://github.com/libbpf/libbpf.git
+[submodule "bpftool"]
+	path = bpftool
+	url = https://github.com/libbpf/bpftool
+[submodule "blazesym"]
+	path = blazesym
+	url = https://github.com/libbpf/blazesym
--- a/1
+++ b/1
--- a/src/12-profile/Makefile
+++ b/src/12-profile/Makefile
@@ -0,0 +1,138 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+# Build output directory; generated objects, skeletons and the helper
+# libraries are placed under it.
+OUTPUT := .output
+CLANG ?= clang
+# Sources of the git submodules checked out two directories above this one.
+LIBBPF_SRC := $(abspath ../../libbpf/src)
+BPFTOOL_SRC := $(abspath ../../bpftool/src)
+LIBBLAZESYM_SRC := $(abspath ../../blazesym)
+# Artifacts built from those submodules.
+LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a)
+BPFTOOL_OUTPUT ?= $(abspath $(OUTPUT)/bpftool)
+BPFTOOL ?= $(BPFTOOL_OUTPUT)/bootstrap/bpftool
+# The blazesym header is copied into $(OUTPUT), which is on the include path
+# below, so `#include "blazesym.h"` resolves without extra flags.
+LIBBLAZESYM_OBJ := $(abspath $(OUTPUT)/libblazesym.a)
+LIBBLAZESYM_HEADER := $(abspath $(OUTPUT)/blazesym.h)
+# Map `uname -m` output to the short architecture name used for the
+# vmlinux.h directory and the __TARGET_ARCH_<arch> define.
+ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \
+			 | sed 's/arm.*/arm/' \
+			 | sed 's/aarch64/arm64/' \
+			 | sed 's/ppc64le/powerpc/' \
+			 | sed 's/mips.*/mips/' \
+			 | sed 's/riscv64/riscv/' \
+			 | sed 's/loongarch64/loongarch/')
+VMLINUX := ../../vmlinux/$(ARCH)/vmlinux.h
+# Use our own libbpf API headers and Linux UAPI headers distributed with
+# libbpf to avoid dependency on system-wide headers, which could be missing or
+# outdated
+INCLUDES := -I$(OUTPUT) -I../../libbpf/include/uapi -I$(dir $(VMLINUX))
+CFLAGS := -g -Wall
+ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS)
+
+# Only the `profile` example belongs to this directory; it is appended below
+# when cargo is available, because it links against libblazesym. The other
+# libbpf-bootstrap examples (minimal, bootstrap, uprobe, ...) are not part of
+# this directory, so listing them would make `make all` fail.
+APPS =
+
+CARGO ?= $(shell which cargo)
+ifeq ($(strip $(CARGO)),)
+BZS_APPS :=
+else
+BZS_APPS := profile
+APPS += $(BZS_APPS)
+# Required by libblazesym
+ALL_LDFLAGS += -lrt -ldl -lpthread -lm
+endif
+
+# Ask clang for its default system include directories. They are passed
+# explicitly when compiling with `-target bpf`, because otherwise some
+# architecture-specific dirs will be "missing" on some architectures/distros -
+# headers such as asm/types.h, asm/byteorder.h, asm/socket.h, asm/sockios.h,
+# sys/cdefs.h etc. might be missing.
+#
+# The sed script keeps only the paths printed between clang's "search starts
+# here:" and "End of search list." markers and prefixes each one with
+# '-idirafter', so they don't interfere with include mechanics except where
+# the build would have failed anyways.
+CLANG_BPF_SYS_INCLUDES ?= $(shell $(CLANG) -v -E - </dev/null 2>&1 \
+	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+
+# Quiet by default: recipe lines are prefixed with '@' and each rule prints a
+# short "  TAG      file" line through $(call msg,TAG,file[,extra]) instead.
+# `make V=1` clears both so the full commands are echoed.
+ifneq ($(V),1)
+  Q = @
+  msg = @printf '  %-8s %s%s\n' "$(1)" \
+        "$(patsubst $(abspath $(OUTPUT))/%,%,$(2))" "$(if $(3), $(3))";
+  MAKEFLAGS += --no-print-directory
+else
+  Q =
+  msg =
+endif
+
+# allow-override(VAR,DEFAULT): assign DEFAULT to VAR unless VAR was supplied
+# through the environment or on the make command line. The two findstring
+# results are concatenated, so the condition is non-empty when either matches.
+define allow-override
+  $(if $(findstring environment,$(origin $(1)))$(findstring command line,$(origin $(1))),,\
+    $(eval $(1) = $(2)))
+endef
+
+# Derive the C compiler and linker from CROSS_COMPILE when not given explicitly.
+$(call allow-override,CC,$(CROSS_COMPILE)cc)
+$(call allow-override,LD,$(CROSS_COMPILE)ld)
+
+.PHONY: all clean
+
+# Build every application listed in APPS.
+all: $(APPS)
+
+# Remove the output directory and the application binaries.
+clean:
+	$(call msg,CLEAN)
+	$(Q)rm -rf $(OUTPUT) $(APPS)
+
+# Output directories; other rules depend on them as order-only prerequisites.
+$(OUTPUT) $(OUTPUT)/libbpf $(BPFTOOL_OUTPUT):
+	$(call msg,MKDIR,$@)
+	$(Q)mkdir -p $@
+
+# Build the static libbpf from the submodule sources. Objects are written to
+# the libbpf/ subdirectory next to the target, and `install` runs with DESTDIR
+# set to the target's directory and empty INCLUDEDIR/LIBDIR/UAPIDIR.
+$(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf
+	$(call msg,LIB,$@)
+	$(Q)$(MAKE) -C $(LIBBPF_SRC) \
+		BUILD_STATIC_ONLY=1 \
+		OBJDIR=$(dir $@)/libbpf \
+		DESTDIR=$(dir $@) \
+		INCLUDEDIR= LIBDIR= UAPIDIR= \
+		install
+
+# Build the bootstrap bpftool from the submodule sources. ARCH and
+# CROSS_COMPILE are cleared for the sub-make (presumably so the tool is built
+# for the host - confirm against the bpftool Makefile).
+$(BPFTOOL): | $(BPFTOOL_OUTPUT)
+	$(call msg,BPFTOOL,$@)
+	$(Q)$(MAKE) -C $(BPFTOOL_SRC) ARCH= CROSS_COMPILE= OUTPUT=$(BPFTOOL_OUTPUT)/ bootstrap
+
+
+# Build libblazesym with cargo. A double-colon rule without prerequisites is
+# executed on every run, which leaves the up-to-date check to cargo.
+$(LIBBLAZESYM_SRC)/target/release/libblazesym.a::
+	$(Q)cd $(LIBBLAZESYM_SRC) && $(CARGO) build --features=cheader,dont-generate-test-files --release
+
+# Copy the static library into the output directory.
+$(LIBBLAZESYM_OBJ): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT)
+	$(call msg,LIB,$@)
+	$(Q)cp $< $@
+
+# Copy the C header located next to the static library into the output
+# directory.
+$(LIBBLAZESYM_HEADER): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT)
+	$(call msg,LIB,$@)
+	$(Q)cp $(<D)/blazesym.h $@
+
+# Build BPF code
+#
+# Local headers are listed with `*.h`: $(wildcard ...) is expanded while the
+# makefile is read, where `%` is an ordinary character, so a `%.h` pattern
+# matches nothing and header edits would not trigger a rebuild. Only the .c
+# prerequisite is passed to the compiler; the object is first compiled to
+# *.tmp.bpf.o and then post-processed by `bpftool gen object`.
+$(OUTPUT)/%.bpf.o: %.bpf.c $(LIBBPF_OBJ) $(wildcard *.h) $(VMLINUX) | $(OUTPUT) $(BPFTOOL)
+	$(call msg,BPF,$@)
+	$(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH)		      \
+		     $(INCLUDES) $(CLANG_BPF_SYS_INCLUDES)		      \
+		     -c $(filter %.c,$^) -o $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
+	$(Q)$(BPFTOOL) gen object $@ $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
+
+# Generate BPF skeletons
+$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(OUTPUT) $(BPFTOOL)
+	$(call msg,GEN-SKEL,$@)
+	$(Q)$(BPFTOOL) gen skeleton $< > $@
+
+# Build user-space code
+# Every application object needs its generated skeleton header first.
+$(patsubst %,$(OUTPUT)/%.o,$(APPS)): %.o: %.skel.h
+
+# Local headers are listed with `*.h`: $(wildcard ...) is expanded while the
+# makefile is read, where `%` is an ordinary character, so a `%.h` pattern
+# matches nothing and header edits would not trigger a rebuild. Only the .c
+# prerequisite is passed to the compiler.
+$(OUTPUT)/%.o: %.c $(wildcard *.h) | $(OUTPUT)
+	$(call msg,CC,$@)
+	$(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
+
+# Applications using blazesym additionally need its header to compile ...
+$(patsubst %,$(OUTPUT)/%.o,$(BZS_APPS)): $(LIBBLAZESYM_HEADER)
+
+# ... and its static library to link (it becomes part of $^ below).
+$(BZS_APPS): $(LIBBLAZESYM_OBJ)
+
+# Build application binary
+$(APPS): %: $(OUTPUT)/%.o $(LIBBPF_OBJ) | $(OUTPUT)
+	$(call msg,BINARY,$@)
+	$(Q)$(CC) $(CFLAGS) $^ $(ALL_LDFLAGS) -lelf -lz -o $@
+
+# delete failed targets
+.DELETE_ON_ERROR:
+
+# keep intermediate (.skel.h, .bpf.o, etc) targets
+.SECONDARY:
--- a/src/12-profile/README.md
+++ b/src/12-profile/README.md
@@ -1,83 +1,89 @@
-## eBPF 入门实践教程：编写 eBPF 程序 profile 进行性能分析
+# eBPF 入门实践教程：编写 eBPF 程序 profile 进行性能分析

-### 背景
+## 背景

 `profile` 是一款用于追踪程序执行调用流程的工具，类似于perf中的 -g 指令。但是相较于perf而言，
 `profile`的功能更为细化，它可以选择用户需要追踪的层面，比如在用户态层面进行追踪，或是在内核态进行追踪。

-### 实现原理
+## 实现原理

-`profile` 的实现依赖于linux中的perf_event。在注入ebpf程序前，`profile` 工具会先将 perf_event 
+`profile` 的实现依赖于linux中的perf_event。在注入ebpf程序前，`profile` 工具会先将 perf_event
 注册好。
+
 ```c
 static int open_and_attach_perf_event(int freq, struct bpf_program *prog,
-				      struct bpf_link *links[])
+                      struct bpf_link *links[])
 {
-	struct perf_event_attr attr = {
-		.type = PERF_TYPE_SOFTWARE,
-		.freq = env.freq,
-		.sample_freq = env.sample_freq,
-		.config = PERF_COUNT_SW_CPU_CLOCK,
-	};
-	int i, fd;
+    struct perf_event_attr attr = {
+        .type = PERF_TYPE_SOFTWARE,
+        .freq = env.freq,
+        .sample_freq = env.sample_freq,
+        .config = PERF_COUNT_SW_CPU_CLOCK,
+    };
+    int i, fd;

-	for (i = 0; i < nr_cpus; i++) {
-		if (env.cpu != -1 && env.cpu != i)
-			continue;
+    for (i = 0; i < nr_cpus; i++) {
+        if (env.cpu != -1 && env.cpu != i)
+            continue;

-		fd = syscall(__NR_perf_event_open, &attr, -1, i, -1, 0);
-		if (fd < 0) {
-			/* Ignore CPU that is offline */
-			if (errno == ENODEV)
-				continue;
-			fprintf(stderr, "failed to init perf sampling: %s\n",
-				strerror(errno));
-			return -1;
-		}
-		links[i] = bpf_program__attach_perf_event(prog, fd);
-		if (!links[i]) {
-			fprintf(stderr, "failed to attach perf event on cpu: "
-				"%d\n", i);
-			links[i] = NULL;
-			close(fd);
-			return -1;
-		}
-	}
+        fd = syscall(__NR_perf_event_open, &attr, -1, i, -1, 0);
+        if (fd < 0) {
+            /* Ignore CPU that is offline */
+            if (errno == ENODEV)
+                continue;
+            fprintf(stderr, "failed to init perf sampling: %s\n",
+                strerror(errno));
+            return -1;
+        }
+        links[i] = bpf_program__attach_perf_event(prog, fd);
+        if (!links[i]) {
+            fprintf(stderr, "failed to attach perf event on cpu: "
+                "%d\n", i);
+            links[i] = NULL;
+            close(fd);
+            return -1;
+        }
+    }

-	return 0;
+    return 0;
 }
 ```
+
 其ebpf程序实现逻辑是对程序的堆栈进行定时采样，从而捕获程序的执行流程。
+
 ```c
 SEC("perf_event")
 int profile(void *ctx)
 {
-	int pid = bpf_get_current_pid_tgid() >> 32;
-	int cpu_id = bpf_get_smp_processor_id();
-	struct stacktrace_event *event;
-	int cp;
+    int pid = bpf_get_current_pid_tgid() >> 32;
+    int cpu_id = bpf_get_smp_processor_id();
+    struct stacktrace_event *event;
+    int cp;

-	event = bpf_ringbuf_reserve(&events, sizeof(*event), 0);
-	if (!event)
-		return 1;
+    event = bpf_ringbuf_reserve(&events, sizeof(*event), 0);
+    if (!event)
+        return 1;

-	event->pid = pid;
-	event->cpu_id = cpu_id;
+    event->pid = pid;
+    event->cpu_id = cpu_id;

-	if (bpf_get_current_comm(event->comm, sizeof(event->comm)))
-		event->comm[0] = 0;
+    if (bpf_get_current_comm(event->comm, sizeof(event->comm)))
+        event->comm[0] = 0;

-	event->kstack_sz = bpf_get_stack(ctx, event->kstack, sizeof(event->kstack), 0);
+    event->kstack_sz = bpf_get_stack(ctx, event->kstack, sizeof(event->kstack), 0);

-	event->ustack_sz = bpf_get_stack(ctx, event->ustack, sizeof(event->ustack), BPF_F_USER_STACK);
+    event->ustack_sz = bpf_get_stack(ctx, event->ustack, sizeof(event->ustack), BPF_F_USER_STACK);

-	bpf_ringbuf_submit(event, 0);
+    bpf_ringbuf_submit(event, 0);

-	return 0;
+    return 0;
 }
 ```
+
 通过这种方式，它可以根据用户指令，简单地决定追踪用户态层面的执行流程或是内核态层面的执行流程。
-### 编译运行
+
+## 编译运行
+
 ```console
 $ git clone https://github.com/libbpf/libbpf-bootstrap.git --recurse-submodules 
 $ cd examples/c
@@ -105,4 +111,5 @@ Userspace:
 ```

 ## 总结
+
 `profile` 实现了对程序执行流程的分析，在debug等操作中可以极大地帮助开发者提高效率。
--- a/src/12-profile/profile.c
+++ b/src/12-profile/profile.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2022 Facebook */
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/syscall.h>
+#include <sys/sysinfo.h>
+#include <linux/perf_event.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+
+#include "profile.skel.h"
+#include "profile.h"
+#include "blazesym.h"
+
+/*
+ * This function is from libbpf, but it is not a public API and can only be
+ * used for demonstration. We can use this here because we statically link
+ * against the libbpf built from submodule during build.
+ */
+extern int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
+
+/*
+ * Thin wrapper that issues the perf_event_open system call through
+ * syscall(2), forwarding all arguments unchanged.
+ *
+ * Returns the syscall result, narrowed to int before being widened to long.
+ */
+static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
+			    int cpu, int group_fd, unsigned long flags)
+{
+	return (int)syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
+}
+
+static struct blazesym *symbolizer;
+
+/*
+ * Symbolize one stack trace with the global symbolizer and print it.
+ *
+ * stack:    array of raw addresses
+ * stack_sz: number of entries in stack
+ * pid:      process used as symbol source; 0 selects the kernel instead
+ *
+ * An address without symbol information is printed on its own. With exactly
+ * one symbol the address and symbol share a line; with several, each symbol
+ * is listed on an indented line below the address.
+ */
+static void show_stack_trace(__u64 *stack, int stack_sz, pid_t pid)
+{
+	const struct blazesym_result *res;
+	const struct blazesym_csym *sym;
+	sym_src_cfg cfg;
+	int frame, k;
+
+	if (!pid) {
+		cfg.src_type = SRC_T_KERNEL;
+		cfg.params.kernel.kallsyms = NULL;
+		cfg.params.kernel.kernel_image = NULL;
+	} else {
+		cfg.src_type = SRC_T_PROCESS;
+		cfg.params.process.pid = pid;
+	}
+
+	res = blazesym_symbolize(symbolizer, &cfg, 1, (const uint64_t *)stack, stack_sz);
+
+	for (frame = 0; frame < stack_sz; frame++) {
+		__u64 addr = stack[frame];
+		/* Symbolization may fail as a whole or for single frames. */
+		int resolved = res && res->size > frame && res->entries[frame].size;
+
+		if (!resolved) {
+			printf("  %d [<%016llx>]\n", frame, addr);
+			continue;
+		}
+
+		if (res->entries[frame].size == 1) {
+			sym = &res->entries[frame].syms[0];
+			if (!sym->path || !sym->path[0])
+				printf("  %d [<%016llx>] %s+0x%llx\n",
+				       frame, addr, sym->symbol,
+				       addr - sym->start_address);
+			else
+				printf("  %d [<%016llx>] %s+0x%llx %s:%ld\n",
+				       frame, addr, sym->symbol,
+				       addr - sym->start_address,
+				       sym->path, sym->line_no);
+			continue;
+		}
+
+		/* Several symbols for one address: list them under it. */
+		printf("  %d [<%016llx>]\n", frame, addr);
+		for (k = 0; k < res->entries[frame].size; k++) {
+			sym = &res->entries[frame].syms[k];
+			if (!sym->path || !sym->path[0])
+				printf("        %s+0x%llx\n", sym->symbol,
+				       addr - sym->start_address);
+			else
+				printf("        %s+0x%llx %s:%ld\n",
+				       sym->symbol, addr - sym->start_address,
+				       sym->path, sym->line_no);
+		}
+	}
+
+	blazesym_result_free(res);
+}
+
+/*
+ * Ring buffer callback: print the stack traces carried by one sample.
+ *
+ * data points to a struct stacktrace_event; _ctx and size are unused.
+ *
+ * Returns 1 without printing anything when the sample has neither a kernel
+ * nor a user-space stack, and 0 after printing otherwise.
+ */
+static int event_handler(void *_ctx, void *data, size_t size)
+{
+	struct stacktrace_event *ev = data;
+	int has_kstack = ev->kstack_sz > 0;
+	int has_ustack = ev->ustack_sz > 0;
+
+	if (!has_kstack && !has_ustack)
+		return 1;
+
+	printf("COMM: %s (pid=%d) @ CPU %d\n", ev->comm, ev->pid, ev->cpu_id);
+
+	/* Stack sizes are in bytes; convert them to a number of addresses. */
+	if (!has_kstack) {
+		printf("No Kernel Stack\n");
+	} else {
+		printf("Kernel:\n");
+		show_stack_trace(ev->kstack, ev->kstack_sz / sizeof(__u64), 0);
+	}
+
+	if (!has_ustack) {
+		printf("No Userspace Stack\n");
+	} else {
+		printf("Userspace:\n");
+		show_stack_trace(ev->ustack, ev->ustack_sz / sizeof(__u64), ev->pid);
+	}
+
+	printf("\n");
+	return 0;
+}
+
+/* Print the one-line usage summary for progname to standard output. */
+static void show_help(const char *progname)
+{
+	fprintf(stdout, "Usage: %s [-f <frequency>] [-h]\n", progname);
+}
+
+/*
+ * Entry point: attach the BPF profiler to a perf event on every online CPU
+ * and print the sampled stack traces until polling the ring buffer fails.
+ *
+ * Options:
+ *   -f <frequency>  value used as the perf sample frequency; values below 1
+ *                   are raised to 1 (default: 1)
+ *   -h              print usage and exit with status 1
+ *
+ * Returns 0 when no setup step failed, otherwise the negated error code.
+ */
+int main(int argc, char * const argv[])
+{
+	const char *online_cpus_file = "/sys/devices/system/cpu/online";
+	int freq = 1, pid = -1, cpu;
+	struct profile_bpf *skel = NULL;
+	struct perf_event_attr attr;
+	struct bpf_link **links = NULL;
+	struct ring_buffer *ring_buf = NULL;
+	int num_cpus = 0, num_online_cpus;
+	int *pefds = NULL, pefd;
+	int argp, i, err = 0;
+	bool *online_mask = NULL;
+
+	while ((argp = getopt(argc, argv, "hf:")) != -1) {
+		switch (argp) {
+		case 'f':
+			freq = atoi(optarg);
+			if (freq < 1)
+				freq = 1;
+			break;
+
+		case 'h':
+		default:
+			show_help(argv[0]);
+			return 1;
+		}
+	}
+
+	err = parse_cpu_mask_file(online_cpus_file, &online_mask, &num_online_cpus);
+	if (err) {
+		fprintf(stderr, "Fail to get online CPU numbers: %d\n", err);
+		goto cleanup;
+	}
+
+	num_cpus = libbpf_num_possible_cpus();
+	if (num_cpus <= 0) {
+		fprintf(stderr, "Fail to get the number of processors\n");
+		err = -1;
+		goto cleanup;
+	}
+
+	skel = profile_bpf__open_and_load();
+	if (!skel) {
+		fprintf(stderr, "Fail to open and load BPF skeleton\n");
+		err = -1;
+		goto cleanup;
+	}
+
+	symbolizer = blazesym_new();
+	if (!symbolizer) {
+		fprintf(stderr, "Fail to create a symbolizer\n");
+		err = -1;
+		goto cleanup;
+	}
+
+	/* Prepare ring buffer to receive events from the BPF program. */
+	ring_buf = ring_buffer__new(bpf_map__fd(skel->maps.events), event_handler, NULL, NULL);
+	if (!ring_buf) {
+		err = -1;
+		goto cleanup;
+	}
+
+	/*
+	 * One perf event fd and one BPF link slot per possible CPU. Both
+	 * arrays are indexed below, so a failed allocation must stop here
+	 * instead of being dereferenced.
+	 */
+	pefds = malloc(num_cpus * sizeof(int));
+	if (!pefds) {
+		fprintf(stderr, "Fail to allocate the perf event fd array\n");
+		err = -1;
+		goto cleanup;
+	}
+	/* -1 marks a slot without an open fd, so cleanup can skip it. */
+	for (i = 0; i < num_cpus; i++) {
+		pefds[i] = -1;
+	}
+
+	links = calloc(num_cpus, sizeof(struct bpf_link *));
+	if (!links) {
+		fprintf(stderr, "Fail to allocate the BPF link array\n");
+		err = -1;
+		goto cleanup;
+	}
+
+	/* Frequency-based sampling of hardware CPU cycles. */
+	memset(&attr, 0, sizeof(attr));
+	attr.type = PERF_TYPE_HARDWARE;
+	attr.size = sizeof(attr);
+	attr.config = PERF_COUNT_HW_CPU_CYCLES;
+	attr.sample_freq = freq;
+	attr.freq = 1;
+
+	for (cpu = 0; cpu < num_cpus; cpu++) {
+		/* skip offline/not present CPUs */
+		if (cpu >= num_online_cpus || !online_mask[cpu])
+			continue;
+
+		/* Set up performance monitoring on a CPU/Core */
+		pefd = perf_event_open(&attr, pid, cpu, -1, PERF_FLAG_FD_CLOEXEC);
+		if (pefd < 0) {
+			fprintf(stderr, "Fail to set up performance monitor on a CPU/Core\n");
+			err = -1;
+			goto cleanup;
+		}
+		pefds[cpu] = pefd;
+
+		/* Attach a BPF program on a CPU */
+		links[cpu] = bpf_program__attach_perf_event(skel->progs.profile, pefd);
+		if (!links[cpu]) {
+			err = -1;
+			goto cleanup;
+		}
+	}
+
+	/* Wait and receive stack traces */
+	while (ring_buffer__poll(ring_buf, -1) >= 0) {
+	}
+
+cleanup:
+	/* Every pointer starts out NULL, so cleanup is reachable from any step. */
+	if (links) {
+		for (cpu = 0; cpu < num_cpus; cpu++)
+			bpf_link__destroy(links[cpu]);
+		free(links);
+	}
+	if (pefds) {
+		for (i = 0; i < num_cpus; i++) {
+			if (pefds[i] >= 0)
+				close(pefds[i]);
+		}
+		free(pefds);
+	}
+	ring_buffer__free(ring_buf);
+	profile_bpf__destroy(skel);
+	blazesym_free(symbolizer);
+	free(online_mask);
+	return -err;
+}