add javagc and memleak code

This commit is contained in:
yunwei37
2023-05-31 01:31:24 +08:00
committed by 云微
parent ad567ea830
commit 26823beedd
15 changed files with 3334 additions and 94 deletions

8
src/15-javagc/.gitignore vendored Normal file

@@ -0,0 +1,8 @@
.vscode
package.json
*.o
*.skel.json
*.skel.yaml
package.yaml
ecli
javagc

141
src/15-javagc/Makefile Normal file

@@ -0,0 +1,141 @@
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
OUTPUT := .output
CLANG ?= clang
LIBBPF_SRC := $(abspath ../../libbpf/src)
BPFTOOL_SRC := $(abspath ../../bpftool/src)
LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a)
BPFTOOL_OUTPUT ?= $(abspath $(OUTPUT)/bpftool)
BPFTOOL ?= $(BPFTOOL_OUTPUT)/bootstrap/bpftool
LIBBLAZESYM_SRC := $(abspath ../../blazesym/)
LIBBLAZESYM_OBJ := $(abspath $(OUTPUT)/libblazesym.a)
LIBBLAZESYM_HEADER := $(abspath $(OUTPUT)/blazesym.h)
ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \
| sed 's/arm.*/arm/' \
| sed 's/aarch64/arm64/' \
| sed 's/ppc64le/powerpc/' \
| sed 's/mips.*/mips/' \
| sed 's/riscv64/riscv/' \
| sed 's/loongarch64/loongarch/')
VMLINUX := ../../vmlinux/$(ARCH)/vmlinux.h
# Use our own libbpf API headers and Linux UAPI headers distributed with
# libbpf to avoid dependency on system-wide headers, which could be missing or
# outdated
INCLUDES := -I$(OUTPUT) -I../../libbpf/include/uapi -I$(dir $(VMLINUX))
CFLAGS := -g -Wall
ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS)
APPS = javagc # minimal minimal_legacy uprobe kprobe fentry usdt sockfilter tc ksyscall
CARGO ?= $(shell which cargo)
ifeq ($(strip $(CARGO)),)
BZS_APPS :=
else
BZS_APPS := # profile
APPS += $(BZS_APPS)
# Required by libblazesym
ALL_LDFLAGS += -lrt -ldl -lpthread -lm
endif
# Get Clang's default includes on this system. We'll explicitly add these dirs
# to the includes list when compiling with `-target bpf` because otherwise some
# architecture-specific dirs will be "missing" on some architectures/distros -
# headers such as asm/types.h, asm/byteorder.h, asm/socket.h, asm/sockios.h,
# sys/cdefs.h etc. might be missing.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
# build would have failed anyways.
CLANG_BPF_SYS_INCLUDES ?= $(shell $(CLANG) -v -E - </dev/null 2>&1 \
| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
ifeq ($(V),1)
Q =
msg =
else
Q = @
msg = @printf ' %-8s %s%s\n' \
"$(1)" \
"$(patsubst $(abspath $(OUTPUT))/%,%,$(2))" \
"$(if $(3), $(3))";
MAKEFLAGS += --no-print-directory
endif
define allow-override
$(if $(or $(findstring environment,$(origin $(1))),\
$(findstring command line,$(origin $(1)))),,\
$(eval $(1) = $(2)))
endef
$(call allow-override,CC,$(CROSS_COMPILE)cc)
$(call allow-override,LD,$(CROSS_COMPILE)ld)
.PHONY: all
all: $(APPS)
.PHONY: clean
clean:
$(call msg,CLEAN)
$(Q)rm -rf $(OUTPUT) $(APPS)
$(OUTPUT) $(OUTPUT)/libbpf $(BPFTOOL_OUTPUT):
$(call msg,MKDIR,$@)
$(Q)mkdir -p $@
# Build libbpf
$(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf
$(call msg,LIB,$@)
$(Q)$(MAKE) -C $(LIBBPF_SRC) BUILD_STATIC_ONLY=1 \
OBJDIR=$(dir $@)/libbpf DESTDIR=$(dir $@) \
INCLUDEDIR= LIBDIR= UAPIDIR= \
install
# Build bpftool
$(BPFTOOL): | $(BPFTOOL_OUTPUT)
$(call msg,BPFTOOL,$@)
$(Q)$(MAKE) ARCH= CROSS_COMPILE= OUTPUT=$(BPFTOOL_OUTPUT)/ -C $(BPFTOOL_SRC) bootstrap
$(LIBBLAZESYM_SRC)/target/release/libblazesym.a::
$(Q)cd $(LIBBLAZESYM_SRC) && $(CARGO) build --features=cheader,dont-generate-test-files --release
$(LIBBLAZESYM_OBJ): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT)
$(call msg,LIB, $@)
$(Q)cp $(LIBBLAZESYM_SRC)/target/release/libblazesym.a $@
$(LIBBLAZESYM_HEADER): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT)
$(call msg,LIB,$@)
$(Q)cp $(LIBBLAZESYM_SRC)/target/release/blazesym.h $@
# Build BPF code
$(OUTPUT)/%.bpf.o: %.bpf.c $(LIBBPF_OBJ) $(wildcard %.h) $(VMLINUX) | $(OUTPUT) $(BPFTOOL)
$(call msg,BPF,$@)
$(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) \
$(INCLUDES) $(CLANG_BPF_SYS_INCLUDES) \
-c $(filter %.c,$^) -o $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
$(Q)$(BPFTOOL) gen object $@ $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
# Generate BPF skeletons
$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(OUTPUT) $(BPFTOOL)
$(call msg,GEN-SKEL,$@)
$(Q)$(BPFTOOL) gen skeleton $< > $@
# Build user-space code
$(patsubst %,$(OUTPUT)/%.o,$(APPS)): %.o: %.skel.h
$(OUTPUT)/%.o: %.c $(wildcard %.h) | $(OUTPUT)
$(call msg,CC,$@)
$(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
$(patsubst %,$(OUTPUT)/%.o,$(BZS_APPS)): $(LIBBLAZESYM_HEADER)
$(BZS_APPS): $(LIBBLAZESYM_OBJ)
# Build application binary
$(APPS): %: $(OUTPUT)/%.o $(LIBBPF_OBJ) | $(OUTPUT)
$(call msg,BINARY,$@)
$(Q)$(CC) $(CFLAGS) $^ $(ALL_LDFLAGS) -lelf -lz -o $@
# delete failed targets
.DELETE_ON_ERROR:
# keep intermediate (.skel.h, .bpf.o, etc) targets
.SECONDARY:

src/15-javagc/README.md

@@ -1,3 +1,35 @@
# eBPF Tutorial by Example: Capturing User-Space Java GC Event Duration with USDT
## Introduction to USDT
TODO
## Java GC
TODO
## Installing Dependencies
Building the example requires clang, libelf, and zlib. Package names may vary across distributions.
On Ubuntu/Debian, run:
```shell
sudo apt install clang libelf1 libelf-dev zlib1g-dev
```
On CentOS/Fedora, run:
```shell
sudo dnf install clang elfutils-libelf elfutils-libelf-devel zlib-devel
```
## Compile and Run
Compile and run the code above:
TODO
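A minimal way to build and try it, assuming the Makefile above and a running HotSpot JVM (the PID below is only a placeholder):
```shell
# build the javagc binary into the current directory
make

# trace GC events of the JVM with PID 12345 that take longer than 100 us
sudo ./javagc -p 12345 -t 100
```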
## Summary
TODO

src/15-javagc/javagc.bpf.c Normal file

@@ -0,0 +1,81 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/* Copyright (c) 2022 Chen Tao */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/usdt.bpf.h>
#include "javagc.h"
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 100);
__type(key, uint32_t);
__type(value, struct data_t);
} data_map SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__type(key, int);
__type(value, int);
} perf_map SEC(".maps");
__u32 time;
static int gc_start(struct pt_regs *ctx)
{
struct data_t data = {};
data.cpu = bpf_get_smp_processor_id();
data.pid = bpf_get_current_pid_tgid() >> 32;
data.ts = bpf_ktime_get_ns();
bpf_map_update_elem(&data_map, &data.pid, &data, 0);
return 0;
}
static int gc_end(struct pt_regs *ctx)
{
struct data_t data = {};
struct data_t *p;
__u32 val;
data.cpu = bpf_get_smp_processor_id();
data.pid = bpf_get_current_pid_tgid() >> 32;
data.ts = bpf_ktime_get_ns();
p = bpf_map_lookup_elem(&data_map, &data.pid);
if (!p)
return 0;
val = data.ts - p->ts;
if (val > time) {
data.ts = val;
bpf_perf_event_output(ctx, &perf_map, BPF_F_CURRENT_CPU, &data, sizeof(data));
}
bpf_map_delete_elem(&data_map, &data.pid);
return 0;
}
SEC("usdt")
int handle_gc_start(struct pt_regs *ctx)
{
return gc_start(ctx);
}
SEC("usdt")
int handle_gc_end(struct pt_regs *ctx)
{
return gc_end(ctx);
}
SEC("usdt")
int handle_mem_pool_gc_start(struct pt_regs *ctx)
{
return gc_start(ctx);
}
SEC("usdt")
int handle_mem_pool_gc_end(struct pt_regs *ctx)
{
return gc_end(ctx);
}
char LICENSE[] SEC("license") = "Dual BSD/GPL";

243
src/15-javagc/javagc.c Normal file

@@ -0,0 +1,243 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/*
* Copyright (c) 2022 Chen Tao
* Based on ugc from BCC by Sasha Goldshtein
* Create: Wed Jun 29 16:00:19 2022
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <argp.h>
#include <signal.h>
#include <unistd.h>
#include <time.h>
#include <sys/resource.h>
#include <bpf/libbpf.h>
#include <errno.h>
#include "javagc.skel.h"
#include "javagc.h"
#define BINARY_PATH_SIZE (256)
#define PERF_BUFFER_PAGES (32)
#define PERF_POLL_TIMEOUT_MS (200)
static struct env {
pid_t pid;
int time;
bool exiting;
bool verbose;
} env = {
.pid = -1,
.time = 1000,
.exiting = false,
.verbose = false,
};
const char *argp_program_version = "javagc 0.1";
const char *argp_program_bug_address =
"https://github.com/iovisor/bcc/tree/master/libbpf-tools";
const char argp_program_doc[] =
"Monitor javagc time cost.\n"
"\n"
"USAGE: javagc [--help] [-p PID] [-t GC time]\n"
"\n"
"EXAMPLES:\n"
"javagc -p 185 # trace PID 185 only\n"
"javagc -p 185 -t 100 # trace PID 185 java gc time beyond 100us\n";
static const struct argp_option opts[] = {
{ "pid", 'p', "PID", 0, "Trace this PID only" },
{ "time", 't', "TIME", 0, "Java gc time" },
{ "verbose", 'v', NULL, 0, "Verbose debug output" },
{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
{},
};
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
int err = 0;
switch (key) {
case 'h':
argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
break;
case 'v':
env.verbose = true;
break;
case 'p':
errno = 0;
env.pid = strtol(arg, NULL, 10);
if (errno) {
err = errno;
fprintf(stderr, "invalid PID: %s\n", arg);
argp_usage(state);
}
break;
case 't':
errno = 0;
env.time = strtol(arg, NULL, 10);
if (errno) {
err = errno;
fprintf(stderr, "invalid time: %s\n", arg);
argp_usage(state);
}
break;
default:
return ARGP_ERR_UNKNOWN;
}
return err;
}
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
{
if (level == LIBBPF_DEBUG && ! env.verbose)
return 0;
return vfprintf(stderr, format, args);
}
static void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
{
struct data_t *e = (struct data_t *)data;
struct tm *tm = NULL;
char ts[16];
time_t t;
time(&t);
tm = localtime(&t);
strftime(ts, sizeof(ts), "%H:%M:%S", tm);
printf("%-8s %-7d %-7d %-7lld\n", ts, e->cpu, e->pid, e->ts/1000);
}
static void handle_lost_events(void *ctx, int cpu, __u64 data_sz)
{
printf("lost data\n");
}
static void sig_handler(int sig)
{
env.exiting = true;
}
static int get_jvmso_path(char *path)
{
char mode[16], line[128], buf[64];
size_t seg_start, seg_end, seg_off;
FILE *f;
int i = 0;
sprintf(buf, "/proc/%d/maps", env.pid);
f = fopen(buf, "r");
if (!f)
return -1;
while (fscanf(f, "%zx-%zx %s %zx %*s %*d%[^\n]\n",
&seg_start, &seg_end, mode, &seg_off, line) == 5) {
i = 0;
while (isblank(line[i]))
i++;
if (strstr(line + i, "libjvm.so")) {
break;
}
}
strcpy(path, line + i);
fclose(f);
return 0;
}
int main(int argc, char **argv)
{
static const struct argp argp = {
.options = opts,
.parser = parse_arg,
.doc = argp_program_doc,
};
char binary_path[BINARY_PATH_SIZE] = {0};
struct javagc_bpf *skel = NULL;
int err;
struct perf_buffer *pb = NULL;
err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
if (err)
return err;
/*
* libbpf can automatically resolve a shared object that lives in /usr/lib64,
* /usr/lib, etc., but libjvm.so is not there, so locate it via /proc/<pid>/maps.
*/
err = get_jvmso_path(binary_path);
if (err)
return err;
libbpf_set_print(libbpf_print_fn);
skel = javagc_bpf__open();
if (!skel) {
fprintf(stderr, "Failed to open BPF skeleton\n");
return 1;
}
skel->bss->time = env.time * 1000;
err = javagc_bpf__load(skel);
if (err) {
fprintf(stderr, "Failed to load and verify BPF skeleton\n");
goto cleanup;
}
skel->links.handle_mem_pool_gc_start = bpf_program__attach_usdt(skel->progs.handle_gc_start, env.pid,
binary_path, "hotspot", "mem__pool__gc__begin", NULL);
if (!skel->links.handle_mem_pool_gc_start) {
err = errno;
fprintf(stderr, "attach usdt mem__pool__gc__begin failed: %s\n", strerror(err));
goto cleanup;
}
skel->links.handle_mem_pool_gc_end = bpf_program__attach_usdt(skel->progs.handle_gc_end, env.pid,
binary_path, "hotspot", "mem__pool__gc__end", NULL);
if (!skel->links.handle_mem_pool_gc_end) {
err = errno;
fprintf(stderr, "attach usdt mem__pool__gc__end failed: %s\n", strerror(err));
goto cleanup;
}
skel->links.handle_gc_start = bpf_program__attach_usdt(skel->progs.handle_gc_start, env.pid,
binary_path, "hotspot", "gc__begin", NULL);
if (!skel->links.handle_gc_start) {
err = errno;
fprintf(stderr, "attach usdt gc__begin failed: %s\n", strerror(err));
goto cleanup;
}
skel->links.handle_gc_end = bpf_program__attach_usdt(skel->progs.handle_gc_end, env.pid,
binary_path, "hotspot", "gc__end", NULL);
if (!skel->links.handle_gc_end) {
err = errno;
fprintf(stderr, "attach usdt gc__end failed: %s\n", strerror(err));
goto cleanup;
}
signal(SIGINT, sig_handler);
printf("Tracing javagc time... Hit Ctrl-C to end.\n");
printf("%-8s %-7s %-7s %-7s\n",
"TIME", "CPU", "PID", "GC TIME");
pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_map), PERF_BUFFER_PAGES,
handle_event, handle_lost_events, NULL, NULL);
if (!pb) {
err = -errno;
fprintf(stderr, "failed to open perf buffer: %d\n", err);
goto cleanup;
}
while (!env.exiting) {
err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS);
if (err < 0 && err != -EINTR) {
fprintf(stderr, "error polling perf buffer: %s\n", strerror(-err));
goto cleanup;
}
/* reset err to return 0 if exiting */
err = 0;
}
cleanup:
perf_buffer__free(pb);
javagc_bpf__destroy(skel);
return err != 0;
}

12
src/15-javagc/javagc.h Normal file

@@ -0,0 +1,12 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/* Copyright (c) 2022 Chen Tao */
#ifndef __JAVAGC_H
#define __JAVAGC_H
struct data_t {
__u32 cpu;
__u32 pid;
__u64 ts;
};
#endif /* __JAVAGC_H */

8
src/16-memleak/.gitignore vendored Normal file

@@ -0,0 +1,8 @@
.vscode
package.json
*.o
*.skel.json
*.skel.yaml
package.yaml
ecli
memleak

141
src/16-memleak/Makefile Normal file

@@ -0,0 +1,141 @@
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
OUTPUT := .output
CLANG ?= clang
LIBBPF_SRC := $(abspath ../../libbpf/src)
BPFTOOL_SRC := $(abspath ../../bpftool/src)
LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a)
BPFTOOL_OUTPUT ?= $(abspath $(OUTPUT)/bpftool)
BPFTOOL ?= $(BPFTOOL_OUTPUT)/bootstrap/bpftool
LIBBLAZESYM_SRC := $(abspath ../../blazesym/)
LIBBLAZESYM_OBJ := $(abspath $(OUTPUT)/libblazesym.a)
LIBBLAZESYM_HEADER := $(abspath $(OUTPUT)/blazesym.h)
ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \
| sed 's/arm.*/arm/' \
| sed 's/aarch64/arm64/' \
| sed 's/ppc64le/powerpc/' \
| sed 's/mips.*/mips/' \
| sed 's/riscv64/riscv/' \
| sed 's/loongarch64/loongarch/')
VMLINUX := ../../vmlinux/$(ARCH)/vmlinux.h
# Use our own libbpf API headers and Linux UAPI headers distributed with
# libbpf to avoid dependency on system-wide headers, which could be missing or
# outdated
INCLUDES := -I$(OUTPUT) -I../../libbpf/include/uapi -I$(dir $(VMLINUX))
CFLAGS := -g -Wall
ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS)
APPS = memleak # minimal minimal_legacy uprobe kprobe fentry usdt sockfilter tc ksyscall
CARGO ?= $(shell which cargo)
ifeq ($(strip $(CARGO)),)
BZS_APPS :=
else
BZS_APPS := # profile
APPS += $(BZS_APPS)
# Required by libblazesym
ALL_LDFLAGS += -lrt -ldl -lpthread -lm
endif
# Get Clang's default includes on this system. We'll explicitly add these dirs
# to the includes list when compiling with `-target bpf` because otherwise some
# architecture-specific dirs will be "missing" on some architectures/distros -
# headers such as asm/types.h, asm/byteorder.h, asm/socket.h, asm/sockios.h,
# sys/cdefs.h etc. might be missing.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
# build would have failed anyways.
CLANG_BPF_SYS_INCLUDES ?= $(shell $(CLANG) -v -E - </dev/null 2>&1 \
| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
ifeq ($(V),1)
Q =
msg =
else
Q = @
msg = @printf ' %-8s %s%s\n' \
"$(1)" \
"$(patsubst $(abspath $(OUTPUT))/%,%,$(2))" \
"$(if $(3), $(3))";
MAKEFLAGS += --no-print-directory
endif
define allow-override
$(if $(or $(findstring environment,$(origin $(1))),\
$(findstring command line,$(origin $(1)))),,\
$(eval $(1) = $(2)))
endef
$(call allow-override,CC,$(CROSS_COMPILE)cc)
$(call allow-override,LD,$(CROSS_COMPILE)ld)
.PHONY: all
all: $(APPS)
.PHONY: clean
clean:
$(call msg,CLEAN)
$(Q)rm -rf $(OUTPUT) $(APPS)
$(OUTPUT) $(OUTPUT)/libbpf $(BPFTOOL_OUTPUT):
$(call msg,MKDIR,$@)
$(Q)mkdir -p $@
# Build libbpf
$(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf
$(call msg,LIB,$@)
$(Q)$(MAKE) -C $(LIBBPF_SRC) BUILD_STATIC_ONLY=1 \
OBJDIR=$(dir $@)/libbpf DESTDIR=$(dir $@) \
INCLUDEDIR= LIBDIR= UAPIDIR= \
install
# Build bpftool
$(BPFTOOL): | $(BPFTOOL_OUTPUT)
$(call msg,BPFTOOL,$@)
$(Q)$(MAKE) ARCH= CROSS_COMPILE= OUTPUT=$(BPFTOOL_OUTPUT)/ -C $(BPFTOOL_SRC) bootstrap
$(LIBBLAZESYM_SRC)/target/release/libblazesym.a::
$(Q)cd $(LIBBLAZESYM_SRC) && $(CARGO) build --features=cheader,dont-generate-test-files --release
$(LIBBLAZESYM_OBJ): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT)
$(call msg,LIB, $@)
$(Q)cp $(LIBBLAZESYM_SRC)/target/release/libblazesym.a $@
$(LIBBLAZESYM_HEADER): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT)
$(call msg,LIB,$@)
$(Q)cp $(LIBBLAZESYM_SRC)/target/release/blazesym.h $@
# Build BPF code
$(OUTPUT)/%.bpf.o: %.bpf.c $(LIBBPF_OBJ) $(wildcard %.h) $(VMLINUX) | $(OUTPUT) $(BPFTOOL)
$(call msg,BPF,$@)
$(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) \
$(INCLUDES) $(CLANG_BPF_SYS_INCLUDES) \
-c $(filter %.c,$^) -o $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
$(Q)$(BPFTOOL) gen object $@ $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
# Generate BPF skeletons
$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(OUTPUT) $(BPFTOOL)
$(call msg,GEN-SKEL,$@)
$(Q)$(BPFTOOL) gen skeleton $< > $@
# Build user-space code
$(patsubst %,$(OUTPUT)/%.o,$(APPS)): %.o: %.skel.h
$(OUTPUT)/%.o: %.c $(wildcard %.h) | $(OUTPUT)
$(call msg,CC,$@)
$(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
$(patsubst %,$(OUTPUT)/%.o,$(BZS_APPS)): $(LIBBLAZESYM_HEADER)
$(BZS_APPS): $(LIBBLAZESYM_OBJ)
# Build application binary
$(APPS): %: $(OUTPUT)/%.o $(LIBBPF_OBJ) | $(OUTPUT)
$(call msg,BINARY,$@)
$(Q)$(CC) $(CFLAGS) $^ $(ALL_LDFLAGS) -lelf -lz -o $@
# delete failed targets
.DELETE_ON_ERROR:
# keep intermediate (.skel.h, .bpf.o, etc) targets
.SECONDARY:

src/16-memleak/README.md

@@ -18,53 +18,55 @@
```c
struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __type(key, pid_t);
    __type(value, u64);
    __uint(max_entries, 10240);
} sizes SEC(".maps");

struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __type(key, u64); /* address */
    __type(value, struct alloc_info);
    __uint(max_entries, ALLOCS_MAX_ENTRIES);
} allocs SEC(".maps");

struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __type(key, u64); /* stack id */
    __type(value, union combined_alloc_info);
    __uint(max_entries, COMBINED_ALLOCS_MAX_ENTRIES);
} combined_allocs SEC(".maps");

struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __type(key, u64);
    __type(value, u64);
    __uint(max_entries, 10240);
} memptrs SEC(".maps");

struct {
    __uint(type, BPF_MAP_TYPE_STACK_TRACE);
    __type(key, u32);
} stack_traces SEC(".maps");

struct alloc_info {
    __u64 size;
    __u64 timestamp_ns;
    int stack_id;
};

union combined_alloc_info {
    struct {
        __u64 total_size : 40;
        __u64 number_of_allocs : 24;
    };
    __u64 bits;
};
```
This snippet defines the five BPF maps used by the memleak tool:
+ sizes records the size of each in-flight allocation request, keyed by PID.
+ allocs tracks the details of every outstanding allocation, including its size and stack information.
+ combined_allocs is keyed by a stack's unique identifier (stack id); its value is a combined_alloc_info union that records the total allocated size and the number of allocations attributed to that stack.
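How the user-space side consumes combined_allocs lives in memleak.c (whose diff is suppressed further below). As a rough sketch only, iterating a BPF hash map from user space with libbpf can look like this; the map fd is assumed to come from the generated skeleton, and the union is repeated locally to mirror the BPF-side definition:
```c
#include <stdio.h>
#include <linux/types.h>
#include <bpf/bpf.h>

/* Mirrors the union defined in the BPF object above. */
union combined_alloc_info {
    struct {
        __u64 total_size : 40;
        __u64 number_of_allocs : 24;
    };
    __u64 bits;
};

/* Walk the combined_allocs hash map and print per-stack totals.
 * map_fd would come from bpf_map__fd(skel->maps.combined_allocs). */
static void dump_combined_allocs(int map_fd)
{
    union combined_alloc_info cinfo;
    __u64 cur_key, next_key;
    __u64 *prev_key = NULL;

    while (bpf_map_get_next_key(map_fd, prev_key, &next_key) == 0) {
        if (bpf_map_lookup_elem(map_fd, &next_key, &cinfo) == 0)
            printf("stack id %llu: %llu bytes in %llu allocations\n",
                   (unsigned long long)next_key,
                   (unsigned long long)cinfo.total_size,
                   (unsigned long long)cinfo.number_of_allocs);
        cur_key = next_key;
        prev_key = &cur_key;
    }
}
```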
@@ -76,29 +78,30 @@ union combined_alloc_info {
```c
static int gen_alloc_enter(size_t size)
{
    if (size < min_size || size > max_size)
        return 0;

    if (sample_rate > 1) {
        if (bpf_ktime_get_ns() % sample_rate != 0)
            return 0;
    }

    const pid_t pid = bpf_get_current_pid_tgid() >> 32;
    bpf_map_update_elem(&sizes, &pid, &size, BPF_ANY);

    if (trace_all)
        bpf_printk("alloc entered, size = %lu\n", size);

    return 0;
}

SEC("uprobe")
int BPF_KPROBE(malloc_enter, size_t size)
{
    return gen_alloc_enter(size);
}
```
This function handles the entry of an allocation request. It first checks whether the requested size lies within the configured range; if not, it returns 0 and ignores the event. If sampling is enabled (sample_rate > 1), allocation entries are sampled: when the current timestamp is not a multiple of the sampling period, the function likewise returns 0. It then obtains the current thread's PID and stores the PID together with the requested size in the sizes map, so the allocation can be matched up later when it completes. If tracing is enabled (trace_all), it prints a log line via bpf_printk so the user can watch allocations in real time.
Finally, BPF_KPROBE(malloc_enter, size_t size) is defined; it is attached as a BPF uprobe that fires when malloc is called and records the allocation size via gen_alloc_enter.
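For context, the uprobe/uretprobe pair is attached from user space with libbpf. A rough sketch under stated assumptions (the skeleton type and link/program names follow what bpftool would generate for memleak.bpf.o; the libc path is caller-supplied; this is not the tool's exact attach code):
```c
#include <errno.h>
#include <bpf/libbpf.h>
#include "memleak.skel.h" /* generated by `bpftool gen skeleton` */

/* Attach malloc_enter as a uprobe and malloc_exit as a uretprobe on libc's
 * malloc for a given PID (pid = -1 would mean "all processes"). */
static int attach_malloc_probes(struct memleak_bpf *skel, pid_t pid,
                                const char *libc_path)
{
    LIBBPF_OPTS(bpf_uprobe_opts, enter_opts,
                .func_name = "malloc", .retprobe = false);
    LIBBPF_OPTS(bpf_uprobe_opts, exit_opts,
                .func_name = "malloc", .retprobe = true);

    skel->links.malloc_enter = bpf_program__attach_uprobe_opts(
            skel->progs.malloc_enter, pid, libc_path, 0, &enter_opts);
    if (!skel->links.malloc_enter)
        return -errno;

    skel->links.malloc_exit = bpf_program__attach_uprobe_opts(
            skel->progs.malloc_exit, pid, libc_path, 0, &exit_opts);
    if (!skel->links.malloc_exit)
        return -errno;

    return 0;
}
```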
@@ -106,59 +109,59 @@ int BPF_KPROBE(malloc_enter, size_t size)
```c
static void update_statistics_add(u64 stack_id, u64 sz)
{
    union combined_alloc_info *existing_cinfo;

    existing_cinfo = bpf_map_lookup_or_try_init(&combined_allocs, &stack_id, &initial_cinfo);
    if (!existing_cinfo)
        return;

    const union combined_alloc_info incremental_cinfo = {
        .total_size = sz,
        .number_of_allocs = 1
    };

    __sync_fetch_and_add(&existing_cinfo->bits, incremental_cinfo.bits);
}

static int gen_alloc_exit2(void *ctx, u64 address)
{
    const pid_t pid = bpf_get_current_pid_tgid() >> 32;
    struct alloc_info info;

    const u64 *size = bpf_map_lookup_elem(&sizes, &pid);
    if (!size)
        return 0; // missed alloc entry

    __builtin_memset(&info, 0, sizeof(info));

    info.size = *size;
    bpf_map_delete_elem(&sizes, &pid);

    if (address != 0) {
        info.timestamp_ns = bpf_ktime_get_ns();

        info.stack_id = bpf_get_stackid(ctx, &stack_traces, stack_flags);

        bpf_map_update_elem(&allocs, &address, &info, BPF_ANY);

        update_statistics_add(info.stack_id, info.size);
    }

    if (trace_all) {
        bpf_printk("alloc exited, size = %lu, result = %lx\n",
                   info.size, address);
    }

    return 0;
}

static int gen_alloc_exit(struct pt_regs *ctx)
{
    return gen_alloc_exit2(ctx, PT_REGS_RC(ctx));
}

SEC("uretprobe")
int BPF_KRETPROBE(malloc_exit)
{
    return gen_alloc_exit(ctx);
}
```
@@ -175,51 +178,53 @@ The main job of update_statistics_add is to update the memory allocation statistics
In gen_alloc_exit, the ctx argument is passed to gen_alloc_exit2 and its return value is propagated; the PT_REGS_RC macro is used to obtain the traced function's return value, i.e. the address returned by the allocator.
Finally, BPF_KRETPROBE(malloc_exit) defines a kretprobe-style handler, attached as a uretprobe, that runs when malloc returns and calls gen_alloc_exit to record the completed allocation.
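The reason a single __sync_fetch_and_add on bits can bump both fields is the bit-field layout: with the usual little-endian allocation, total_size occupies the low 40 bits and number_of_allocs the 24 bits above it, so adding an increment whose fields are pre-filled updates both counters at once (as long as neither field overflows). A small host-side illustration of the packing, not part of the tool:
```c
#include <stdio.h>
#include <stdint.h>

union combined_alloc_info {
    struct {
        uint64_t total_size : 40;
        uint64_t number_of_allocs : 24;
    };
    uint64_t bits;
};

int main(void)
{
    union combined_alloc_info stats = { .bits = 0 };
    union combined_alloc_info inc = { .total_size = 128, .number_of_allocs = 1 };

    /* Equivalent of __sync_fetch_and_add(&stats.bits, inc.bits) in the BPF code. */
    stats.bits += inc.bits;
    stats.bits += inc.bits;

    printf("total_size=%llu number_of_allocs=%llu\n",
           (unsigned long long)stats.total_size,
           (unsigned long long)stats.number_of_allocs);
    /* prints: total_size=256 number_of_allocs=2 */
    return 0;
}
```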
```c
static void update_statistics_del(u64 stack_id, u64 sz)
{
    union combined_alloc_info *existing_cinfo;

    existing_cinfo = bpf_map_lookup_elem(&combined_allocs, &stack_id);
    if (!existing_cinfo) {
        bpf_printk("failed to lookup combined allocs\n");
        return;
    }

    const union combined_alloc_info decremental_cinfo = {
        .total_size = sz,
        .number_of_allocs = 1
    };

    __sync_fetch_and_sub(&existing_cinfo->bits, decremental_cinfo.bits);
}

static int gen_free_enter(const void *address)
{
    const u64 addr = (u64)address;

    const struct alloc_info *info = bpf_map_lookup_elem(&allocs, &addr);
    if (!info)
        return 0;

    bpf_map_delete_elem(&allocs, &addr);
    update_statistics_del(info->stack_id, info->size);

    if (trace_all) {
        bpf_printk("free entered, address = %lx, size = %lu\n",
                   address, info->size);
    }

    return 0;
}

SEC("uprobe")
int BPF_KPROBE(free_enter, void *address)
{
    return gen_free_enter(address);
}
```
gen_free_enter takes an address as its argument. It first looks the address up in the allocs map; if no entry is found, the address was never recorded as allocated and the function returns 0. If an allocation record is found, it is removed from the allocs map with bpf_map_delete_elem.
Next, update_statistics_del is called with the stack id and block size to update the allocation statistics. It looks up the entry for that stack id in the combined_allocs map; if none is found, it logs a lookup failure and returns. If the entry exists, an atomic operation subtracts the block's size from the total and decrements the allocation count by one, since that stack now owns one block fewer and correspondingly less memory.
@@ -253,4 +258,4 @@ Tracing outstanding memory allocs... Hit Ctrl-C to end
memleak is a memory-leak monitoring tool that tracks the call stacks behind memory allocations and frees. Over time it can show memory that has remained unreleased for a long period.
This code comes from <https://github.com/iovisor/bcc/blob/master/libbpf-tools/memleak.bpf.c>
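A possible way to build and try it, assuming the Makefile above (replace the PID with the process you want to watch; the flags follow the bcc libbpf-tools memleak usage):
```shell
make

# attach to an already-running process and report outstanding allocations
sudo ./memleak -p 1234
```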

src/16-memleak/core_fixes.bpf.h Normal file

@@ -0,0 +1,169 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/* Copyright (c) 2021 Hengqi Chen */
#ifndef __CORE_FIXES_BPF_H
#define __CORE_FIXES_BPF_H
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
/**
* commit 2f064a59a1 ("sched: Change task_struct::state") changes
* the name of task_struct::state to task_struct::__state
* see:
* https://github.com/torvalds/linux/commit/2f064a59a1
*/
struct task_struct___o {
volatile long int state;
} __attribute__((preserve_access_index));
struct task_struct___x {
unsigned int __state;
} __attribute__((preserve_access_index));
static __always_inline __s64 get_task_state(void *task)
{
struct task_struct___x *t = task;
if (bpf_core_field_exists(t->__state))
return BPF_CORE_READ(t, __state);
return BPF_CORE_READ((struct task_struct___o *)task, state);
}
/**
* commit 309dca309fc3 ("block: store a block_device pointer in struct bio")
* adds a new member bi_bdev which is a pointer to struct block_device
* see:
* https://github.com/torvalds/linux/commit/309dca309fc3
*/
struct bio___o {
struct gendisk *bi_disk;
} __attribute__((preserve_access_index));
struct bio___x {
struct block_device *bi_bdev;
} __attribute__((preserve_access_index));
static __always_inline struct gendisk *get_gendisk(void *bio)
{
struct bio___x *b = bio;
if (bpf_core_field_exists(b->bi_bdev))
return BPF_CORE_READ(b, bi_bdev, bd_disk);
return BPF_CORE_READ((struct bio___o *)bio, bi_disk);
}
/**
* commit d5869fdc189f ("block: introduce block_rq_error tracepoint")
* adds a new tracepoint block_rq_error and it shares the same arguments
* with tracepoint block_rq_complete. As a result, the kernel BTF now has
* a `struct trace_event_raw_block_rq_completion` instead of
* `struct trace_event_raw_block_rq_complete`.
* see:
* https://github.com/torvalds/linux/commit/d5869fdc189f
*/
struct trace_event_raw_block_rq_complete___x {
dev_t dev;
sector_t sector;
unsigned int nr_sector;
} __attribute__((preserve_access_index));
struct trace_event_raw_block_rq_completion___x {
dev_t dev;
sector_t sector;
unsigned int nr_sector;
} __attribute__((preserve_access_index));
static __always_inline bool has_block_rq_completion()
{
if (bpf_core_type_exists(struct trace_event_raw_block_rq_completion___x))
return true;
return false;
}
/**
* commit d152c682f03c ("block: add an explicit ->disk backpointer to the
* request_queue") and commit f3fa33acca9f ("block: remove the ->rq_disk
* field in struct request") make some changes to `struct request` and
* `struct request_queue`. Now, to get the `struct gendisk *` field in a CO-RE
* way, we need both `struct request` and `struct request_queue`.
* see:
* https://github.com/torvalds/linux/commit/d152c682f03c
* https://github.com/torvalds/linux/commit/f3fa33acca9f
*/
struct request_queue___x {
struct gendisk *disk;
} __attribute__((preserve_access_index));
struct request___x {
struct request_queue___x *q;
struct gendisk *rq_disk;
} __attribute__((preserve_access_index));
static __always_inline struct gendisk *get_disk(void *request)
{
struct request___x *r = request;
if (bpf_core_field_exists(r->rq_disk))
return BPF_CORE_READ(r, rq_disk);
return BPF_CORE_READ(r, q, disk);
}
/**
* commit 6521f8917082("namei: prepare for idmapped mounts") add `struct
* user_namespace *mnt_userns` as vfs_create() and vfs_unlink() first argument.
* At the same time, struct renamedata {} add `struct user_namespace
* *old_mnt_userns` item. Now, to kprobe vfs_create()/vfs_unlink() in a CO-RE
* way, determine whether there is a `old_mnt_userns` field for `struct
* renamedata` to decide which input parameter of the vfs_create() to use as
* `dentry`.
* see:
* https://github.com/torvalds/linux/commit/6521f8917082
*/
struct renamedata___x {
struct user_namespace *old_mnt_userns;
} __attribute__((preserve_access_index));
static __always_inline bool renamedata_has_old_mnt_userns_field(void)
{
if (bpf_core_field_exists(struct renamedata___x, old_mnt_userns))
return true;
return false;
}
/**
* commit 3544de8ee6e4("mm, tracing: record slab name for kmem_cache_free()")
* replaces `trace_event_raw_kmem_free` with `trace_event_raw_kfree` and adds
* `tracepoint_kmem_cache_free` to enhance the information recorded for
* `kmem_cache_free`.
* see:
* https://github.com/torvalds/linux/commit/3544de8ee6e4
*/
struct trace_event_raw_kmem_free___x {
const void *ptr;
} __attribute__((preserve_access_index));
struct trace_event_raw_kfree___x {
const void *ptr;
} __attribute__((preserve_access_index));
struct trace_event_raw_kmem_cache_free___x {
const void *ptr;
} __attribute__((preserve_access_index));
static __always_inline bool has_kfree()
{
if (bpf_core_type_exists(struct trace_event_raw_kfree___x))
return true;
return false;
}
static __always_inline bool has_kmem_cache_free()
{
if (bpf_core_type_exists(struct trace_event_raw_kmem_cache_free___x))
return true;
return false;
}
#endif /* __CORE_FIXES_BPF_H */

26
src/16-memleak/maps.bpf.h Normal file

@@ -0,0 +1,26 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
// Copyright (c) 2020 Anton Protopopov
#ifndef __MAPS_BPF_H
#define __MAPS_BPF_H
#include <bpf/bpf_helpers.h>
#include <asm-generic/errno.h>
static __always_inline void *
bpf_map_lookup_or_try_init(void *map, const void *key, const void *init)
{
void *val;
long err;
val = bpf_map_lookup_elem(map, key);
if (val)
return val;
err = bpf_map_update_elem(map, key, init, BPF_NOEXIST);
if (err && err != -EEXIST)
return 0;
return bpf_map_lookup_elem(map, key);
}
#endif /* __MAPS_BPF_H */

src/16-memleak/memleak.bpf.c

@@ -337,7 +337,7 @@ int memleak__kfree(void *ctx)
ptr = BPF_CORE_READ(args, ptr);
}
- return gen_free_enter((void *)ptr);
+ return gen_free_enter(ptr);
}
SEC("tracepoint/kmem/kmem_cache_alloc")
@@ -375,7 +375,7 @@ int memleak__kmem_cache_free(void *ctx)
ptr = BPF_CORE_READ(args, ptr);
}
- return gen_free_enter((void *)ptr);
+ return gen_free_enter(ptr);
}
SEC("tracepoint/kmem/mm_page_alloc")

1068
src/16-memleak/memleak.c Normal file

File diff suppressed because it is too large

File diff suppressed because it is too large

src/16-memleak/trace_helpers.h Normal file

@@ -0,0 +1,104 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#ifndef __TRACE_HELPERS_H
#define __TRACE_HELPERS_H
#include <stdbool.h>
#define NSEC_PER_SEC 1000000000ULL
struct ksym {
const char *name;
unsigned long addr;
};
struct ksyms;
struct ksyms *ksyms__load(void);
void ksyms__free(struct ksyms *ksyms);
const struct ksym *ksyms__map_addr(const struct ksyms *ksyms,
unsigned long addr);
const struct ksym *ksyms__get_symbol(const struct ksyms *ksyms,
const char *name);
struct sym {
const char *name;
unsigned long start;
unsigned long size;
unsigned long offset;
};
struct syms;
struct syms *syms__load_pid(int tgid);
struct syms *syms__load_file(const char *fname);
void syms__free(struct syms *syms);
const struct sym *syms__map_addr(const struct syms *syms, unsigned long addr);
const struct sym *syms__map_addr_dso(const struct syms *syms, unsigned long addr,
char **dso_name, unsigned long *dso_offset);
struct syms_cache;
struct syms_cache *syms_cache__new(int nr);
struct syms *syms_cache__get_syms(struct syms_cache *syms_cache, int tgid);
void syms_cache__free(struct syms_cache *syms_cache);
struct partition {
char *name;
unsigned int dev;
};
struct partitions;
struct partitions *partitions__load(void);
void partitions__free(struct partitions *partitions);
const struct partition *
partitions__get_by_dev(const struct partitions *partitions, unsigned int dev);
const struct partition *
partitions__get_by_name(const struct partitions *partitions, const char *name);
void print_log2_hist(unsigned int *vals, int vals_size, const char *val_type);
void print_linear_hist(unsigned int *vals, int vals_size, unsigned int base,
unsigned int step, const char *val_type);
unsigned long long get_ktime_ns(void);
bool is_kernel_module(const char *name);
/*
* When attempting to use kprobe/kretprobe, please check out the newer
* fentry/fexit probes, as they provide better performance and usability. But
* in some situations we have to fall back to kprobe/kretprobe. This helper
* detects fentry/fexit support for the specified kernel function, covering:
*
* 1. The gap between kernel versions: kernel BTF is exposed
* starting from kernel 5.4, but fentry/fexit is actually
* supported starting from 5.5.
* 2. Whether the kernel supports module BTF or not.
*
* *name* is the name of a kernel function to be attached to, which can be
* from vmlinux or a kernel module.
* *mod* is a hint that indicates the *name* may reside in module BTF,
* if NULL, it means *name* belongs to vmlinux.
*/
bool fentry_can_attach(const char *name, const char *mod);
/*
* The name of a kernel function to be attached to may be changed between
* kernel releases. This helper is used to confirm whether the target kernel
* uses a certain function name before attaching.
*
* It is achieved by scanning
* /sys/kernel/debug/tracing/available_filter_functions
* If this file does not exist, it falls back to parsing /proc/kallsyms,
* which is slower.
*/
bool kprobe_exists(const char *name);
bool tracepoint_exists(const char *category, const char *event);
bool vmlinux_btf_exists(void);
bool module_btf_exists(const char *mod);
bool probe_tp_btf(const char *name);
bool probe_ringbuf();
#endif /* __TRACE_HELPERS_H */
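As an illustration of how a tool might use these helpers when deciding how to attach (the target function name here is only an example, and the program must be linked against the accompanying trace_helpers.c):
```c
#include <stdio.h>
#include "trace_helpers.h"

/* Prefer fentry/fexit when the running kernel can attach to the target
 * function via BTF; otherwise fall back to a kprobe. */
static const char *pick_attach_mode(const char *func)
{
    if (fentry_can_attach(func, NULL))
        return "fentry";
    if (kprobe_exists(func))
        return "kprobe";
    return "unsupported";
}

int main(void)
{
    printf("vfs_read attach mode: %s\n", pick_attach_mode("vfs_read"));
    return 0;
}
```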