From 96d1dca99d16337d5dfdaf8b35b077e3c9dff0b4 Mon Sep 17 00:00:00 2001
From: yunwei37 <1067852565@qq.com>
Date: Wed, 31 May 2023 01:38:43 +0800
Subject: [PATCH] add code for biopattern

---
 src/17-biopattern/.gitignore       |   8 +
 src/17-biopattern/Makefile         | 145 +++++++++
 src/17-biopattern/biopattern.bpf.c |  57 ++++
 src/17-biopattern/biopattern.c     | 239 +++++++++++++++
 src/17-biopattern/biopattern.h     |  14 +
 src/17-biopattern/core_fixes.bpf.h | 169 +++++++++++
 src/17-biopattern/maps.bpf.h       |  26 ++
 src/17-biopattern/trace_helpers.c  | 452 +++++++++++++++++++++++++++++
 src/17-biopattern/trace_helpers.h  | 104 +++++++
 9 files changed, 1214 insertions(+)
 create mode 100644 src/17-biopattern/.gitignore
 create mode 100644 src/17-biopattern/Makefile
 create mode 100644 src/17-biopattern/biopattern.bpf.c
 create mode 100644 src/17-biopattern/biopattern.c
 create mode 100644 src/17-biopattern/biopattern.h
 create mode 100644 src/17-biopattern/core_fixes.bpf.h
 create mode 100644 src/17-biopattern/maps.bpf.h
 create mode 100644 src/17-biopattern/trace_helpers.c
 create mode 100644 src/17-biopattern/trace_helpers.h

diff --git a/src/17-biopattern/.gitignore b/src/17-biopattern/.gitignore
new file mode 100644
index 0000000..f79e42b
--- /dev/null
+++ b/src/17-biopattern/.gitignore
@@ -0,0 +1,8 @@
+.vscode
+package.json
+*.o
+*.skel.json
+*.skel.yaml
+package.yaml
+ecli
+biopattern
diff --git a/src/17-biopattern/Makefile b/src/17-biopattern/Makefile
new file mode 100644
index 0000000..9171a00
--- /dev/null
+++ b/src/17-biopattern/Makefile
@@ -0,0 +1,145 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+OUTPUT := .output
+CLANG ?= clang
+LIBBPF_SRC := $(abspath ../../libbpf/src)
+BPFTOOL_SRC := $(abspath ../../bpftool/src)
+LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a)
+BPFTOOL_OUTPUT ?= $(abspath $(OUTPUT)/bpftool)
+BPFTOOL ?= $(BPFTOOL_OUTPUT)/bootstrap/bpftool
+LIBBLAZESYM_SRC := $(abspath ../../blazesym/)
+LIBBLAZESYM_OBJ := $(abspath $(OUTPUT)/libblazesym.a)
+LIBBLAZESYM_HEADER := $(abspath $(OUTPUT)/blazesym.h)
+ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \
+                         | sed 's/arm.*/arm/' \
+                         | sed 's/aarch64/arm64/' \
+                         | sed 's/ppc64le/powerpc/' \
+                         | sed 's/mips.*/mips/' \
+                         | sed 's/riscv64/riscv/' \
+                         | sed 's/loongarch64/loongarch/')
+VMLINUX := ../../vmlinux/$(ARCH)/vmlinux.h
+# Use our own libbpf API headers and Linux UAPI headers distributed with
+# libbpf to avoid dependency on system-wide headers, which could be missing or
+# outdated
+INCLUDES := -I$(OUTPUT) -I../../libbpf/include/uapi -I$(dir $(VMLINUX))
+CFLAGS := -g -Wall
+ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS)
+
+APPS = biopattern
+
+CARGO ?= $(shell which cargo)
+ifeq ($(strip $(CARGO)),)
+BZS_APPS :=
+else
+BZS_APPS := # profile
+APPS += $(BZS_APPS)
+# Required by libblazesym
+ALL_LDFLAGS += -lrt -ldl -lpthread -lm
+endif
+
+# Get Clang's default includes on this system. We'll explicitly add these dirs
+# to the includes list when compiling with `-target bpf` because otherwise some
+# architecture-specific dirs will be "missing" on some architectures/distros -
+# headers such as asm/types.h, asm/byteorder.h, asm/socket.h, asm/sockios.h,
+# sys/cdefs.h etc. might be missing.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+CLANG_BPF_SYS_INCLUDES ?= $(shell $(CLANG) -v -E - </dev/null 2>&1 \
+  | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+
+ifeq ($(V),1)
+  Q =
+  msg =
+else
+  Q = @
+  msg = @printf ' %-8s %s%s\n' \
+    "$(1)" \
+    "$(patsubst $(abspath $(OUTPUT))/%,%,$(2))" \
+    "$(if $(3), $(3))";
+  MAKEFLAGS += --no-print-directory
+endif
+
+define allow-override
+  $(if $(or $(findstring environment,$(origin $(1))),\
+            $(findstring command line,$(origin $(1)))),,\
+    $(eval $(1) = $(2)))
+endef
+
+$(call allow-override,CC,$(CROSS_COMPILE)cc)
+$(call allow-override,LD,$(CROSS_COMPILE)ld)
+
+.PHONY: all
+all: $(APPS)
+
+.PHONY: clean
+clean:
+	$(call msg,CLEAN)
+	$(Q)rm -rf $(OUTPUT) $(APPS) trace_helpers.o
+
+$(OUTPUT) $(OUTPUT)/libbpf $(BPFTOOL_OUTPUT):
+	$(call msg,MKDIR,$@)
+	$(Q)mkdir -p $@
+
+# Build libbpf
+$(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf
+	$(call msg,LIB,$@)
+	$(Q)$(MAKE) -C $(LIBBPF_SRC) BUILD_STATIC_ONLY=1 \
+		OBJDIR=$(dir $@)/libbpf DESTDIR=$(dir $@) \
+		INCLUDEDIR= LIBDIR= UAPIDIR= \
+		install
+
+# Build bpftool
+$(BPFTOOL): | $(BPFTOOL_OUTPUT)
+	$(call msg,BPFTOOL,$@)
+	$(Q)$(MAKE) ARCH= CROSS_COMPILE= OUTPUT=$(BPFTOOL_OUTPUT)/ -C $(BPFTOOL_SRC) bootstrap
+
+
+$(LIBBLAZESYM_SRC)/target/release/libblazesym.a::
+	$(Q)cd $(LIBBLAZESYM_SRC) && $(CARGO) build --features=cheader,dont-generate-test-files --release
+
+$(LIBBLAZESYM_OBJ): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT)
+	$(call msg,LIB, $@)
+	$(Q)cp $(LIBBLAZESYM_SRC)/target/release/libblazesym.a $@
+
+$(LIBBLAZESYM_HEADER): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT)
+	$(call msg,LIB,$@)
+	$(Q)cp $(LIBBLAZESYM_SRC)/target/release/blazesym.h $@
+
+# Build BPF code
+$(OUTPUT)/%.bpf.o: %.bpf.c $(LIBBPF_OBJ) $(wildcard %.h) $(VMLINUX) | $(OUTPUT) $(BPFTOOL)
+	$(call msg,BPF,$@)
+	$(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) \
+		$(INCLUDES) $(CLANG_BPF_SYS_INCLUDES) \
+		-c $(filter %.c,$^) -o $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
+	$(Q)$(BPFTOOL) gen object $@ $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
+
+# Generate BPF skeletons
+$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(OUTPUT) $(BPFTOOL)
+	$(call msg,GEN-SKEL,$@)
+	$(Q)$(BPFTOOL) gen skeleton $< > $@
+
+# Build user-space code
+$(patsubst %,$(OUTPUT)/%.o,$(APPS)): %.o: %.skel.h
+# order-only: libbpf must be built first so its headers exist under $(OUTPUT) (matters with -j)
+trace_helpers.o: trace_helpers.c trace_helpers.h | $(LIBBPF_OBJ)
+	$(call msg,CC,$@)
+	$(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@
+
+$(OUTPUT)/%.o: %.c $(wildcard %.h) | $(OUTPUT)
+	$(call msg,CC,$@)
+	$(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
+
+$(patsubst %,$(OUTPUT)/%.o,$(BZS_APPS)): $(LIBBLAZESYM_HEADER)
+
+$(BZS_APPS): $(LIBBLAZESYM_OBJ)
+
+# Build application binary
+$(APPS): %: $(OUTPUT)/%.o $(LIBBPF_OBJ) trace_helpers.o | $(OUTPUT)
+	$(call msg,BINARY,$@)
+	$(Q)$(CC) $(CFLAGS) $^ $(ALL_LDFLAGS) -lelf -lz -o $@
+
+# delete failed targets
+.DELETE_ON_ERROR:
+
+# keep intermediate (.skel.h, .bpf.o, etc) targets
+.SECONDARY:
diff --git a/src/17-biopattern/biopattern.bpf.c b/src/17-biopattern/biopattern.bpf.c
new file mode 100644
index 0000000..c7d306e
--- /dev/null
+++ b/src/17-biopattern/biopattern.bpf.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Wenbo Zhang
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "biopattern.h"
+#include "maps.bpf.h"
+#include "core_fixes.bpf.h"
+
+const volatile bool filter_dev = false;
+const volatile __u32 targ_dev = 0;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 64);
+	__type(key, u32);
+	__type(value, struct counter);
+} counters SEC(".maps");
+
+SEC("tracepoint/block/block_rq_complete")
+int handle__block_rq_complete(void *args)
+{
+	struct counter *counterp, zero = {};
+	sector_t sector;
+	u32 nr_sector;
+	u32 dev;
+
+	if (has_block_rq_completion()) {
+		struct trace_event_raw_block_rq_completion___x *ctx = args;
+		sector = BPF_CORE_READ(ctx, sector);
+		nr_sector = BPF_CORE_READ(ctx, nr_sector);
+		dev = BPF_CORE_READ(ctx, dev);
+	} else {
+		struct trace_event_raw_block_rq_complete___x *ctx = args;
+		sector = BPF_CORE_READ(ctx, sector);
+		nr_sector = BPF_CORE_READ(ctx, nr_sector);
+		dev = BPF_CORE_READ(ctx, dev);
+	}
+
+	if (filter_dev && targ_dev != dev)
+		return 0;
+
+	counterp = bpf_map_lookup_or_try_init(&counters, &dev, &zero);
+	if (!counterp)
+		return 0;
+	if (counterp->last_sector) {
+		if (counterp->last_sector == sector)
+			__sync_fetch_and_add(&counterp->sequential, 1);
+		else
+			__sync_fetch_and_add(&counterp->random, 1);
+		__sync_fetch_and_add(&counterp->bytes, nr_sector * 512);
+	}
+	counterp->last_sector = sector + nr_sector;
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/src/17-biopattern/biopattern.c b/src/17-biopattern/biopattern.c
new file mode 100644
index 0000000..d9e9abf
--- /dev/null
+++ b/src/17-biopattern/biopattern.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2020 Wenbo Zhang
+//
+// Based on biopattern(8) from BPF-Perf-Tools-Book by Brendan Gregg.
+// 17-Jun-2020 Wenbo Zhang Created this.
+#include <argp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include "biopattern.h"
+#include "biopattern.skel.h"
+#include "trace_helpers.h"
+
+static struct env {
+	char *disk;
+	time_t interval;
+	bool timestamp;
+	bool verbose;
+	int times;
+} env = {
+	.interval = 99999999,
+	.times = 99999999,
+};
+
+static volatile bool exiting;
+
+const char *argp_program_version = "biopattern 0.1";
+const char *argp_program_bug_address =
+	"https://github.com/iovisor/bcc/tree/master/libbpf-tools";
+const char argp_program_doc[] =
+"Show block device I/O pattern.\n"
+"\n"
+"USAGE: biopattern [--help] [-T] [-d DISK] [interval] [count]\n"
+"\n"
+"EXAMPLES:\n"
+" biopattern # show block I/O pattern\n"
+" biopattern 1 10 # print 1 second summaries, 10 times\n"
+" biopattern -T 1 # 1s summaries with timestamps\n"
+" biopattern -d sdc # trace sdc only\n";
+
+static const struct argp_option opts[] = {
+	{ "timestamp", 'T', NULL, 0, "Include timestamp on output" },
+	{ "disk", 'd', "DISK", 0, "Trace this disk only" },
+	{ "verbose", 'v', NULL, 0, "Verbose debug output" },
+	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
+	{},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	static int pos_args;
+
+	switch (key) {
+	case 'h':
+		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+		break;
+	case 'v':
+		env.verbose = true;
+		break;
+	case 'd':
+		env.disk = arg;
+		if (strlen(arg) + 1 > DISK_NAME_LEN) {
+			fprintf(stderr, "invalid disk name: too long\n");
+			argp_usage(state);
+		}
+		break;
+	case 'T':
+		env.timestamp = true;
+		break;
+	case ARGP_KEY_ARG:
+		errno = 0;
+		if (pos_args == 0) {
+			env.interval = strtol(arg, NULL, 10);
+			if (errno) {
+				fprintf(stderr, "invalid interval\n");
+				argp_usage(state);
+			}
+		} else if (pos_args == 1) {
+			env.times = strtol(arg, NULL, 10);
+			if (errno) {
+				fprintf(stderr, "invalid times\n");
+				argp_usage(state);
+			}
+		} else {
+			fprintf(stderr,
+				"unrecognized positional argument: %s\n", arg);
+			argp_usage(state);
+		}
+		pos_args++;
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
+{
+	if (level == LIBBPF_DEBUG && !env.verbose)
+		return 0;
+	return vfprintf(stderr, format, args);
+}
+
+static void sig_handler(int sig)
+{
+	exiting = true;
+}
+
+static int print_map(struct bpf_map *counters, struct partitions *partitions)
+{
+	__u32 total, lookup_key = -1, next_key;
+	int err, fd = bpf_map__fd(counters);
+	const struct partition *partition;
+	struct counter counter;
+	struct tm *tm;
+	char ts[32];
+	time_t t;
+
+	while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) {
+		err = bpf_map_lookup_elem(fd, &next_key, &counter);
+		if (err < 0) {
+			fprintf(stderr, "failed to lookup counters: %d\n", err);
+			return -1;
+		}
+		lookup_key = next_key;
+		total = counter.sequential + counter.random;
+		if (!total)
+			continue;
+		if (env.timestamp) {
+			time(&t);
+			tm = localtime(&t);
+			strftime(ts, sizeof(ts), "%H:%M:%S", tm);
+			printf("%-9s ", ts);
+		}
+		partition = partitions__get_by_dev(partitions, next_key);
+		printf("%-7s %5ld %5ld %8u %10llu\n", /* total and bytes are unsigned */
+			partition ? partition->name : "Unknown",
+			counter.random * 100L / total,
+			counter.sequential * 100L / total, total,
+			counter.bytes / 1024);
+	}
+
+	lookup_key = -1;
+	while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) {
+		err = bpf_map_delete_elem(fd, &next_key);
+		if (err < 0) {
+			fprintf(stderr, "failed to cleanup counters: %d\n", err);
+			return -1;
+		}
+		lookup_key = next_key;
+	}
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	LIBBPF_OPTS(bpf_object_open_opts, open_opts);
+	struct partitions *partitions = NULL;
+	const struct partition *partition;
+	static const struct argp argp = {
+		.options = opts,
+		.parser = parse_arg,
+		.doc = argp_program_doc,
+	};
+	struct biopattern_bpf *obj;
+	int err;
+
+	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+	if (err)
+		return err;
+	err = 1; /* assume failure: the early "goto cleanup" exits below never store a status */
+	libbpf_set_print(libbpf_print_fn);
+
+	obj = biopattern_bpf__open_opts(&open_opts);
+	if (!obj) {
+		fprintf(stderr, "failed to open BPF object\n");
+		return 1;
+	}
+
+	partitions = partitions__load();
+	if (!partitions) {
+		fprintf(stderr, "failed to load partitions info\n");
+		goto cleanup;
+	}
+
+	/* initialize global data (filtering options) */
+	if (env.disk) {
+		partition = partitions__get_by_name(partitions, env.disk);
+		if (!partition) {
+			fprintf(stderr, "invalid partition name: not exist\n");
+			goto cleanup;
+		}
+		obj->rodata->filter_dev = true;
+		obj->rodata->targ_dev = partition->dev;
+	}
+
+	err = biopattern_bpf__load(obj);
+	if (err) {
+		fprintf(stderr, "failed to load BPF object: %d\n", err);
+		goto cleanup;
+	}
+
+	err = biopattern_bpf__attach(obj);
+	if (err) {
+		fprintf(stderr, "failed to attach BPF programs\n");
+		goto cleanup;
+	}
+
+	signal(SIGINT, sig_handler);
+
+	printf("Tracing block device I/O requested seeks... Hit Ctrl-C to "
+		"end.\n");
+	if (env.timestamp)
+		printf("%-9s ", "TIME");
+	printf("%-7s %5s %5s %8s %10s\n", "DISK", "%RND", "%SEQ",
+		"COUNT", "KBYTES");
+
+	/* main: poll */
+	while (1) {
+		sleep(env.interval);
+
+		err = print_map(obj->maps.counters, partitions);
+		if (err)
+			break;
+
+		if (exiting || --env.times == 0)
+			break;
+	}
+
+cleanup:
+	biopattern_bpf__destroy(obj);
+	partitions__free(partitions);
+
+	return err != 0;
+}
diff --git a/src/17-biopattern/biopattern.h b/src/17-biopattern/biopattern.h
new file mode 100644
index 0000000..18860a5
--- /dev/null
+++ b/src/17-biopattern/biopattern.h
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#ifndef __BIOPATTERN_H
+#define __BIOPATTERN_H
+
+#define DISK_NAME_LEN 32
+
+struct counter {
+	__u64 last_sector;
+	__u64 bytes;
+	__u32 sequential;
+	__u32 random;
+};
+
+#endif /* __BIOPATTERN_H */
diff --git a/src/17-biopattern/core_fixes.bpf.h b/src/17-biopattern/core_fixes.bpf.h
new file mode 100644
index 0000000..552c9fa
--- /dev/null
+++ b/src/17-biopattern/core_fixes.bpf.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#ifndef __CORE_FIXES_BPF_H
+#define __CORE_FIXES_BPF_H
+
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+
+/**
+ * commit 2f064a59a1 ("sched: Change task_struct::state") changes
+ * the name of task_struct::state to task_struct::__state
+ * see:
+ *     https://github.com/torvalds/linux/commit/2f064a59a1
+ */
+struct task_struct___o {
+	volatile long int state;
+} __attribute__((preserve_access_index));
+
+struct task_struct___x {
+	unsigned int __state;
+} __attribute__((preserve_access_index));
+
+static __always_inline __s64 get_task_state(void *task)
+{
+	struct task_struct___x *t = task;
+
+	if (bpf_core_field_exists(t->__state))
+		return BPF_CORE_READ(t, __state);
+	return BPF_CORE_READ((struct task_struct___o *)task, state);
+}
+
+/**
+ * commit 309dca309fc3 ("block: store a block_device pointer in struct bio")
+ * adds a new member bi_bdev which is a pointer to struct block_device
+ * see:
+ *     https://github.com/torvalds/linux/commit/309dca309fc3
+ */
+struct bio___o {
+	struct gendisk *bi_disk;
+} __attribute__((preserve_access_index));
+
+struct bio___x {
+	struct block_device *bi_bdev;
+} __attribute__((preserve_access_index));
+
+static __always_inline struct gendisk *get_gendisk(void *bio)
+{
+	struct bio___x *b = bio;
+
+	if (bpf_core_field_exists(b->bi_bdev))
+		return BPF_CORE_READ(b, bi_bdev, bd_disk);
+	return BPF_CORE_READ((struct bio___o *)bio, bi_disk);
+}
+
+/**
+ * commit d5869fdc189f ("block: introduce block_rq_error tracepoint")
+ * adds a new tracepoint block_rq_error and it shares the same arguments
+ * with tracepoint block_rq_complete. As a result, the kernel BTF now has
+ * a `struct trace_event_raw_block_rq_completion` instead of
+ * `struct trace_event_raw_block_rq_complete`.
+ * see:
+ *     https://github.com/torvalds/linux/commit/d5869fdc189f
+ */
+struct trace_event_raw_block_rq_complete___x {
+	dev_t dev;
+	sector_t sector;
+	unsigned int nr_sector;
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_block_rq_completion___x {
+	dev_t dev;
+	sector_t sector;
+	unsigned int nr_sector;
+} __attribute__((preserve_access_index));
+
+static __always_inline bool has_block_rq_completion()
+{
+	if (bpf_core_type_exists(struct trace_event_raw_block_rq_completion___x))
+		return true;
+	return false;
+}
+
+/**
+ * commit d152c682f03c ("block: add an explicit ->disk backpointer to the
+ * request_queue") and commit f3fa33acca9f ("block: remove the ->rq_disk
+ * field in struct request") make some changes to `struct request` and
+ * `struct request_queue`. Now, to get the `struct gendisk *` field in a CO-RE
+ * way, we need both `struct request` and `struct request_queue`.
+ * see:
+ *     https://github.com/torvalds/linux/commit/d152c682f03c
+ *     https://github.com/torvalds/linux/commit/f3fa33acca9f
+ */
+struct request_queue___x {
+	struct gendisk *disk;
+} __attribute__((preserve_access_index));
+
+struct request___x {
+	struct request_queue___x *q;
+	struct gendisk *rq_disk;
+} __attribute__((preserve_access_index));
+
+static __always_inline struct gendisk *get_disk(void *request)
+{
+	struct request___x *r = request;
+
+	if (bpf_core_field_exists(r->rq_disk))
+		return BPF_CORE_READ(r, rq_disk);
+	return BPF_CORE_READ(r, q, disk);
+}
+
+/**
+ * commit 6521f8917082("namei: prepare for idmapped mounts") add `struct
+ * user_namespace *mnt_userns` as vfs_create() and vfs_unlink() first argument.
+ * At the same time, struct renamedata {} add `struct user_namespace
+ * *old_mnt_userns` item. Now, to kprobe vfs_create()/vfs_unlink() in a CO-RE
+ * way, determine whether there is a `old_mnt_userns` field for `struct
+ * renamedata` to decide which input parameter of the vfs_create() to use as
+ * `dentry`.
+ * see:
+ *     https://github.com/torvalds/linux/commit/6521f8917082
+ */
+struct renamedata___x {
+	struct user_namespace *old_mnt_userns;
+} __attribute__((preserve_access_index));
+
+static __always_inline bool renamedata_has_old_mnt_userns_field(void)
+{
+	if (bpf_core_field_exists(struct renamedata___x, old_mnt_userns))
+		return true;
+	return false;
+}
+
+/**
+ * commit 3544de8ee6e4("mm, tracing: record slab name for kmem_cache_free()")
+ * replaces `trace_event_raw_kmem_free` with `trace_event_raw_kfree` and adds
+ * `tracepoint_kmem_cache_free` to enhance the information recorded for
+ * `kmem_cache_free`.
+ * see:
+ *     https://github.com/torvalds/linux/commit/3544de8ee6e4
+ */
+
+struct trace_event_raw_kmem_free___x {
+	const void *ptr;
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_kfree___x {
+	const void *ptr;
+} __attribute__((preserve_access_index));
+
+struct trace_event_raw_kmem_cache_free___x {
+	const void *ptr;
+} __attribute__((preserve_access_index));
+
+static __always_inline bool has_kfree()
+{
+	if (bpf_core_type_exists(struct trace_event_raw_kfree___x))
+		return true;
+	return false;
+}
+
+static __always_inline bool has_kmem_cache_free()
+{
+	if (bpf_core_type_exists(struct trace_event_raw_kmem_cache_free___x))
+		return true;
+	return false;
+}
+
+#endif /* __CORE_FIXES_BPF_H */
diff --git a/src/17-biopattern/maps.bpf.h b/src/17-biopattern/maps.bpf.h
new file mode 100644
index 0000000..51d1012
--- /dev/null
+++ b/src/17-biopattern/maps.bpf.h
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2020 Anton Protopopov
+#ifndef __MAPS_BPF_H
+#define __MAPS_BPF_H
+
+#include <bpf/bpf_helpers.h>
+#include <asm-generic/errno.h>
+
+static __always_inline void *
+bpf_map_lookup_or_try_init(void *map, const void *key, const void *init)
+{
+	void *val;
+	long err;
+
+	val = bpf_map_lookup_elem(map, key);
+	if (val)
+		return val;
+
+	err = bpf_map_update_elem(map, key, init, BPF_NOEXIST);
+	if (err && err != -EEXIST)
+		return 0;
+
+	return bpf_map_lookup_elem(map, key);
+}
+
+#endif /* __MAPS_BPF_H */
diff --git a/src/17-biopattern/trace_helpers.c b/src/17-biopattern/trace_helpers.c
new file mode 100644
index 0000000..e873d35
--- /dev/null
+++ b/src/17-biopattern/trace_helpers.c
@@ -0,0 +1,452 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+// Copyright (c) 2020 Wenbo Zhang
+//
+// Based on ksyms improvements from Andrii Nakryiko, add more helpers.
+// 28-Feb-2020 Wenbo Zhang Created this.
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/resource.h>
+#include <time.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <limits.h>
+#include "trace_helpers.h"
+
+#define min(x, y) ({ \
+	typeof(x) _min1 = (x); \
+	typeof(y) _min2 = (y); \
+	(void) (&_min1 == &_min2); \
+	_min1 < _min2 ? _min1 : _min2; })
+
+#define DISK_NAME_LEN 32
+
+#define MINORBITS 20
+#define MINORMASK ((1U << MINORBITS) - 1)
+
+#define MKDEV(ma, mi) (((ma) << MINORBITS) | (mi))
+
+struct ksyms {
+	struct ksym *syms;
+	int syms_sz;
+	int syms_cap;
+	char *strs;
+	int strs_sz;
+	int strs_cap;
+};
+
+struct partitions {
+	struct partition *items;
+	int sz;
+};
+
+static int partitions__add_partition(struct partitions *partitions,
+				     const char *name, unsigned int dev)
+{
+	struct partition *partition;
+	void *tmp;
+
+	tmp = realloc(partitions->items, (partitions->sz + 1) *
+		sizeof(*partitions->items));
+	if (!tmp)
+		return -1;
+	partitions->items = tmp;
+	partition = &partitions->items[partitions->sz];
+	partition->name = strdup(name);
+	partition->dev = dev;
+	if (partition->name) /* only publish the slot when the name copy succeeded */
+		partitions->sz++;
+	return partition->name ? 0 : -1;
+}
+
+struct partitions *partitions__load(void)
+{
+	char part_name[DISK_NAME_LEN];
+	unsigned int devmaj, devmin;
+	unsigned long long nop;
+	struct partitions *partitions;
+	char buf[64];
+	FILE *f;
+
+	f = fopen("/proc/partitions", "r");
+	if (!f)
+		return NULL;
+
+	partitions = calloc(1, sizeof(*partitions));
+	if (!partitions)
+		goto err_out;
+
+	while (fgets(buf, sizeof(buf), f) != NULL) {
+		/* skip heading */
+		if (buf[0] != ' ' || buf[0] == '\n')
+			continue;
+		if (sscanf(buf, "%u %u %llu %31s", &devmaj, &devmin, &nop,
+				part_name) != 4) /* %31s: DISK_NAME_LEN - 1, keeps part_name in bounds */
+			goto err_out;
+		if (partitions__add_partition(partitions, part_name,
+						MKDEV(devmaj, devmin)))
+			goto err_out;
+	}
+
+	fclose(f);
+	return partitions;
+
+err_out:
+	partitions__free(partitions);
+	fclose(f);
+	return NULL;
+}
+
+void partitions__free(struct partitions *partitions)
+{
+	int i;
+
+	if (!partitions)
+		return;
+
+	for (i = 0; i < partitions->sz; i++)
+		free(partitions->items[i].name);
+	free(partitions->items);
+	free(partitions);
+}
+
+const struct partition *
+partitions__get_by_dev(const struct partitions *partitions, unsigned int dev)
+{
+	int i;
+
+	for (i = 0; i < partitions->sz; i++) {
+		if (partitions->items[i].dev == dev)
+			return &partitions->items[i];
+	}
+
+	return NULL;
+}
+
+const struct partition *
+partitions__get_by_name(const struct partitions *partitions, const char *name)
+{
+	int i;
+
+	for (i = 0; i < partitions->sz; i++) {
+		if (strcmp(partitions->items[i].name, name) == 0)
+			return &partitions->items[i];
+	}
+
+	return NULL;
+}
+
+static void print_stars(unsigned int val, unsigned int val_max, int width)
+{
+	int num_stars, num_spaces, i;
+	bool need_plus;
+
+	num_stars = min(val, val_max) * width / val_max;
+	num_spaces = width - num_stars;
+	need_plus = val > val_max;
+
+	for (i = 0; i < num_stars; i++)
+		printf("*");
+	for (i = 0; i < num_spaces; i++)
+		printf(" ");
+	if (need_plus)
+		printf("+");
+}
+
+void print_log2_hist(unsigned int *vals, int vals_size, const char *val_type)
+{
+	int stars_max = 40, idx_max = -1;
+	unsigned int val, val_max = 0;
+	unsigned long long low, high;
+	int stars, width, i;
+
+	for (i = 0; i < vals_size; i++) {
+		val = vals[i];
+		if (val > 0)
+			idx_max = i;
+		if (val > val_max)
+			val_max = val;
+	}
+
+	if (idx_max < 0)
+		return;
+
+	printf("%*s%-*s : count distribution\n", idx_max <= 32 ? 5 : 15, "",
+		idx_max <= 32 ? 19 : 29, val_type);
+
+	if (idx_max <= 32)
+		stars = stars_max;
+	else
+		stars = stars_max / 2;
+
+	for (i = 0; i <= idx_max; i++) {
+		low = (1ULL << (i + 1)) >> 1;
+		high = (1ULL << (i + 1)) - 1;
+		if (low == high)
+			low -= 1;
+		val = vals[i];
+		width = idx_max <= 32 ? 10 : 20;
+		printf("%*lld -> %-*lld : %-8d |", width, low, width, high, val);
+		print_stars(val, val_max, stars);
+		printf("|\n");
+	}
+}
+
+void print_linear_hist(unsigned int *vals, int vals_size, unsigned int base,
+		       unsigned int step, const char *val_type)
+{
+	int i, stars_max = 40, idx_min = -1, idx_max = -1;
+	unsigned int val, val_max = 0;
+
+	for (i = 0; i < vals_size; i++) {
+		val = vals[i];
+		if (val > 0) {
+			idx_max = i;
+			if (idx_min < 0)
+				idx_min = i;
+		}
+		if (val > val_max)
+			val_max = val;
+	}
+
+	if (idx_max < 0)
+		return;
+
+	printf(" %-13s : count distribution\n", val_type);
+	for (i = idx_min; i <= idx_max; i++) {
+		val = vals[i];
+		if (!val)
+			continue;
+		printf(" %-10d : %-8d |", base + i * step, val);
+		print_stars(val, val_max, stars_max);
+		printf("|\n");
+	}
+}
+
+unsigned long long get_ktime_ns(void)
+{
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+	return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+
+bool is_kernel_module(const char *name)
+{
+	bool found = false;
+	char buf[64], mod_name[64]; /* separate output buffer: sscanf must not write into its own input */
+	FILE *f;
+
+	f = fopen("/proc/modules", "r");
+	if (!f)
+		return false;
+
+	while (fgets(buf, sizeof(buf), f) != NULL) {
+		if (sscanf(buf, "%63s %*s\n", mod_name) != 1)
+			break;
+		if (!strcmp(mod_name, name)) {
+			found = true;
+			break;
+		}
+	}
+
+	fclose(f);
+	return found;
+}
+
+static bool fentry_try_attach(int id)
+{
+	int prog_fd, attach_fd;
+	char error[4096];
+	struct bpf_insn insns[] = {
+		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = 0 },
+		{ .code = BPF_JMP | BPF_EXIT },
+	};
+	LIBBPF_OPTS(bpf_prog_load_opts, opts,
+			.expected_attach_type = BPF_TRACE_FENTRY,
+			.attach_btf_id = id,
+			.log_buf = error,
+			.log_size = sizeof(error),
+	);
+
+	prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACING, "test", "GPL", insns,
+			sizeof(insns) / sizeof(struct bpf_insn), &opts);
+	if (prog_fd < 0)
+		return false;
+
+	attach_fd = bpf_raw_tracepoint_open(NULL, prog_fd);
+	if (attach_fd >= 0)
+		close(attach_fd);
+
+	close(prog_fd);
+	return attach_fd >= 0;
+}
+
+bool fentry_can_attach(const char *name, const char *mod)
+{
+	struct btf *btf, *vmlinux_btf, *module_btf = NULL;
+	int err, id;
+
+	vmlinux_btf = btf__load_vmlinux_btf();
+	err = libbpf_get_error(vmlinux_btf);
+	if (err)
+		return false;
+
+	btf = vmlinux_btf;
+
+	if (mod) {
+		module_btf = btf__load_module_btf(mod, vmlinux_btf);
+		err = libbpf_get_error(module_btf);
+		if (!err)
+			btf = module_btf;
+	}
+
+	id = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
+
+	btf__free(module_btf);
+	btf__free(vmlinux_btf);
+	return id > 0 && fentry_try_attach(id);
+}
+
+bool kprobe_exists(const char *name)
+{
+	char addr_range[256];
+	char sym_name[256]; /* every %255s below is bounded by these sizes */
+	FILE *f;
+	int ret;
+
+	f = fopen("/sys/kernel/debug/kprobes/blacklist", "r");
+	if (!f)
+		goto avail_filter;
+
+	while (true) {
+		ret = fscanf(f, "%255s %255s%*[^\n]\n", addr_range, sym_name);
+		if (ret == EOF && feof(f))
+			break;
+		if (ret != 2) {
+			fprintf(stderr, "failed to read symbol from kprobe blacklist\n");
+			break;
+		}
+		if (!strcmp(name, sym_name)) {
+			fclose(f);
+			return false;
+		}
+	}
+	fclose(f);
+
+avail_filter:
+	f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
+	if (!f)
+		goto slow_path;
+
+	while (true) {
+		ret = fscanf(f, "%255s%*[^\n]\n", sym_name);
+		if (ret == EOF && feof(f))
+			break;
+		if (ret != 1) {
+			fprintf(stderr, "failed to read symbol from available_filter_functions\n");
+			break;
+		}
+		if (!strcmp(name, sym_name)) {
+			fclose(f);
+			return true;
+		}
+	}
+
+	fclose(f);
+	return false;
+
+slow_path:
+	f = fopen("/proc/kallsyms", "r");
+	if (!f)
+		return false;
+
+	while (true) {
+		ret = fscanf(f, "%*x %*c %255s%*[^\n]\n", sym_name);
+		if (ret == EOF && feof(f))
+			break;
+		if (ret != 1) {
+			fprintf(stderr, "failed to read symbol from kallsyms\n");
+			break;
+		}
+		if (!strcmp(name, sym_name)) {
+			fclose(f);
+			return true;
+		}
+	}
+
+	fclose(f);
+	return false;
+}
+
+bool tracepoint_exists(const char *category, const char *event)
+{
+	char path[PATH_MAX];
+
+	snprintf(path, sizeof(path), "/sys/kernel/debug/tracing/events/%s/%s/format", category, event);
+	if (!access(path, F_OK))
+		return true;
+	return false;
+}
+
+bool vmlinux_btf_exists(void)
+{
+	struct btf *btf;
+	int err;
+
+	btf = btf__load_vmlinux_btf();
+	err = libbpf_get_error(btf);
+	if (err)
+		return false;
+
+	btf__free(btf);
+	return true;
+}
+
+bool module_btf_exists(const char *mod)
+{
+	char sysfs_mod[80];
+
+	if (mod) {
+		snprintf(sysfs_mod, sizeof(sysfs_mod), "/sys/kernel/btf/%s", mod);
+		if (!access(sysfs_mod, R_OK))
+			return true;
+	}
+	return false;
+}
+
+bool probe_tp_btf(const char *name)
+{
+	LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_TRACE_RAW_TP);
+	struct bpf_insn insns[] = {
+		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = 0 },
+		{ .code = BPF_JMP | BPF_EXIT },
+	};
+	int fd, insn_cnt = sizeof(insns) / sizeof(struct bpf_insn);
+
+	opts.attach_btf_id = libbpf_find_vmlinux_btf_id(name, BPF_TRACE_RAW_TP);
+	fd = bpf_prog_load(BPF_PROG_TYPE_TRACING, NULL, "GPL", insns, insn_cnt, &opts);
+	if (fd >= 0)
+		close(fd);
+	return fd >= 0;
+}
+
+bool probe_ringbuf()
+{
+	int map_fd;
+
+	map_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, getpagesize(), NULL);
+	if (map_fd < 0)
+		return false;
+
+	close(map_fd);
+	return true;
+}
diff --git a/src/17-biopattern/trace_helpers.h b/src/17-biopattern/trace_helpers.h
new file mode 100644
index 0000000..171bc4e
--- /dev/null
+++ b/src/17-biopattern/trace_helpers.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __TRACE_HELPERS_H
+#define __TRACE_HELPERS_H
+
+#include <stdbool.h>
+
+#define NSEC_PER_SEC 1000000000ULL
+
+struct ksym {
+	const char *name;
+	unsigned long addr;
+};
+
+struct ksyms;
+
+struct ksyms *ksyms__load(void);
+void ksyms__free(struct ksyms *ksyms);
+const struct ksym *ksyms__map_addr(const struct ksyms *ksyms,
+				   unsigned long addr);
+const struct ksym *ksyms__get_symbol(const struct ksyms *ksyms,
+				     const char *name);
+
+struct sym {
+	const char *name;
+	unsigned long start;
+	unsigned long size;
+	unsigned long offset;
+};
+
+struct syms;
+
+struct syms *syms__load_pid(int tgid);
+struct syms *syms__load_file(const char *fname);
+void syms__free(struct syms *syms);
+const struct sym *syms__map_addr(const struct syms *syms, unsigned long addr);
+const struct sym *syms__map_addr_dso(const struct syms *syms, unsigned long addr,
+				     char **dso_name, unsigned long *dso_offset);
+
+struct syms_cache;
+
+struct syms_cache *syms_cache__new(int nr);
+struct syms *syms_cache__get_syms(struct syms_cache *syms_cache, int tgid);
+void syms_cache__free(struct syms_cache *syms_cache);
+
+struct partition {
+	char *name;
+	unsigned int dev;
+};
+
+struct partitions;
+
+struct partitions *partitions__load(void);
+void partitions__free(struct partitions *partitions);
+const struct partition *
+partitions__get_by_dev(const struct partitions *partitions, unsigned int dev);
+const struct partition *
+partitions__get_by_name(const struct partitions *partitions, const char *name);
+
+void print_log2_hist(unsigned int *vals, int vals_size, const char *val_type);
+void print_linear_hist(unsigned int *vals, int vals_size, unsigned int base,
+		unsigned int step, const char *val_type);
+
+unsigned long long get_ktime_ns(void);
+
+bool is_kernel_module(const char *name);
+
+/*
+ * When attempting to use kprobe/kretprobe, please check out new fentry/fexit
+ * probes, as they provide better performance and usability. But in some
+ * situations we have to fall back to kprobe/kretprobe probes. This helper
+ * is used to detect fentry/fexit support for the specified kernel function.
+ *
+ *	1. A gap between kernel versions, kernel BTF is exposed
+ *	   starting from 5.4 kernel. but fentry/fexit is actually
+ *	   supported starting from 5.5.
+ *	2. Whether kernel supports module BTF or not
+ *
+ * *name* is the name of a kernel function to be attached to, which can be
+ * from vmlinux or a kernel module.
+ * *mod* is a hint that indicates the *name* may reside in module BTF,
+ * if NULL, it means *name* belongs to vmlinux.
+ */
+bool fentry_can_attach(const char *name, const char *mod);
+
+/*
+ * The name of a kernel function to be attached to may be changed between
+ * kernel releases. This helper is used to confirm whether the target kernel
+ * uses a certain function name before attaching.
+ *
+ * It is achieved by scanning
+ *	/sys/kernel/debug/tracing/available_filter_functions
+ * If this file does not exist, it falls back to parsing /proc/kallsyms,
+ * which is slower.
+ */
+bool kprobe_exists(const char *name);
+bool tracepoint_exists(const char *category, const char *event);
+
+bool vmlinux_btf_exists(void);
+bool module_btf_exists(const char *mod);
+
+bool probe_tp_btf(const char *name);
+bool probe_ringbuf();
+
+#endif /* __TRACE_HELPERS_H */