From b160ff39d2cae1782d6a87705ff83f5bd10d2639 Mon Sep 17 00:00:00 2001 From: yunwei37 <1067852565@qq.com> Date: Sun, 4 Dec 2022 19:26:29 +0800 Subject: [PATCH] add full index --- 0-introduce/{introduce.md => README.md} | 26 +-- 1-helloworld/README.md | 34 +++- 10-hardirqs/README.md | 0 10-hardirqs/hardirq.h | 16 ++ 10-hardirqs/hardirqs.bpf.c | 130 +++++++++++++++ 11-llcstat/{llcstat.md => README.md} | 0 12-bindsnoop/.gitignore | 3 - 12-bindsnoop/README.md | 106 ------------ 12-bindsnoop/bindsnoop.bpf.c | 151 ------------------ 12-bindsnoop/bindsnoop.bpf.h | 31 ---- 12-bindsnoop/bindsnoop.md | 95 ----------- 16-profile/profile.md => 12-profile/README.md | 0 {17-memleak => 16-memleak}/memleak.md | 0 .../biolatency.md | 0 .../biopattern.md | 0 {18-biopattern => 17-biopattern}/biostacks.md | 0 {18-biopattern => 17-biopattern}/bitesize.md | 0 .../syscount.md | 0 {20-lsm-connect => 19-lsm-connect}/.gitignore | 0 {20-lsm-connect => 19-lsm-connect}/README.md | 0 .../lsm-connect.bpf.c | 0 {21-tc => 20-tc}/.gitignore | 0 {21-tc => 20-tc}/README.md | 0 {21-tc => 20-tc}/tc.bpf.c | 0 21-xdp/README.md | 0 9-runqlat/bits.bpf copy.h | 31 ++++ 9-runqlat/maps.bpf copy.h | 26 +++ README.md | 48 +++--- 28 files changed, 275 insertions(+), 422 deletions(-) rename 0-introduce/{introduce.md => README.md} (91%) create mode 100644 10-hardirqs/README.md create mode 100644 10-hardirqs/hardirq.h create mode 100644 10-hardirqs/hardirqs.bpf.c rename 11-llcstat/{llcstat.md => README.md} (100%) delete mode 100644 12-bindsnoop/.gitignore delete mode 100644 12-bindsnoop/README.md delete mode 100644 12-bindsnoop/bindsnoop.bpf.c delete mode 100644 12-bindsnoop/bindsnoop.bpf.h delete mode 100644 12-bindsnoop/bindsnoop.md rename 16-profile/profile.md => 12-profile/README.md (100%) rename {17-memleak => 16-memleak}/memleak.md (100%) rename {18-biopattern => 17-biopattern}/biolatency.md (100%) rename {18-biopattern => 17-biopattern}/biopattern.md (100%) rename {18-biopattern => 
17-biopattern}/biostacks.md (100%) rename {18-biopattern => 17-biopattern}/bitesize.md (100%) rename {19-syscount => 18-further-reading}/syscount.md (100%) rename {20-lsm-connect => 19-lsm-connect}/.gitignore (100%) rename {20-lsm-connect => 19-lsm-connect}/README.md (100%) rename {20-lsm-connect => 19-lsm-connect}/lsm-connect.bpf.c (100%) rename {21-tc => 20-tc}/.gitignore (100%) rename {21-tc => 20-tc}/README.md (100%) rename {21-tc => 20-tc}/tc.bpf.c (100%) create mode 100644 21-xdp/README.md create mode 100644 9-runqlat/bits.bpf copy.h create mode 100644 9-runqlat/maps.bpf copy.h diff --git a/0-introduce/introduce.md b/0-introduce/README.md similarity index 91% rename from 0-introduce/introduce.md rename to 0-introduce/README.md index 9dbe637..afcbd64 100644 --- a/0-introduce/introduce.md +++ b/0-introduce/README.md @@ -1,18 +1,20 @@ -# eBPF 入门开发实践指南一:介绍与快速上手 +# eBPF 入门开发实践指南一:介绍 eBPF 的基本概念、常见的开发工具 -- [1. 什么是eBPF](#1-什么是ebpf) - - [1.1. 起源](#11-起源) - - [1.2. 执行逻辑](#12-执行逻辑) - - [1.3. 架构](#13-架构) - - [1.3.1. 寄存器设计](#131-寄存器设计) - - [1.3.2. 指令编码格式](#132-指令编码格式) - - [1.4. 本节参考文章](#14-本节参考文章) -- [2. 如何使用eBPF编程](#2-如何使用ebpf编程) - - [2.1. BCC](#21-bcc) - - [2.2. libbpf-bootstrap](#22-libbpf-bootstrap) - - [2.3 eunomia-bpf](#23-eunomia-bpf) +- [eBPF 入门开发实践指南一:介绍 eBPF 的基本概念、常见的开发工具](#ebpf-入门开发实践指南一介绍-ebpf-的基本概念常见的开发工具) + - [1. 什么是eBPF](#1-什么是ebpf) + - [1.1. 起源](#11-起源) + - [1.2. 执行逻辑](#12-执行逻辑) + - [1.3. 架构](#13-架构) + - [1.3.1. 寄存器设计](#131-寄存器设计) + - [1.3.2. 指令编码格式](#132-指令编码格式) + - [1.4. 本节参考文章](#14-本节参考文章) + - [2. 如何使用eBPF编程](#2-如何使用ebpf编程) + - [2.1. BCC](#21-bcc) + - [2.2. 
libbpf-bootstrap](#22-libbpf-bootstrap) + - [2.3 eunomia-bpf](#23-eunomia-bpf) + - [参考资料](#参考资料) diff --git a/1-helloworld/README.md b/1-helloworld/README.md index 190b270..72b237f 100644 --- a/1-helloworld/README.md +++ b/1-helloworld/README.md @@ -1,4 +1,34 @@ +# eBPF 入门开发实践指南二:Hello World,基本框架和开发流程 +eBPF (Extended Berkeley Packet Filter) 是 Linux 内核上的一个强大的网络和性能分析工具。它允许开发者在内核运行时动态加载、更新和运行用户定义的代码。 + +本文是 eBPF 入门开发实践指南的第二篇,主要介绍 eBPF 的基本框架和开发流程。 + +## Hello World - minimal eBPF program + +```c +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#define BPF_NO_GLOBAL_DATA +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +typedef unsigned int u32; +typedef int pid_t; +const pid_t pid_filter = 0; + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; + +SEC("tp/syscalls/sys_enter_write") +int handle_tp(void *ctx) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + if (pid_filter && pid != pid_filter) + return 0; + bpf_printk("BPF triggered from PID %d.\n", pid); + return 0; +} +``` `minimal` is just that – a minimal practical BPF application example. It doesn't use or require BPF CO-RE, so should run on quite old kernels. It @@ -23,7 +53,7 @@ $ sudo cat /sys/kernel/debug/tracing/trace_pipe `minimal` is great as a bare-bones experimental playground to quickly try out new ideas or BPF features.
-## Compile and Run +## Compile and Run with eunomia-bpf @@ -45,4 +75,4 @@ Run: ```console sudo ecli ./package.json -``` \ No newline at end of file +``` diff --git a/10-hardirqs/README.md b/10-hardirqs/README.md new file mode 100644 index 0000000..e69de29 diff --git a/10-hardirqs/hardirq.h b/10-hardirqs/hardirq.h new file mode 100644 index 0000000..97fec18 --- /dev/null +++ b/10-hardirqs/hardirq.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __HARDIRQS_H +#define __HARDIRQS_H + +#define MAX_SLOTS 20 + +struct irq_key { + char name[32]; +}; + +struct info { + __u64 count; + __u32 slots[MAX_SLOTS]; +}; + +#endif /* __HARDIRQS_H */ diff --git a/10-hardirqs/hardirqs.bpf.c b/10-hardirqs/hardirqs.bpf.c new file mode 100644 index 0000000..14c12d9 --- /dev/null +++ b/10-hardirqs/hardirqs.bpf.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Wenbo Zhang +#include +#include +#include +#include +#include "hardirqs.h" +#include "bits.bpf.h" +#include "maps.bpf.h" + +#define MAX_ENTRIES 256 + +const volatile bool filter_cg = false; +const volatile bool targ_dist = false; +const volatile bool targ_ns = false; +const volatile bool do_count = false; + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY); + __type(key, u32); + __type(value, u32); + __uint(max_entries, 1); +} cgroup_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, u64); +} start SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, struct irq_key); + __type(value, struct info); +} infos SEC(".maps"); + +static struct info zero; + +static int handle_entry(int irq, struct irqaction *action) +{ + if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0)) + return 0; + + if (do_count) { + struct irq_key key = {}; + struct info *info; + + bpf_probe_read_kernel_str(&key.name, sizeof(key.name), 
BPF_CORE_READ(action, name)); + info = bpf_map_lookup_or_try_init(&infos, &key, &zero); + if (!info) + return 0; + info->count += 1; + return 0; + } else { + u64 ts = bpf_ktime_get_ns(); + u32 key = 0; + + if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0)) + return 0; + + bpf_map_update_elem(&start, &key, &ts, BPF_ANY); + return 0; + } +} + +static int handle_exit(int irq, struct irqaction *action) +{ + struct irq_key ikey = {}; + struct info *info; + u32 key = 0; + u64 delta; + u64 *tsp; + + if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0)) + return 0; + + tsp = bpf_map_lookup_elem(&start, &key); + if (!tsp) + return 0; + + delta = bpf_ktime_get_ns() - *tsp; + if (!targ_ns) + delta /= 1000U; + + bpf_probe_read_kernel_str(&ikey.name, sizeof(ikey.name), BPF_CORE_READ(action, name)); + info = bpf_map_lookup_or_try_init(&infos, &ikey, &zero); + if (!info) + return 0; + + if (!targ_dist) { + info->count += delta; + } else { + u64 slot; + + slot = log2(delta); + if (slot >= MAX_SLOTS) + slot = MAX_SLOTS - 1; + info->slots[slot]++; + } + + return 0; +} + +SEC("tp_btf/irq_handler_entry") +int BPF_PROG(irq_handler_entry_btf, int irq, struct irqaction *action) +{ + return handle_entry(irq, action); +} + +SEC("tp_btf/irq_handler_exit") +int BPF_PROG(irq_handler_exit_btf, int irq, struct irqaction *action) +{ + return handle_exit(irq, action); +} + +SEC("raw_tp/irq_handler_entry") +int BPF_PROG(irq_handler_entry, int irq, struct irqaction *action) +{ + return handle_entry(irq, action); +} + +SEC("raw_tp/irq_handler_exit") +int BPF_PROG(irq_handler_exit, int irq, struct irqaction *action) +{ + return handle_exit(irq, action); +} + +char LICENSE[] SEC("license") = "GPL"; \ No newline at end of file diff --git a/11-llcstat/llcstat.md b/11-llcstat/README.md similarity index 100% rename from 11-llcstat/llcstat.md rename to 11-llcstat/README.md diff --git a/12-bindsnoop/.gitignore b/12-bindsnoop/.gitignore deleted file mode 100644 index a1027ce..0000000 
--- a/12-bindsnoop/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -.vscode -package.json -ecli diff --git a/12-bindsnoop/README.md b/12-bindsnoop/README.md deleted file mode 100644 index 35f167a..0000000 --- a/12-bindsnoop/README.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -layout: post -title: bindsnoop -date: 2022-10-10 16:18 -category: bpftools -author: yunwei37 -tags: [bpftools, syscall, kprobe, perf-event] -summary: This tool traces the kernel function performing socket binding and print socket options set before the system call. ---- - -## origin - -origin from: - -https://github.com/iovisor/bcc/blob/master/libbpf-tools/bindsnoop.bpf.c - -## Compile and Run - -Compile: - -```shell -docker run -it -v `pwd`/:/src/ yunwei37/ebpm:latest -``` - -Run: - -```shell -sudo ./ecli run examples/bpftools/bindsnoop/package.json -``` - -## details in bcc - -Demonstrations of bindsnoop, the Linux eBPF/bcc version. - -This tool traces the kernel function performing socket binding and -print socket options set before the system call invocation that might -```console - impact bind behavior and bound interface: - SOL_IP IP_FREEBIND F.... - SOL_IP IP_TRANSPARENT .T... - SOL_IP IP_BIND_ADDRESS_NO_PORT ..N.. - SOL_SOCKET SO_REUSEADDR ...R. - SOL_SOCKET SO_REUSEPORT ....r -``` -```console -# ./bindsnoop.py -Tracing binds ... Hit Ctrl-C to end -PID COMM PROT ADDR PORT OPTS IF -3941081 test_bind_op TCP 192.168.1.102 0 F.N.. 0 -3940194 dig TCP :: 62087 ..... 0 -3940219 dig UDP :: 48665 ..... 0 -3940893 Acceptor Thr TCP :: 35343 ...R. 0 -``` -The output shows four bind system calls: -two "test_bind_op" instances, one with IP_FREEBIND and IP_BIND_ADDRESS_NO_PORT -options, dig process called bind for TCP and UDP sockets, -and Acceptor called bind for TCP with SO_REUSEADDR option set. - - -The -t option prints a timestamp column -```console -# ./bindsnoop.py -t -TIME(s) PID COMM PROT ADDR PORT OPTS IF -0.000000 3956801 dig TCP :: 49611 ..... 0 -0.011045 3956822 dig UDP :: 56343 ..... 
0 -2.310629 3956498 test_bind_op TCP 192.168.1.102 39609 F...r 0 -``` - -The -U option prints a UID column: -```console -# ./bindsnoop.py -U -Tracing binds ... Hit Ctrl-C to end - UID PID COMM PROT ADDR PORT OPTS IF -127072 3956498 test_bind_op TCP 192.168.1.102 44491 F...r 0 -127072 3960261 Acceptor Thr TCP :: 48869 ...R. 0 - 0 3960729 Acceptor Thr TCP :: 44637 ...R. 0 - 0 3959075 chef-client UDP :: 61722 ..... 0 -``` - -The -u option filtering UID: -```console -# ./bindsnoop.py -Uu 0 -Tracing binds ... Hit Ctrl-C to end - UID PID COMM PROT ADDR PORT OPTS IF - 0 3966330 Acceptor Thr TCP :: 39319 ...R. 0 - 0 3968044 python3.7 TCP ::1 59371 ..... 0 - 0 10224 fetch TCP 0.0.0.0 42091 ...R. 0 -``` - -The --cgroupmap option filters based on a cgroup set. -It is meant to be used with an externally created map. -```console -# ./bindsnoop.py --cgroupmap /sys/fs/bpf/test01 -``` -For more details, see docs/special_filtering.md - - -In order to track heavy bind usage one can use --count option -```console -# ./bindsnoop.py --count -Tracing binds ... 
Hit Ctrl-C to end -LADDR LPORT BINDS -0.0.0.0 6771 4 -0.0.0.0 4433 4 -127.0.0.1 33665 1 -``` \ No newline at end of file diff --git a/12-bindsnoop/bindsnoop.bpf.c b/12-bindsnoop/bindsnoop.bpf.c deleted file mode 100644 index dc99ba4..0000000 --- a/12-bindsnoop/bindsnoop.bpf.c +++ /dev/null @@ -1,151 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -/* Copyright (c) 2021 Hengqi Chen */ -#include -#include -#include -#include -#include -#include "bindsnoop.bpf.h" - -#define MAX_ENTRIES 10240 -#define MAX_PORTS 1024 - -const volatile bool filter_cg = false; -const volatile pid_t target_pid = 0; -const volatile bool ignore_errors = true; -const volatile bool filter_by_port = false; - -struct { - __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY); - __type(key, u32); - __type(value, u32); - __uint(max_entries, 1); -} cgroup_map SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_HASH); - __uint(max_entries, MAX_ENTRIES); - __type(key, __u32); - __type(value, struct socket *); -} sockets SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_HASH); - __uint(max_entries, MAX_PORTS); - __type(key, __u16); - __type(value, __u16); -} ports SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); - __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(__u32)); -} events SEC(".maps"); - -static int probe_entry(struct pt_regs *ctx, struct socket *socket) -{ - __u64 pid_tgid = bpf_get_current_pid_tgid(); - __u32 pid = pid_tgid >> 32; - __u32 tid = (__u32)pid_tgid; - - if (target_pid && target_pid != pid) - return 0; - - bpf_map_update_elem(&sockets, &tid, &socket, BPF_ANY); - return 0; -}; - -static int probe_exit(struct pt_regs *ctx, short ver) -{ - __u64 pid_tgid = bpf_get_current_pid_tgid(); - __u32 pid = pid_tgid >> 32; - __u32 tid = (__u32)pid_tgid; - struct socket **socketp, *socket; - struct inet_sock *inet_sock; - struct sock *sock; - union bind_options opts; - struct bind_event event = {}; - __u16 sport = 0, *port; - int ret; - - socketp = 
bpf_map_lookup_elem(&sockets, &tid); - if (!socketp) - return 0; - - ret = PT_REGS_RC(ctx); - if (ignore_errors && ret != 0) - goto cleanup; - - socket = *socketp; - sock = BPF_CORE_READ(socket, sk); - inet_sock = (struct inet_sock *)sock; - - sport = bpf_ntohs(BPF_CORE_READ(inet_sock, inet_sport)); - port = bpf_map_lookup_elem(&ports, &sport); - if (filter_by_port && !port) - goto cleanup; - - opts.fields.freebind = BPF_CORE_READ_BITFIELD_PROBED(inet_sock, freebind); - opts.fields.transparent = BPF_CORE_READ_BITFIELD_PROBED(inet_sock, transparent); - opts.fields.bind_address_no_port = BPF_CORE_READ_BITFIELD_PROBED(inet_sock, bind_address_no_port); - opts.fields.reuseaddress = BPF_CORE_READ_BITFIELD_PROBED(sock, __sk_common.skc_reuse); - opts.fields.reuseport = BPF_CORE_READ_BITFIELD_PROBED(sock, __sk_common.skc_reuseport); - event.opts = opts.data; - event.ts_us = bpf_ktime_get_ns() / 1000; - event.pid = pid; - event.port = sport; - event.bound_dev_if = BPF_CORE_READ(sock, __sk_common.skc_bound_dev_if); - event.ret = ret; - event.proto = BPF_CORE_READ_BITFIELD_PROBED(sock, sk_protocol); - bpf_get_current_comm(&event.task, sizeof(event.task)); - if (ver == 4) { - event.ver = ver; - bpf_probe_read_kernel(&event.addr, sizeof(event.addr), &inet_sock->inet_saddr); - } else { /* ver == 6 */ - event.ver = ver; - bpf_probe_read_kernel(&event.addr, sizeof(event.addr), sock->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); - } - bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event)); - -cleanup: - bpf_map_delete_elem(&sockets, &tid); - return 0; -} - -SEC("kprobe/inet_bind") -int BPF_KPROBE(ipv4_bind_entry, struct socket *socket) -{ - if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0)) - return 0; - - return probe_entry(ctx, socket); -} - -SEC("kretprobe/inet_bind") -int BPF_KRETPROBE(ipv4_bind_exit) -{ - if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0)) - return 0; - - return probe_exit(ctx, 4); -} - 
-SEC("kprobe/inet6_bind") -int BPF_KPROBE(ipv6_bind_entry, struct socket *socket) -{ - if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0)) - return 0; - - return probe_entry(ctx, socket); -} - -SEC("kretprobe/inet6_bind") -int BPF_KRETPROBE(ipv6_bind_exit) -{ - if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0)) - return 0; - - return probe_exit(ctx, 6); -} - -char LICENSE[] SEC("license") = "Dual BSD/GPL"; \ No newline at end of file diff --git a/12-bindsnoop/bindsnoop.bpf.h b/12-bindsnoop/bindsnoop.bpf.h deleted file mode 100644 index 9643c86..0000000 --- a/12-bindsnoop/bindsnoop.bpf.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#ifndef __BINDSNOOP_H -#define __BINDSNOOP_H - -#define TASK_COMM_LEN 16 - -struct bind_event { - unsigned __int128 addr; - unsigned long long ts_us; - unsigned int pid; - unsigned int bound_dev_if; - int ret; - unsigned short port; - unsigned short proto; - unsigned char opts; - unsigned char ver; - char task[TASK_COMM_LEN]; -}; - -union bind_options { - unsigned char data; - struct { - unsigned char freebind : 1; - unsigned char transparent : 1; - unsigned char bind_address_no_port : 1; - unsigned char reuseaddress : 1; - unsigned char reuseport : 1; - } fields; -}; - -#endif /* __BINDSNOOP_H */ diff --git a/12-bindsnoop/bindsnoop.md b/12-bindsnoop/bindsnoop.md deleted file mode 100644 index d98809a..0000000 --- a/12-bindsnoop/bindsnoop.md +++ /dev/null @@ -1,95 +0,0 @@ -## eBPF 入门实践教程:编写 eBPF 程序 Bindsnoopn 监控 socket 端口绑定事件 - -### 背景 - -Bindsnoop 会跟踪操作 socket 端口绑定的内核函数,并且在可能会影响端口绑定的系统调用发生之前,打印 -现有的 socket 选项。 - -### 实现原理 - -Bindsnoop 通过kprobe实现。其主要挂载点为 inet_bind 和 inet6_bind。inet_bind 为处理 IPV4 类型 -socket 端口绑定系统调用的接口,inet6_bind 为处理IPV6类型 socket 端口绑定系统调用的接口。 - -```c -SEC("kprobe/inet_bind") -int BPF_KPROBE(ipv4_bind_entry, struct socket *socket) -{ - if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0)) - return 0; - - return probe_entry(ctx, socket); -} 
-SEC("kretprobe/inet_bind") - -int BPF_KRETPROBE(ipv4_bind_exit) -{ - if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0)) - return 0; - - return probe_exit(ctx, 4); -} - -SEC("kprobe/inet6_bind") -int BPF_KPROBE(ipv6_bind_entry, struct socket *socket) -{ - if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0)) - return 0; - - return probe_entry(ctx, socket); -} - -SEC("kretprobe/inet6_bind") -int BPF_KRETPROBE(ipv6_bind_exit) -{ - if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0)) - return 0; - - return probe_exit(ctx, 6); -} -``` - -当系统试图进行socket端口绑定操作时, kprobe挂载的处理函数会被触发。在进入绑定函数时,`probe_entry`会先被 -调用,它会以 tid 为主键将 socket 信息存入 map 中。 - -```c -static int probe_entry(struct pt_regs *ctx, struct socket *socket) -{ - __u64 pid_tgid = bpf_get_current_pid_tgid(); - __u32 pid = pid_tgid >> 32; - __u32 tid = (__u32)pid_tgid; - - if (target_pid && target_pid != pid) - return 0; - - bpf_map_update_elem(&sockets, &tid, &socket, BPF_ANY); - return 0; -}; -``` -在执行完绑定函数后,`probe_exit`函数会被调用。该函数会读取tid对应的socket信息,将其和其他信息一起 -写入 event 结构体并输出到用户态。 - -```c -struct bind_event { - unsigned __int128 addr; - __u64 ts_us; - __u32 pid; - __u32 bound_dev_if; - int ret; - __u16 port; - __u16 proto; - __u8 opts; - __u8 ver; - char task[TASK_COMM_LEN]; -}; -``` - -当用户停止该工具时,其用户态代码会读取存入的数据并按要求打印。 - -### Eunomia中使用方式 - -![result](../imgs/mountsnoop.jpg) -![result](../imgs/bindsnoop-prometheus.png) - -### 总结 - -Bindsnoop 通过 kprobe 挂载点,实现了对 socket 端口的监视,增强了 Eunomia 的应用范围。 \ No newline at end of file diff --git a/16-profile/profile.md b/12-profile/README.md similarity index 100% rename from 16-profile/profile.md rename to 12-profile/README.md diff --git a/17-memleak/memleak.md b/16-memleak/memleak.md similarity index 100% rename from 17-memleak/memleak.md rename to 16-memleak/memleak.md diff --git a/18-biopattern/biolatency.md b/17-biopattern/biolatency.md similarity index 100% rename from 18-biopattern/biolatency.md rename to 17-biopattern/biolatency.md diff 
--git a/18-biopattern/biopattern.md b/17-biopattern/biopattern.md similarity index 100% rename from 18-biopattern/biopattern.md rename to 17-biopattern/biopattern.md diff --git a/18-biopattern/biostacks.md b/17-biopattern/biostacks.md similarity index 100% rename from 18-biopattern/biostacks.md rename to 17-biopattern/biostacks.md diff --git a/18-biopattern/bitesize.md b/17-biopattern/bitesize.md similarity index 100% rename from 18-biopattern/bitesize.md rename to 17-biopattern/bitesize.md diff --git a/19-syscount/syscount.md b/18-further-reading/syscount.md similarity index 100% rename from 19-syscount/syscount.md rename to 18-further-reading/syscount.md diff --git a/20-lsm-connect/.gitignore b/19-lsm-connect/.gitignore similarity index 100% rename from 20-lsm-connect/.gitignore rename to 19-lsm-connect/.gitignore diff --git a/20-lsm-connect/README.md b/19-lsm-connect/README.md similarity index 100% rename from 20-lsm-connect/README.md rename to 19-lsm-connect/README.md diff --git a/20-lsm-connect/lsm-connect.bpf.c b/19-lsm-connect/lsm-connect.bpf.c similarity index 100% rename from 20-lsm-connect/lsm-connect.bpf.c rename to 19-lsm-connect/lsm-connect.bpf.c diff --git a/21-tc/.gitignore b/20-tc/.gitignore similarity index 100% rename from 21-tc/.gitignore rename to 20-tc/.gitignore diff --git a/21-tc/README.md b/20-tc/README.md similarity index 100% rename from 21-tc/README.md rename to 20-tc/README.md diff --git a/21-tc/tc.bpf.c b/20-tc/tc.bpf.c similarity index 100% rename from 21-tc/tc.bpf.c rename to 20-tc/tc.bpf.c diff --git a/21-xdp/README.md b/21-xdp/README.md new file mode 100644 index 0000000..e69de29 diff --git a/9-runqlat/bits.bpf copy.h b/9-runqlat/bits.bpf copy.h new file mode 100644 index 0000000..a2b7bb9 --- /dev/null +++ b/9-runqlat/bits.bpf copy.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BITS_BPF_H +#define __BITS_BPF_H + +#define READ_ONCE(x) (*(volatile typeof(x) *)&(x)) +#define WRITE_ONCE(x, val) 
((*(volatile typeof(x) *)&(x)) = val) + +static __always_inline u64 log2(u32 v) +{ + u32 shift, r; + + r = (v > 0xFFFF) << 4; v >>= r; + shift = (v > 0xFF) << 3; v >>= shift; r |= shift; + shift = (v > 0xF) << 2; v >>= shift; r |= shift; + shift = (v > 0x3) << 1; v >>= shift; r |= shift; + r |= (v >> 1); + + return r; +} + +static __always_inline u64 log2l(u64 v) +{ + u32 hi = v >> 32; + + if (hi) + return log2(hi) + 32; + else + return log2(v); +} + +#endif /* __BITS_BPF_H */ diff --git a/9-runqlat/maps.bpf copy.h b/9-runqlat/maps.bpf copy.h new file mode 100644 index 0000000..51d1012 --- /dev/null +++ b/9-runqlat/maps.bpf copy.h @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +// Copyright (c) 2020 Anton Protopopov +#ifndef __MAPS_BPF_H +#define __MAPS_BPF_H + +#include +#include + +static __always_inline void * +bpf_map_lookup_or_try_init(void *map, const void *key, const void *init) +{ + void *val; + long err; + + val = bpf_map_lookup_elem(map, key); + if (val) + return val; + + err = bpf_map_update_elem(map, key, init, BPF_NOEXIST); + if (err && err != -EEXIST) + return 0; + + return bpf_map_lookup_elem(map, key); +} + +#endif /* __MAPS_BPF_H */ diff --git a/README.md b/README.md index e019fac..8d9a764 100644 --- a/README.md +++ b/README.md @@ -4,30 +4,34 @@ 在学习 eBPF 的过程中,我们受到了 [tutorial_bcc_python_developer](https://github.com/iovisor/bcc/blob/master/docs/tutorial_bcc_python_developer.md) 的许多启发和帮助,但从 2022 年的角度出发,使用 libbpf 开发 eBPF 的应用是目前相对更好的选择。但目前似乎很少有基于 libbpf 和 BPF CO-RE 出发的、通过案例和工具介绍 eBPF 开发的教程,因此我们发起了这个项目。 +本项目主要基于 [libbpf-bootstrap](https://github.com/libbpf/libbpf-bootstrap) 和 [eunomia-bpf](https://github.com/eunomia-bpf/eunomia-bpf) 两个框架完成,并使用 eunomia-bpf 帮助简化一部分 eBPF 用户态代码的编写。 + +教程主要关注于可观察性,并简要介绍了 eBPF 的其他应用。 + ## 目录 -- [lesson 0-introduce](0-introduce/introduce.md) 介绍 eBPF 的基本概念和常见的开发工具 -- [lesson 1-helloworld](1-helloworld/README.md) 演示如何使用 eBPF 开发最简单的「Hello World」程序,介绍 eBPF 的基本框架和开发流程 -- [lesson
2-fentry-unlink](2-fentry-unlink/README.md) 基于 eBPF 的 fentry hook,演示了如何捕获并记录 unlink 系统调用 -- [lesson 3-kprobe-unlink](3-kprobe-unlink/README.md) 基于 eBPF 的 kprobe hook,演示了如何捕获并记录 unlink 系统调用 -- [lesson 4-opensnoop](4-opensnoop/README.md) -- [lesson 5-uprobe-bashreadline](5-uprobe-bashreadline/README.md) -- [lesson 6-sigsnoop](6-sigsnoop/README.md) -- [lesson 7-execsnoop](7-execsnoop/README.md) -- [lesson 8-runqslower](8-runqslower/README.md) -- [lesson 9-runqlat](9-runqlat/README.md) -- [lesson 10-hardirqs](20-hardirqs/README.md) -- [lesson 11-llcstat](21-llcstat/README.md) -- [lesson 12-bindsnoop](12-bindsnoop/README.md) -- [lesson 13-tcpconnlat](13-tcpconnlat/README.md) -- [lesson 14-tcpstates](14-tcpstates/README.md) -- [lesson 15-tcprtt](15-tcprtt/README.md) -- [lesson 16-profile](16-profile/README.md) -- [lesson 17-memleak](17-memleak/README.md) -- [lesson 18-biopattern](18-biopattern/README.md) -- [lesson 19-syscount](19-syscount/README.md) -- [lesson 20-lsm-connect](20-lsm-connect/README.md) -- [lesson 21-tc](21-tc/README.md) +- [lesson 0-introduce](0-introduce/README.md) 介绍 eBPF 的基本概念和常见的开发工具 +- [lesson 1-helloworld](1-helloworld/README.md) 使用 eBPF 开发最简单的「Hello World」程序,介绍 eBPF 的基本框架和开发流程 +- [lesson 2-fentry-unlink](2-fentry-unlink/README.md) 在 eBPF 中使用 fentry 捕获 unlink 系统调用 +- [lesson 3-kprobe-unlink](3-kprobe-unlink/README.md) 在 eBPF 中使用 kprobe 捕获 unlink 系统调用 +- [lesson 4-opensnoop](4-opensnoop/README.md) 捕获进程打开文件的系统调用集合,使用全局变量在 eBPF 中过滤进程 pid +- [lesson 5-uprobe-bashreadline](5-uprobe-bashreadline/README.md) 使用 uprobe 捕获 bash 的 readline 函数调用 +- [lesson 6-sigsnoop](6-sigsnoop/README.md) 捕获进程发送信号的系统调用集合,使用 hash map 保存状态 +- [lesson 7-execsnoop](7-execsnoop/README.md) 捕获进程执行/退出时间,通过 perf event array 向用户态打印输出 +- [lesson 8-runqslower](8-runqslower/README.md) 捕获进程调度事件,使用 ring buffer 向用户态打印输出 +- [lesson 9-runqlat](9-runqlat/README.md) 捕获进程调度延迟,以直方图方式记录 +- [lesson 10-hardirqs](10-hardirqs/README.md) 使用 hardirqs 或 softirqs 捕获中断事件 +- [lesson
11-llcstat](11-llcstat/README.md) 通过 PID 统计 cache references and misses +- [lesson 12-profile](12-profile/README.md) 使用 eBPF 进行性能分析 +- [lesson 13-tcpconnlat](13-tcpconnlat/README.md) 记录 TCP 连接延迟 +- [lesson 14-tcpstates](14-tcpstates/README.md) 记录 TCP 连接状态 +- [lesson 15-tcprtt](15-tcprtt/README.md) 以直方图方式记录 TCP RTT +- [lesson 16-memleak](16-memleak/README.md) 检测内存泄漏 +- [lesson 17-biopattern](17-biopattern/README.md) 捕获磁盘 IO 模式 +- [lesson 18-further-reading](18-further-reading/README.md) 更进一步的相关资料? +- [lesson 19-lsm-connect](19-lsm-connect/README.md) 使用 LSM 进行安全检测防御 +- [lesson 20-tc](20-tc/README.md) 使用 eBPF 进行 tc 流量控制 +- [lesson 21-xdp](21-xdp/README.md) 使用 eBPF 进行 XDP 报文处理 ## 为什么需要基于 libbpf 和 BPF CO-RE 的教程?