mirror of
https://github.com/eunomia-bpf/bpf-developer-tutorial.git
synced 2026-05-06 05:11:40 +08:00
add full index
This commit is contained in:
@@ -1,18 +1,20 @@
|
|||||||
# eBPF 入门开发实践指南一:介绍与快速上手
|
# eBPF 入门开发实践指南一:介绍 eBPF 的基本概念、常见的开发工具
|
||||||
|
|
||||||
<!-- TOC -->
|
<!-- TOC -->
|
||||||
|
|
||||||
- [1. 什么是eBPF](#1-什么是ebpf)
|
- [eBPF 入门开发实践指南一:介绍 eBPF 的基本概念、常见的开发工具](#ebpf-入门开发实践指南一介绍-ebpf-的基本概念常见的开发工具)
|
||||||
- [1.1. 起源](#11-起源)
|
- [1. 什么是eBPF](#1-什么是ebpf)
|
||||||
- [1.2. 执行逻辑](#12-执行逻辑)
|
- [1.1. 起源](#11-起源)
|
||||||
- [1.3. 架构](#13-架构)
|
- [1.2. 执行逻辑](#12-执行逻辑)
|
||||||
- [1.3.1. 寄存器设计](#131-寄存器设计)
|
- [1.3. 架构](#13-架构)
|
||||||
- [1.3.2. 指令编码格式](#132-指令编码格式)
|
- [1.3.1. 寄存器设计](#131-寄存器设计)
|
||||||
- [1.4. 本节参考文章](#14-本节参考文章)
|
- [1.3.2. 指令编码格式](#132-指令编码格式)
|
||||||
- [2. 如何使用eBPF编程](#2-如何使用ebpf编程)
|
- [1.4. 本节参考文章](#14-本节参考文章)
|
||||||
- [2.1. BCC](#21-bcc)
|
- [2. 如何使用eBPF编程](#2-如何使用ebpf编程)
|
||||||
- [2.2. libbpf-bootstrap](#22-libbpf-bootstrap)
|
- [2.1. BCC](#21-bcc)
|
||||||
- [2.3 eunomia-bpf](#23-eunomia-bpf)
|
- [2.2. libbpf-bootstrap](#22-libbpf-bootstrap)
|
||||||
|
- [2.3 eunomia-bpf](#23-eunomia-bpf)
|
||||||
|
- [参考资料](#参考资料)
|
||||||
|
|
||||||
<!-- /TOC -->
|
<!-- /TOC -->
|
||||||
|
|
||||||
@@ -1,4 +1,34 @@
|
|||||||
|
# eBPF 入门开发实践指南二:Hello World,基本框架和开发流程
|
||||||
|
|
||||||
|
eBPF (Extended Berkeley Packet Filter) 是 Linux 内核上的一个强大的网络和性能分析工具。它允许开发者在内核运行时动态加载、更新和运行用户定义的代码。
|
||||||
|
|
||||||
|
本文是 eBPF 入门开发实践指南的第二篇,主要介绍 eBPF 的基本框架和开发流程。
|
||||||
|
|
||||||
|
## Hello World - minimal eBPF program
|
||||||
|
|
||||||
|
```c
|
||||||
|
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
|
||||||
|
#define BPF_NO_GLOBAL_DATA
|
||||||
|
#include <linux/bpf.h>
|
||||||
|
#include <bpf/bpf_helpers.h>
|
||||||
|
#include <bpf/bpf_tracing.h>
|
||||||
|
|
||||||
|
typedef unsigned int u32;
|
||||||
|
typedef int pid_t;
|
||||||
|
const pid_t pid_filter = 0;
|
||||||
|
|
||||||
|
char LICENSE[] SEC("license") = "Dual BSD/GPL";
|
||||||
|
|
||||||
|
/*
 * Handler attached to the sys_enter_write tracepoint.
 *
 * Takes the upper 32 bits of bpf_get_current_pid_tgid() as the PID and
 * prints one trace line, unless pid_filter is non-zero and names a
 * different PID. Always returns 0.
 */
SEC("tp/syscalls/sys_enter_write")
int handle_tp(void *ctx)
{
	pid_t pid = bpf_get_current_pid_tgid() >> 32;

	/* a zero pid_filter means "no filtering" */
	if (!pid_filter || pid == pid_filter)
		bpf_printk("BPF triggered from PID %d.\n", pid);

	return 0;
}
|
||||||
|
```
|
||||||
|
|
||||||
`minimal` is just that – a minimal practical BPF application example. It
|
`minimal` is just that – a minimal practical BPF application example. It
|
||||||
doesn't use or require BPF CO-RE, so should run on quite old kernels. It
|
doesn't use or require BPF CO-RE, so should run on quite old kernels. It
|
||||||
@@ -23,7 +53,7 @@ $ sudo cat /sys/kernel/debug/tracing/trace_pipe
|
|||||||
`minimal` is great as a bare-bones experimental playground to quickly try out
|
`minimal` is great as a bare-bones experimental playground to quickly try out
|
||||||
new ideas or BPF features.
|
new ideas or BPF features.
|
||||||
|
|
||||||
## Compile and Run
|
## Compile and Run with eunomia-bpf
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -45,4 +75,4 @@ Run:
|
|||||||
|
|
||||||
```console
|
```console
|
||||||
sudo ecli ./package.json
|
sudo ecli ./package.json
|
||||||
```
|
```
|
||||||
|
|||||||
0
10-hardirqs/README.md
Normal file
0
10-hardirqs/README.md
Normal file
16
10-hardirqs/hardirq.h
Normal file
16
10-hardirqs/hardirq.h
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
|
||||||
|
#ifndef __HARDIRQS_H
|
||||||
|
#define __HARDIRQS_H
|
||||||
|
|
||||||
|
#define MAX_SLOTS 20
|
||||||
|
|
||||||
|
/* Key of the per-IRQ statistics map: the handler name stored as a string. */
struct irq_key {
|
||||||
|
/* fixed 32-byte buffer holding the name copied from the irqaction */
char name[32];
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Per-IRQ statistics value stored for each irq_key. */
struct info {
|
||||||
|
/* accumulator: number of events in counting mode, otherwise summed duration */
__u64 count;
|
||||||
|
/* histogram buckets indexed by log2 of the measured duration, capped at MAX_SLOTS - 1 */
__u32 slots[MAX_SLOTS];
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* __HARDIRQS_H */
|
||||||
130
10-hardirqs/hardirqs.bpf.c
Normal file
130
10-hardirqs/hardirqs.bpf.c
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
// Copyright (c) 2020 Wenbo Zhang
|
||||||
|
#include <vmlinux.h>
|
||||||
|
#include <bpf/bpf_core_read.h>
|
||||||
|
#include <bpf/bpf_helpers.h>
|
||||||
|
#include <bpf/bpf_tracing.h>
|
||||||
|
#include "hardirqs.h"
|
||||||
|
#include "bits.bpf.h"
|
||||||
|
#include "maps.bpf.h"
|
||||||
|
|
||||||
|
#define MAX_ENTRIES 256
|
||||||
|
|
||||||
|
const volatile bool filter_cg = false;
|
||||||
|
const volatile bool targ_dist = false;
|
||||||
|
const volatile bool targ_ns = false;
|
||||||
|
const volatile bool do_count = false;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
__uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
|
||||||
|
__type(key, u32);
|
||||||
|
__type(value, u32);
|
||||||
|
__uint(max_entries, 1);
|
||||||
|
} cgroup_map SEC(".maps");
|
||||||
|
|
||||||
|
struct {
|
||||||
|
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||||
|
__uint(max_entries, 1);
|
||||||
|
__type(key, u32);
|
||||||
|
__type(value, u64);
|
||||||
|
} start SEC(".maps");
|
||||||
|
|
||||||
|
struct {
|
||||||
|
__uint(type, BPF_MAP_TYPE_HASH);
|
||||||
|
__uint(max_entries, MAX_ENTRIES);
|
||||||
|
__type(key, struct irq_key);
|
||||||
|
__type(value, struct info);
|
||||||
|
} infos SEC(".maps");
|
||||||
|
|
||||||
|
static struct info zero;
|
||||||
|
|
||||||
|
static int handle_entry(int irq, struct irqaction *action)
|
||||||
|
{
|
||||||
|
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (do_count) {
|
||||||
|
struct irq_key key = {};
|
||||||
|
struct info *info;
|
||||||
|
|
||||||
|
bpf_probe_read_kernel_str(&key.name, sizeof(key.name), BPF_CORE_READ(action, name));
|
||||||
|
info = bpf_map_lookup_or_try_init(&infos, &key, &zero);
|
||||||
|
if (!info)
|
||||||
|
return 0;
|
||||||
|
info->count += 1;
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
u64 ts = bpf_ktime_get_ns();
|
||||||
|
u32 key = 0;
|
||||||
|
|
||||||
|
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
bpf_map_update_elem(&start, &key, &ts, BPF_ANY);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int handle_exit(int irq, struct irqaction *action)
|
||||||
|
{
|
||||||
|
struct irq_key ikey = {};
|
||||||
|
struct info *info;
|
||||||
|
u32 key = 0;
|
||||||
|
u64 delta;
|
||||||
|
u64 *tsp;
|
||||||
|
|
||||||
|
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
tsp = bpf_map_lookup_elem(&start, &key);
|
||||||
|
if (!tsp)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
delta = bpf_ktime_get_ns() - *tsp;
|
||||||
|
if (!targ_ns)
|
||||||
|
delta /= 1000U;
|
||||||
|
|
||||||
|
bpf_probe_read_kernel_str(&ikey.name, sizeof(ikey.name), BPF_CORE_READ(action, name));
|
||||||
|
info = bpf_map_lookup_or_try_init(&infos, &ikey, &zero);
|
||||||
|
if (!info)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (!targ_dist) {
|
||||||
|
info->count += delta;
|
||||||
|
} else {
|
||||||
|
u64 slot;
|
||||||
|
|
||||||
|
slot = log2(delta);
|
||||||
|
if (slot >= MAX_SLOTS)
|
||||||
|
slot = MAX_SLOTS - 1;
|
||||||
|
info->slots[slot]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * irq_handler_entry hook placed in the "tp_btf" section; it only forwards
 * the tracepoint arguments to handle_entry().
 */
SEC("tp_btf/irq_handler_entry")
|
||||||
|
int BPF_PROG(irq_handler_entry_btf, int irq, struct irqaction *action)
|
||||||
|
{
|
||||||
|
return handle_entry(irq, action);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * irq_handler_exit hook placed in the "tp_btf" section; it only forwards
 * the tracepoint arguments to handle_exit().
 */
SEC("tp_btf/irq_handler_exit")
|
||||||
|
int BPF_PROG(irq_handler_exit_btf, int irq, struct irqaction *action)
|
||||||
|
{
|
||||||
|
return handle_exit(irq, action);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * irq_handler_entry hook placed in the "raw_tp" section; same body as the
 * tp_btf variant, forwarding to handle_entry().
 * NOTE(review): presumably the loader attaches only one of the two
 * variants - confirm against the userspace side.
 */
SEC("raw_tp/irq_handler_entry")
|
||||||
|
int BPF_PROG(irq_handler_entry, int irq, struct irqaction *action)
|
||||||
|
{
|
||||||
|
return handle_entry(irq, action);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * irq_handler_exit hook placed in the "raw_tp" section; same body as the
 * tp_btf variant, forwarding to handle_exit().
 */
SEC("raw_tp/irq_handler_exit")
|
||||||
|
int BPF_PROG(irq_handler_exit, int irq, struct irqaction *action)
|
||||||
|
{
|
||||||
|
return handle_exit(irq, action);
|
||||||
|
}
|
||||||
|
|
||||||
|
char LICENSE[] SEC("license") = "GPL";
|
||||||
3
12-bindsnoop/.gitignore
vendored
3
12-bindsnoop/.gitignore
vendored
@@ -1,3 +0,0 @@
|
|||||||
.vscode
|
|
||||||
package.json
|
|
||||||
ecli
|
|
||||||
@@ -1,106 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: bindsnoop
|
|
||||||
date: 2022-10-10 16:18
|
|
||||||
category: bpftools
|
|
||||||
author: yunwei37
|
|
||||||
tags: [bpftools, syscall, kprobe, perf-event]
|
|
||||||
summary: This tool traces the kernel function performing socket binding and print socket options set before the system call.
|
|
||||||
---
|
|
||||||
|
|
||||||
## origin
|
|
||||||
|
|
||||||
origin from:
|
|
||||||
|
|
||||||
https://github.com/iovisor/bcc/blob/master/libbpf-tools/bindsnoop.bpf.c
|
|
||||||
|
|
||||||
## Compile and Run
|
|
||||||
|
|
||||||
Compile:
|
|
||||||
|
|
||||||
```shell
|
|
||||||
docker run -it -v `pwd`/:/src/ yunwei37/ebpm:latest
|
|
||||||
```
|
|
||||||
|
|
||||||
Run:
|
|
||||||
|
|
||||||
```shell
|
|
||||||
sudo ./ecli run examples/bpftools/bindsnoop/package.json
|
|
||||||
```
|
|
||||||
|
|
||||||
## details in bcc
|
|
||||||
|
|
||||||
Demonstrations of bindsnoop, the Linux eBPF/bcc version.
|
|
||||||
|
|
||||||
This tool traces the kernel function performing socket binding and
|
|
||||||
print socket options set before the system call invocation that might
|
|
||||||
```console
|
|
||||||
impact bind behavior and bound interface:
|
|
||||||
SOL_IP IP_FREEBIND F....
|
|
||||||
SOL_IP IP_TRANSPARENT .T...
|
|
||||||
SOL_IP IP_BIND_ADDRESS_NO_PORT ..N..
|
|
||||||
SOL_SOCKET SO_REUSEADDR ...R.
|
|
||||||
SOL_SOCKET SO_REUSEPORT ....r
|
|
||||||
```
|
|
||||||
```console
|
|
||||||
# ./bindsnoop.py
|
|
||||||
Tracing binds ... Hit Ctrl-C to end
|
|
||||||
PID COMM PROT ADDR PORT OPTS IF
|
|
||||||
3941081 test_bind_op TCP 192.168.1.102 0 F.N.. 0
|
|
||||||
3940194 dig TCP :: 62087 ..... 0
|
|
||||||
3940219 dig UDP :: 48665 ..... 0
|
|
||||||
3940893 Acceptor Thr TCP :: 35343 ...R. 0
|
|
||||||
```
|
|
||||||
The output shows four bind system calls:
|
|
||||||
two "test_bind_op" instances, one with IP_FREEBIND and IP_BIND_ADDRESS_NO_PORT
|
|
||||||
options, dig process called bind for TCP and UDP sockets,
|
|
||||||
and Acceptor called bind for TCP with SO_REUSEADDR option set.
|
|
||||||
|
|
||||||
|
|
||||||
The -t option prints a timestamp column
|
|
||||||
```console
|
|
||||||
# ./bindsnoop.py -t
|
|
||||||
TIME(s) PID COMM PROT ADDR PORT OPTS IF
|
|
||||||
0.000000 3956801 dig TCP :: 49611 ..... 0
|
|
||||||
0.011045 3956822 dig UDP :: 56343 ..... 0
|
|
||||||
2.310629 3956498 test_bind_op TCP 192.168.1.102 39609 F...r 0
|
|
||||||
```
|
|
||||||
|
|
||||||
The -U option prints a UID column:
|
|
||||||
```console
|
|
||||||
# ./bindsnoop.py -U
|
|
||||||
Tracing binds ... Hit Ctrl-C to end
|
|
||||||
UID PID COMM PROT ADDR PORT OPTS IF
|
|
||||||
127072 3956498 test_bind_op TCP 192.168.1.102 44491 F...r 0
|
|
||||||
127072 3960261 Acceptor Thr TCP :: 48869 ...R. 0
|
|
||||||
0 3960729 Acceptor Thr TCP :: 44637 ...R. 0
|
|
||||||
0 3959075 chef-client UDP :: 61722 ..... 0
|
|
||||||
```
|
|
||||||
|
|
||||||
The -u option filtering UID:
|
|
||||||
```console
|
|
||||||
# ./bindsnoop.py -Uu 0
|
|
||||||
Tracing binds ... Hit Ctrl-C to end
|
|
||||||
UID PID COMM PROT ADDR PORT OPTS IF
|
|
||||||
0 3966330 Acceptor Thr TCP :: 39319 ...R. 0
|
|
||||||
0 3968044 python3.7 TCP ::1 59371 ..... 0
|
|
||||||
0 10224 fetch TCP 0.0.0.0 42091 ...R. 0
|
|
||||||
```
|
|
||||||
|
|
||||||
The --cgroupmap option filters based on a cgroup set.
|
|
||||||
It is meant to be used with an externally created map.
|
|
||||||
```console
|
|
||||||
# ./bindsnoop.py --cgroupmap /sys/fs/bpf/test01
|
|
||||||
```
|
|
||||||
For more details, see docs/special_filtering.md
|
|
||||||
|
|
||||||
|
|
||||||
In order to track heavy bind usage one can use --count option
|
|
||||||
```console
|
|
||||||
# ./bindsnoop.py --count
|
|
||||||
Tracing binds ... Hit Ctrl-C to end
|
|
||||||
LADDR LPORT BINDS
|
|
||||||
0.0.0.0 6771 4
|
|
||||||
0.0.0.0 4433 4
|
|
||||||
127.0.0.1 33665 1
|
|
||||||
```
|
|
||||||
@@ -1,151 +0,0 @@
|
|||||||
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
|
|
||||||
/* Copyright (c) 2021 Hengqi Chen */
|
|
||||||
#include <vmlinux.h>
|
|
||||||
#include <bpf/bpf_helpers.h>
|
|
||||||
#include <bpf/bpf_core_read.h>
|
|
||||||
#include <bpf/bpf_tracing.h>
|
|
||||||
#include <bpf/bpf_endian.h>
|
|
||||||
#include "bindsnoop.bpf.h"
|
|
||||||
|
|
||||||
#define MAX_ENTRIES 10240
|
|
||||||
#define MAX_PORTS 1024
|
|
||||||
|
|
||||||
const volatile bool filter_cg = false;
|
|
||||||
const volatile pid_t target_pid = 0;
|
|
||||||
const volatile bool ignore_errors = true;
|
|
||||||
const volatile bool filter_by_port = false;
|
|
||||||
|
|
||||||
struct {
|
|
||||||
__uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
|
|
||||||
__type(key, u32);
|
|
||||||
__type(value, u32);
|
|
||||||
__uint(max_entries, 1);
|
|
||||||
} cgroup_map SEC(".maps");
|
|
||||||
|
|
||||||
struct {
|
|
||||||
__uint(type, BPF_MAP_TYPE_HASH);
|
|
||||||
__uint(max_entries, MAX_ENTRIES);
|
|
||||||
__type(key, __u32);
|
|
||||||
__type(value, struct socket *);
|
|
||||||
} sockets SEC(".maps");
|
|
||||||
|
|
||||||
struct {
|
|
||||||
__uint(type, BPF_MAP_TYPE_HASH);
|
|
||||||
__uint(max_entries, MAX_PORTS);
|
|
||||||
__type(key, __u16);
|
|
||||||
__type(value, __u16);
|
|
||||||
} ports SEC(".maps");
|
|
||||||
|
|
||||||
struct {
|
|
||||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
|
||||||
__uint(key_size, sizeof(__u32));
|
|
||||||
__uint(value_size, sizeof(__u32));
|
|
||||||
} events SEC(".maps");
|
|
||||||
|
|
||||||
static int probe_entry(struct pt_regs *ctx, struct socket *socket)
|
|
||||||
{
|
|
||||||
__u64 pid_tgid = bpf_get_current_pid_tgid();
|
|
||||||
__u32 pid = pid_tgid >> 32;
|
|
||||||
__u32 tid = (__u32)pid_tgid;
|
|
||||||
|
|
||||||
if (target_pid && target_pid != pid)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
bpf_map_update_elem(&sockets, &tid, &socket, BPF_ANY);
|
|
||||||
return 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
static int probe_exit(struct pt_regs *ctx, short ver)
|
|
||||||
{
|
|
||||||
__u64 pid_tgid = bpf_get_current_pid_tgid();
|
|
||||||
__u32 pid = pid_tgid >> 32;
|
|
||||||
__u32 tid = (__u32)pid_tgid;
|
|
||||||
struct socket **socketp, *socket;
|
|
||||||
struct inet_sock *inet_sock;
|
|
||||||
struct sock *sock;
|
|
||||||
union bind_options opts;
|
|
||||||
struct bind_event event = {};
|
|
||||||
__u16 sport = 0, *port;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
socketp = bpf_map_lookup_elem(&sockets, &tid);
|
|
||||||
if (!socketp)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
ret = PT_REGS_RC(ctx);
|
|
||||||
if (ignore_errors && ret != 0)
|
|
||||||
goto cleanup;
|
|
||||||
|
|
||||||
socket = *socketp;
|
|
||||||
sock = BPF_CORE_READ(socket, sk);
|
|
||||||
inet_sock = (struct inet_sock *)sock;
|
|
||||||
|
|
||||||
sport = bpf_ntohs(BPF_CORE_READ(inet_sock, inet_sport));
|
|
||||||
port = bpf_map_lookup_elem(&ports, &sport);
|
|
||||||
if (filter_by_port && !port)
|
|
||||||
goto cleanup;
|
|
||||||
|
|
||||||
opts.fields.freebind = BPF_CORE_READ_BITFIELD_PROBED(inet_sock, freebind);
|
|
||||||
opts.fields.transparent = BPF_CORE_READ_BITFIELD_PROBED(inet_sock, transparent);
|
|
||||||
opts.fields.bind_address_no_port = BPF_CORE_READ_BITFIELD_PROBED(inet_sock, bind_address_no_port);
|
|
||||||
opts.fields.reuseaddress = BPF_CORE_READ_BITFIELD_PROBED(sock, __sk_common.skc_reuse);
|
|
||||||
opts.fields.reuseport = BPF_CORE_READ_BITFIELD_PROBED(sock, __sk_common.skc_reuseport);
|
|
||||||
event.opts = opts.data;
|
|
||||||
event.ts_us = bpf_ktime_get_ns() / 1000;
|
|
||||||
event.pid = pid;
|
|
||||||
event.port = sport;
|
|
||||||
event.bound_dev_if = BPF_CORE_READ(sock, __sk_common.skc_bound_dev_if);
|
|
||||||
event.ret = ret;
|
|
||||||
event.proto = BPF_CORE_READ_BITFIELD_PROBED(sock, sk_protocol);
|
|
||||||
bpf_get_current_comm(&event.task, sizeof(event.task));
|
|
||||||
if (ver == 4) {
|
|
||||||
event.ver = ver;
|
|
||||||
bpf_probe_read_kernel(&event.addr, sizeof(event.addr), &inet_sock->inet_saddr);
|
|
||||||
} else { /* ver == 6 */
|
|
||||||
event.ver = ver;
|
|
||||||
bpf_probe_read_kernel(&event.addr, sizeof(event.addr), sock->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
|
|
||||||
}
|
|
||||||
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event));
|
|
||||||
|
|
||||||
cleanup:
|
|
||||||
bpf_map_delete_elem(&sockets, &tid);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* kprobe on inet_bind: records the socket via probe_entry(). */
SEC("kprobe/inet_bind")
int BPF_KPROBE(ipv4_bind_entry, struct socket *socket)
{
	/* with cgroup filtering on, only tasks inside the cgroup are traced */
	if (!filter_cg || bpf_current_task_under_cgroup(&cgroup_map, 0))
		return probe_entry(ctx, socket);

	return 0;
}
|
|
||||||
|
|
||||||
/* kretprobe on inet_bind: emits the event via probe_exit() with ver = 4. */
SEC("kretprobe/inet_bind")
int BPF_KRETPROBE(ipv4_bind_exit)
{
	/* with cgroup filtering on, only tasks inside the cgroup are traced */
	if (!filter_cg || bpf_current_task_under_cgroup(&cgroup_map, 0))
		return probe_exit(ctx, 4);

	return 0;
}
|
|
||||||
|
|
||||||
/* kprobe on inet6_bind: records the socket via probe_entry(). */
SEC("kprobe/inet6_bind")
int BPF_KPROBE(ipv6_bind_entry, struct socket *socket)
{
	/* with cgroup filtering on, only tasks inside the cgroup are traced */
	if (!filter_cg || bpf_current_task_under_cgroup(&cgroup_map, 0))
		return probe_entry(ctx, socket);

	return 0;
}
|
|
||||||
|
|
||||||
/* kretprobe on inet6_bind: emits the event via probe_exit() with ver = 6. */
SEC("kretprobe/inet6_bind")
int BPF_KRETPROBE(ipv6_bind_exit)
{
	/* with cgroup filtering on, only tasks inside the cgroup are traced */
	if (!filter_cg || bpf_current_task_under_cgroup(&cgroup_map, 0))
		return probe_exit(ctx, 6);

	return 0;
}
|
|
||||||
|
|
||||||
char LICENSE[] SEC("license") = "Dual BSD/GPL";
|
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
|
|
||||||
#ifndef __BINDSNOOP_H
|
|
||||||
#define __BINDSNOOP_H
|
|
||||||
|
|
||||||
#define TASK_COMM_LEN 16
|
|
||||||
|
|
||||||
/* One record per traced bind call, as written to the perf event array. */
struct bind_event {
|
|
||||||
/* bound address; 128 bits so it can carry either an IPv4 or an IPv6 address */
unsigned __int128 addr;
|
|
||||||
/* timestamp in microseconds (bpf_ktime_get_ns() / 1000) */
unsigned long long ts_us;
|
|
||||||
/* upper 32 bits of bpf_get_current_pid_tgid() */
unsigned int pid;
|
|
||||||
/* skc_bound_dev_if of the socket */
unsigned int bound_dev_if;
|
|
||||||
/* return code of the traced bind function */
int ret;
|
|
||||||
/* source port, converted with bpf_ntohs() */
unsigned short port;
|
|
||||||
/* sk_protocol of the socket */
unsigned short proto;
|
|
||||||
/* packed option flags; see union bind_options */
unsigned char opts;
|
|
||||||
/* IP version passed by the probe: 4 or 6 */
unsigned char ver;
|
|
||||||
/* task name from bpf_get_current_comm() */
char task[TASK_COMM_LEN];
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
 * Socket option flags packed into one byte: `data` is the raw byte that is
 * copied into bind_event.opts, `fields` gives named access to each flag.
 */
union bind_options {
|
|
||||||
/* raw byte view of the flags below */
unsigned char data;
|
|
||||||
struct {
|
|
||||||
/* set from inet_sock freebind */
unsigned char freebind : 1;
|
|
||||||
/* set from inet_sock transparent */
unsigned char transparent : 1;
|
|
||||||
/* set from inet_sock bind_address_no_port */
unsigned char bind_address_no_port : 1;
|
|
||||||
/* set from sock skc_reuse */
unsigned char reuseaddress : 1;
|
|
||||||
/* set from sock skc_reuseport */
unsigned char reuseport : 1;
|
|
||||||
} fields;
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* __BINDSNOOP_H */
|
|
||||||
@@ -1,95 +0,0 @@
|
|||||||
## eBPF 入门实践教程:编写 eBPF 程序 Bindsnoop 监控 socket 端口绑定事件
|
|
||||||
|
|
||||||
### 背景
|
|
||||||
|
|
||||||
Bindsnoop 会跟踪操作 socket 端口绑定的内核函数,并且在可能会影响端口绑定的系统调用发生之前,打印
|
|
||||||
现有的 socket 选项。
|
|
||||||
|
|
||||||
### 实现原理
|
|
||||||
|
|
||||||
Bindsnoop 通过kprobe实现。其主要挂载点为 inet_bind 和 inet6_bind。inet_bind 为处理 IPV4 类型
|
|
||||||
socket 端口绑定系统调用的接口,inet6_bind 为处理IPV6类型 socket 端口绑定系统调用的接口。
|
|
||||||
|
|
||||||
```c
|
|
||||||
SEC("kprobe/inet_bind")
|
|
||||||
int BPF_KPROBE(ipv4_bind_entry, struct socket *socket)
|
|
||||||
{
|
|
||||||
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
return probe_entry(ctx, socket);
|
|
||||||
}
|
|
||||||
SEC("kretprobe/inet_bind")
|
|
||||||
|
|
||||||
int BPF_KRETPROBE(ipv4_bind_exit)
|
|
||||||
{
|
|
||||||
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
return probe_exit(ctx, 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
SEC("kprobe/inet6_bind")
|
|
||||||
int BPF_KPROBE(ipv6_bind_entry, struct socket *socket)
|
|
||||||
{
|
|
||||||
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
return probe_entry(ctx, socket);
|
|
||||||
}
|
|
||||||
|
|
||||||
SEC("kretprobe/inet6_bind")
|
|
||||||
int BPF_KRETPROBE(ipv6_bind_exit)
|
|
||||||
{
|
|
||||||
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
return probe_exit(ctx, 6);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
当系统试图进行socket端口绑定操作时, kprobe挂载的处理函数会被触发。在进入绑定函数时,`probe_entry`会先被
|
|
||||||
调用,它会以 tid 为主键将 socket 信息存入 map 中。
|
|
||||||
|
|
||||||
```c
|
|
||||||
static int probe_entry(struct pt_regs *ctx, struct socket *socket)
|
|
||||||
{
|
|
||||||
__u64 pid_tgid = bpf_get_current_pid_tgid();
|
|
||||||
__u32 pid = pid_tgid >> 32;
|
|
||||||
__u32 tid = (__u32)pid_tgid;
|
|
||||||
|
|
||||||
if (target_pid && target_pid != pid)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
bpf_map_update_elem(&sockets, &tid, &socket, BPF_ANY);
|
|
||||||
return 0;
|
|
||||||
};
|
|
||||||
```
|
|
||||||
在执行完绑定函数后,`probe_exit`函数会被调用。该函数会读取tid对应的socket信息,将其和其他信息一起
|
|
||||||
写入 event 结构体并输出到用户态。
|
|
||||||
|
|
||||||
```c
|
|
||||||
struct bind_event {
|
|
||||||
unsigned __int128 addr;
|
|
||||||
__u64 ts_us;
|
|
||||||
__u32 pid;
|
|
||||||
__u32 bound_dev_if;
|
|
||||||
int ret;
|
|
||||||
__u16 port;
|
|
||||||
__u16 proto;
|
|
||||||
__u8 opts;
|
|
||||||
__u8 ver;
|
|
||||||
char task[TASK_COMM_LEN];
|
|
||||||
};
|
|
||||||
```
|
|
||||||
|
|
||||||
当用户停止该工具时,其用户态代码会读取存入的数据并按要求打印。
|
|
||||||
|
|
||||||
### Eunomia中使用方式
|
|
||||||
|
|
||||||

|
|
||||||

|
|
||||||
|
|
||||||
### 总结
|
|
||||||
|
|
||||||
Bindsnoop 通过 kprobe 挂载点,实现了对 socket 端口的监视,增强了 Eunomia 的应用范围。
|
|
||||||
0
21-tc/.gitignore → 20-tc/.gitignore
vendored
0
21-tc/.gitignore → 20-tc/.gitignore
vendored
0
21-xdp/README.md
Normal file
0
21-xdp/README.md
Normal file
31
9-runqlat/bits.bpf copy.h
Normal file
31
9-runqlat/bits.bpf copy.h
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
|
||||||
|
#ifndef __BITS_BPF_H
|
||||||
|
#define __BITS_BPF_H
|
||||||
|
|
||||||
|
#define READ_ONCE(x) (*(volatile typeof(x) *)&(x))
|
||||||
|
#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *)&(x)) = val)
|
||||||
|
|
||||||
|
/*
 * Branch-free integer base-2 logarithm of a 32-bit value: returns the
 * index of the highest set bit of v (0 for v == 0 or v == 1).
 *
 * The value is narrowed in steps of 16, 8, 4 and 2 bits; each step that
 * applies shifts v down and adds its width to the result.
 */
static __always_inline u64 log2(u32 v)
{
	u32 result, step;

	result = (v > 0xFFFF) << 4;
	v >>= result;

	step = (v > 0xFF) << 3;
	v >>= step;
	result |= step;

	step = (v > 0xF) << 2;
	v >>= step;
	result |= step;

	step = (v > 0x3) << 1;
	v >>= step;
	result |= step;

	/* v is now in 0..3; its bit 1 is the last bit of the result */
	result |= (v >> 1);

	return result;
}
|
||||||
|
|
||||||
|
/*
 * 64-bit variant of log2(): when the upper half of v is non-zero the
 * result is log2() of that half plus 32, otherwise log2() of the lower
 * half.
 */
static __always_inline u64 log2l(u64 v)
{
	u32 upper = v >> 32;

	return upper ? log2(upper) + 32 : log2(v);
}
|
||||||
|
|
||||||
|
#endif /* __BITS_BPF_H */
|
||||||
26
9-runqlat/maps.bpf copy.h
Normal file
26
9-runqlat/maps.bpf copy.h
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
|
||||||
|
// Copyright (c) 2020 Anton Protopopov
|
||||||
|
#ifndef __MAPS_BPF_H
|
||||||
|
#define __MAPS_BPF_H
|
||||||
|
|
||||||
|
#include <bpf/bpf_helpers.h>
|
||||||
|
#include <asm-generic/errno.h>
|
||||||
|
|
||||||
|
static __always_inline void *
|
||||||
|
bpf_map_lookup_or_try_init(void *map, const void *key, const void *init)
|
||||||
|
{
|
||||||
|
void *val;
|
||||||
|
long err;
|
||||||
|
|
||||||
|
val = bpf_map_lookup_elem(map, key);
|
||||||
|
if (val)
|
||||||
|
return val;
|
||||||
|
|
||||||
|
err = bpf_map_update_elem(map, key, init, BPF_NOEXIST);
|
||||||
|
if (err && err != -EEXIST)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return bpf_map_lookup_elem(map, key);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* __MAPS_BPF_H */
|
||||||
48
README.md
48
README.md
@@ -4,30 +4,34 @@
|
|||||||
|
|
||||||
在学习 eBPF 的过程中,我们受到了 [tutorial_bcc_python_developer](https://github.com/iovisor/bcc/blob/master/docs/tutorial_bcc_python_developer.md) 的许多启发和帮助,但从 2022 年的角度出发,使用 libbpf 开发 eBPF 的应用是目前相对更好的选择。但目前似乎很少有基于 libbpf 和 BPF CO-RE 出发的、通过案例和工具介绍 eBPF 开发的教程,因此我们发起了这个项目。
|
在学习 eBPF 的过程中,我们受到了 [tutorial_bcc_python_developer](https://github.com/iovisor/bcc/blob/master/docs/tutorial_bcc_python_developer.md) 的许多启发和帮助,但从 2022 年的角度出发,使用 libbpf 开发 eBPF 的应用是目前相对更好的选择。但目前似乎很少有基于 libbpf 和 BPF CO-RE 出发的、通过案例和工具介绍 eBPF 开发的教程,因此我们发起了这个项目。
|
||||||
|
|
||||||
|
本项目主要基于 [libbpf-bootstrap](https://github.com/libbpf/libbpf-bootstrap) 和 [eunomia-bpf](https://github.com/eunomia-bpf/eunomia-bpf) 两个框架完成,并使用 eunomia-bpf 帮助简化一部分 eBPF 用户态代码的编写。
|
||||||
|
|
||||||
|
教程主要关注于可观察性,并简要介绍了 eBPF 的其他应用。
|
||||||
|
|
||||||
## 目录
|
## 目录
|
||||||
|
|
||||||
- [lesson 0-introduce](0-introduce/introduce.md) 介绍 eBPF 的基本概念和常见的开发工具
|
- [lesson 0-introduce](0-introduce/README.md) 介绍 eBPF 的基本概念和常见的开发工具
|
||||||
- [lesson 1-helloworld](1-helloworld/README.md) 演示如何使用 eBPF 开发最简单的「Hello World」程序,介绍 eBPF 的基本框架和开发流程
|
- [lesson 1-helloworld](1-helloworld/README.md) 使用 eBPF 开发最简单的「Hello World」程序,介绍 eBPF 的基本框架和开发流程
|
||||||
- [lesson 2-fentry-unlink](2-fentry-unlink/README.md) 基于 eBPF 的 fentry hook,演示了如何捕获并记录 unlink 系统调用
|
- [lesson 2-fentry-unlink](2-fentry-unlink/README.md) 在 eBPF 中使用 fentry 捕获 unlink 系统调用
|
||||||
- [lesson 3-kprobe-unlink](3-kprobe-unlink/README.md) 基于 eBPF 的 kprobe hook,演示了如何捕获并记录 unlink 系统调用
|
- [lesson 3-kprobe-unlink](3-kprobe-unlink/README.md) 在 eBPF 中使用 kprobe 捕获 unlink 系统调用
|
||||||
- [lesson 4-opensnoop](4-opensnoop/README.md)
|
- [lesson 4-opensnoop](4-opensnoop/README.md) 捕获进程打开文件的系统调用集合,使用全局变量在 eBPF 中过滤进程 pid
|
||||||
- [lesson 5-uprobe-bashreadline](5-uprobe-bashreadline/README.md)
|
- [lesson 5-uprobe-bashreadline](5-uprobe-bashreadline/README.md) 使用 uprobe 捕获 bash 的 readline 函数调用
|
||||||
- [lesson 6-sigsnoop](6-sigsnoop/README.md)
|
- [lesson 6-sigsnoop](6-sigsnoop/README.md) 捕获进程发送信号的系统调用集合,使用 hash map 保存状态
|
||||||
- [lesson 7-execsnoop](7-execsnoop/README.md)
|
- [lesson 7-execsnoop](7-execsnoop/README.md) 捕获进程执行/退出时间,通过 perf event array 向用户态打印输出
|
||||||
- [lesson 8-runqslower](8-runqslower/README.md)
|
- [lesson 8-runqslower](8-runqslower/README.md) 捕获进程调度事件,使用 ring buffer 向用户态打印输出
|
||||||
- [lesson 9-runqlat](9-runqlat/README.md)
|
- [lesson 9-runqlat](9-runqlat/README.md) 捕获进程调度延迟,以直方图方式记录
|
||||||
- [lesson 10-hardirqs](20-hardirqs/README.md)
|
- [lesson 10-hardirqs](10-hardirqs/README.md) 使用 hardirqs 或 softirqs 捕获中断事件
|
||||||
- [lesson 11-llcstat](21-llcstat/README.md)
|
- [lesson 11-llcstat](11-llcstat/README.md) 通过 PID 统计 cache references and misses
|
||||||
- [lesson 12-bindsnoop](12-bindsnoop/README.md)
|
- [lesson 12-profile](12-profile/README.md) 使用 eBPF 进行性能分析
|
||||||
- [lesson 13-tcpconnlat](13-tcpconnlat/README.md)
|
- [lesson 13-tcpconnlat](13-tcpconnlat/README.md) 记录 TCP 连接延迟
|
||||||
- [lesson 14-tcpstates](14-tcpstates/README.md)
|
- [lesson 14-tcpstates](14-tcpstates/README.md) 记录 TCP 连接状态
|
||||||
- [lesson 15-tcprtt](15-tcprtt/README.md)
|
- [lesson 15-tcprtt](15-tcprtt/README.md) 以直方图方式记录 TCP RTT
|
||||||
- [lesson 16-profile](16-profile/README.md)
|
- [lesson 16-memleak](16-memleak/README.md) 检测内存泄漏
|
||||||
- [lesson 17-memleak](17-memleak/README.md)
|
- [lesson 17-biopattern](17-biopattern/README.md) 捕获磁盘 IO 模式
|
||||||
- [lesson 18-biopattern](18-biopattern/README.md)
|
- [lesson 18-further-reading](18-further-reading/README.md) 更进一步的相关资料?
|
||||||
- [lesson 19-syscount](19-syscount/README.md)
|
- [lesson 19-lsm-connect](19-lsm-connect/README.md) 使用 LSM 进行安全检测防御
|
||||||
- [lesson 20-lsm-connect](20-lsm-connect/README.md)
|
- [lesson 20-tc](20-tc/README.md) 使用 eBPF 进行 tc 流量控制
|
||||||
- [lesson 21-tc](21-tc/README.md)
|
- [lesson 21-xdp](21-xdp/README.md) 使用 eBPF 进行 XDP 报文处理
|
||||||
|
|
||||||
## 为什么需要基于 libbpf 和 BPF CO-RE 的教程?
|
## 为什么需要基于 libbpf 和 BPF CO-RE 的教程?
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user