init with documents from eunomia-bpf

This commit is contained in:
yunwei37
2022-12-02 19:18:03 +08:00
parent 1179ec171e
commit 81d749a9cc
85 changed files with 11876 additions and 0 deletions

3
12-bindsnoop/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
.vscode
package.json
ecli

106
12-bindsnoop/README.md Normal file
View File

@@ -0,0 +1,106 @@
---
layout: post
title: bindsnoop
date: 2022-10-10 16:18
category: bpftools
author: yunwei37
tags: [bpftools, syscall, kprobe, perf-event]
summary: This tool traces the kernel function performing socket binding and prints the socket options set before the system call.
---
## origin
Adapted from:
https://github.com/iovisor/bcc/blob/master/libbpf-tools/bindsnoop.bpf.c
## Compile and Run
Compile:
```shell
docker run -it -v `pwd`/:/src/ yunwei37/ebpm:latest
```
Run:
```shell
sudo ./ecli run examples/bpftools/bindsnoop/package.json
```
## details in bcc
Demonstrations of bindsnoop, the Linux eBPF/bcc version.
This tool traces the kernel function performing socket binding and
prints the socket options set before the system call invocation that might
impact bind behavior and bound interface:
```console
SOL_IP IP_FREEBIND F....
SOL_IP IP_TRANSPARENT .T...
SOL_IP IP_BIND_ADDRESS_NO_PORT ..N..
SOL_SOCKET SO_REUSEADDR ...R.
SOL_SOCKET SO_REUSEPORT ....r
```
```console
# ./bindsnoop.py
Tracing binds ... Hit Ctrl-C to end
PID COMM PROT ADDR PORT OPTS IF
3941081 test_bind_op TCP 192.168.1.102 0 F.N.. 0
3940194 dig TCP :: 62087 ..... 0
3940219 dig UDP :: 48665 ..... 0
3940893 Acceptor Thr TCP :: 35343 ...R. 0
```
The output shows four bind system calls:
one "test_bind_op" instance with the IP_FREEBIND and IP_BIND_ADDRESS_NO_PORT
options set, the dig process calling bind for a TCP and a UDP socket,
and Acceptor calling bind for TCP with the SO_REUSEADDR option set.
The -t option prints a timestamp column:
```console
# ./bindsnoop.py -t
TIME(s) PID COMM PROT ADDR PORT OPTS IF
0.000000 3956801 dig TCP :: 49611 ..... 0
0.011045 3956822 dig UDP :: 56343 ..... 0
2.310629 3956498 test_bind_op TCP 192.168.1.102 39609 F...r 0
```
The -U option prints a UID column:
```console
# ./bindsnoop.py -U
Tracing binds ... Hit Ctrl-C to end
UID PID COMM PROT ADDR PORT OPTS IF
127072 3956498 test_bind_op TCP 192.168.1.102 44491 F...r 0
127072 3960261 Acceptor Thr TCP :: 48869 ...R. 0
0 3960729 Acceptor Thr TCP :: 44637 ...R. 0
0 3959075 chef-client UDP :: 61722 ..... 0
```
The -u option filters by UID:
```console
# ./bindsnoop.py -Uu 0
Tracing binds ... Hit Ctrl-C to end
UID PID COMM PROT ADDR PORT OPTS IF
0 3966330 Acceptor Thr TCP :: 39319 ...R. 0
0 3968044 python3.7 TCP ::1 59371 ..... 0
0 10224 fetch TCP 0.0.0.0 42091 ...R. 0
```
The --cgroupmap option filters based on a cgroup set.
It is meant to be used with an externally created map.
```console
# ./bindsnoop.py --cgroupmap /sys/fs/bpf/test01
```
For more details, see docs/special_filtering.md
In order to track heavy bind usage, one can use the --count option:
```console
# ./bindsnoop.py --count
Tracing binds ... Hit Ctrl-C to end
LADDR LPORT BINDS
0.0.0.0 6771 4
0.0.0.0 4433 4
127.0.0.1 33665 1
```

View File

@@ -0,0 +1,151 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/* Copyright (c) 2021 Hengqi Chen */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
#include "bindsnoop.bpf.h"
/* Capacity of the `sockets` hash map (one entry per in-flight bind call). */
#define MAX_ENTRIES 10240
/* Capacity of the `ports` hash map used for port filtering. */
#define MAX_PORTS 1024
/*
 * Read-only knobs consulted by the probes below.
 * NOTE(review): presumably overwritten by the user-space loader before the
 * object is loaded; the loader is not part of this file, so confirm there.
 */
/* When true, every probe returns early unless the task is in cgroup_map[0]. */
const volatile bool filter_cg = false;
/* When non-zero, probe_entry only records binds issued by this process ID. */
const volatile pid_t target_pid = 0;
/* When true, probe_exit drops bind calls whose return value is non-zero. */
const volatile bool ignore_errors = true;
/* When true, probe_exit drops binds whose port is not a key in `ports`. */
const volatile bool filter_by_port = false;
/*
 * Single-slot cgroup array. When filter_cg is set, each probe checks the
 * current task against slot 0 with bpf_current_task_under_cgroup().
 */
struct {
__uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
__type(key, u32);
__type(value, u32);
__uint(max_entries, 1);
} cgroup_map SEC(".maps");
/*
 * Thread ID -> `struct socket *` captured at bind entry.
 * probe_entry inserts the entry; probe_exit looks it up and deletes it.
 */
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, MAX_ENTRIES);
__type(key, __u32);
__type(value, struct socket *);
} sockets SEC(".maps");
/*
 * Set of ports to report when filter_by_port is enabled. probe_exit looks up
 * the bound port in host byte order; only the presence of the key is tested,
 * the stored value is never read in this file.
 */
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, MAX_PORTS);
__type(key, __u16);
__type(value, __u16);
} ports SEC(".maps");
/*
 * Perf event array through which probe_exit publishes `struct bind_event`
 * records with bpf_perf_event_output().
 */
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u32));
} events SEC(".maps");
/*
 * Entry-side handler shared by the inet_bind and inet6_bind kprobes.
 *
 * Remembers the socket being bound, keyed by the calling thread's ID, so that
 * probe_exit can retrieve it when the same thread returns from the bind call.
 * Nothing is recorded when target_pid is set and the caller belongs to a
 * different process.
 *
 * @ctx:    probe register context (unused here).
 * @socket: first argument of the probed bind function.
 *
 * Always returns 0.
 */
static int probe_entry(struct pt_regs *ctx, struct socket *socket)
{
	__u64 pid_tgid = bpf_get_current_pid_tgid();
	__u32 pid = pid_tgid >> 32;	/* upper 32 bits, compared to target_pid */
	__u32 tid = (__u32)pid_tgid;	/* lower 32 bits, used as the map key */

	if (target_pid && target_pid != pid)
		return 0;

	/* Best effort: if the map is full the bind simply goes unreported. */
	bpf_map_update_elem(&sockets, &tid, &socket, BPF_ANY);
	return 0;
}
static int probe_exit(struct pt_regs *ctx, short ver)
{
__u64 pid_tgid = bpf_get_current_pid_tgid();
__u32 pid = pid_tgid >> 32;
__u32 tid = (__u32)pid_tgid;
struct socket **socketp, *socket;
struct inet_sock *inet_sock;
struct sock *sock;
union bind_options opts;
struct bind_event event = {};
__u16 sport = 0, *port;
int ret;
socketp = bpf_map_lookup_elem(&sockets, &tid);
if (!socketp)
return 0;
ret = PT_REGS_RC(ctx);
if (ignore_errors && ret != 0)
goto cleanup;
socket = *socketp;
sock = BPF_CORE_READ(socket, sk);
inet_sock = (struct inet_sock *)sock;
sport = bpf_ntohs(BPF_CORE_READ(inet_sock, inet_sport));
port = bpf_map_lookup_elem(&ports, &sport);
if (filter_by_port && !port)
goto cleanup;
opts.fields.freebind = BPF_CORE_READ_BITFIELD_PROBED(inet_sock, freebind);
opts.fields.transparent = BPF_CORE_READ_BITFIELD_PROBED(inet_sock, transparent);
opts.fields.bind_address_no_port = BPF_CORE_READ_BITFIELD_PROBED(inet_sock, bind_address_no_port);
opts.fields.reuseaddress = BPF_CORE_READ_BITFIELD_PROBED(sock, __sk_common.skc_reuse);
opts.fields.reuseport = BPF_CORE_READ_BITFIELD_PROBED(sock, __sk_common.skc_reuseport);
event.opts = opts.data;
event.ts_us = bpf_ktime_get_ns() / 1000;
event.pid = pid;
event.port = sport;
event.bound_dev_if = BPF_CORE_READ(sock, __sk_common.skc_bound_dev_if);
event.ret = ret;
event.proto = BPF_CORE_READ_BITFIELD_PROBED(sock, sk_protocol);
bpf_get_current_comm(&event.task, sizeof(event.task));
if (ver == 4) {
event.ver = ver;
bpf_probe_read_kernel(&event.addr, sizeof(event.addr), &inet_sock->inet_saddr);
} else { /* ver == 6 */
event.ver = ver;
bpf_probe_read_kernel(&event.addr, sizeof(event.addr), sock->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
}
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event));
cleanup:
bpf_map_delete_elem(&sockets, &tid);
return 0;
}
/* kprobe on inet_bind(): hand the socket to probe_entry for later pairing. */
SEC("kprobe/inet_bind")
int BPF_KPROBE(ipv4_bind_entry, struct socket *socket)
{
	/* Proceed when cgroup filtering is off or the task passes the check. */
	if (!filter_cg || bpf_current_task_under_cgroup(&cgroup_map, 0))
		return probe_entry(ctx, socket);

	return 0;
}
/* kretprobe on inet_bind(): report the finished bind as an IPv4 event. */
SEC("kretprobe/inet_bind")
int BPF_KRETPROBE(ipv4_bind_exit)
{
	/* Proceed when cgroup filtering is off or the task passes the check. */
	if (!filter_cg || bpf_current_task_under_cgroup(&cgroup_map, 0))
		return probe_exit(ctx, 4);

	return 0;
}
/* kprobe on inet6_bind(): hand the socket to probe_entry for later pairing. */
SEC("kprobe/inet6_bind")
int BPF_KPROBE(ipv6_bind_entry, struct socket *socket)
{
	/* Proceed when cgroup filtering is off or the task passes the check. */
	if (!filter_cg || bpf_current_task_under_cgroup(&cgroup_map, 0))
		return probe_entry(ctx, socket);

	return 0;
}
/* kretprobe on inet6_bind(): report the finished bind as an IPv6 event. */
SEC("kretprobe/inet6_bind")
int BPF_KRETPROBE(ipv6_bind_exit)
{
	/* Proceed when cgroup filtering is off or the task passes the check. */
	if (!filter_cg || bpf_current_task_under_cgroup(&cgroup_map, 0))
		return probe_exit(ctx, 6);

	return 0;
}
/* License string emitted into the object's "license" section. */
char LICENSE[] SEC("license") = "Dual BSD/GPL";

View File

@@ -0,0 +1,31 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#ifndef __BINDSNOOP_H
#define __BINDSNOOP_H
/* Size of the `task` buffer that bpf_get_current_comm() fills in. */
#define TASK_COMM_LEN 16
/*
 * One record per reported bind call, emitted by the BPF program on the
 * `events` perf buffer. The layout is shared with the consumer, so field
 * order and sizes must not change independently on either side.
 */
struct bind_event {
/* Bound local address, copied from inet_saddr (v4) or skc_v6_rcv_saddr (v6). */
unsigned __int128 addr;
/* bpf_ktime_get_ns() divided by 1000, i.e. microseconds. */
unsigned long long ts_us;
/* Upper 32 bits of bpf_get_current_pid_tgid() for the calling task. */
unsigned int pid;
/* Value of the socket's skc_bound_dev_if field. */
unsigned int bound_dev_if;
/* Return value of the probed bind function. */
int ret;
/* Local port (inet_sport) converted to host byte order. */
unsigned short port;
/* Value of the socket's sk_protocol field. */
unsigned short proto;
/* Packed option flags; see `union bind_options`. */
unsigned char opts;
/* 4 for the inet_bind probe, 6 for the inet6_bind probe. */
unsigned char ver;
/* Command name of the calling task. */
char task[TASK_COMM_LEN];
};
/*
 * Socket option flags packed into one byte. The BPF program sets the
 * individual bits through `fields` and copies `data` into bind_event.opts.
 */
union bind_options {
/* All flags viewed as a single byte. */
unsigned char data;
struct {
/* inet_sock.freebind */
unsigned char freebind : 1;
/* inet_sock.transparent */
unsigned char transparent : 1;
/* inet_sock.bind_address_no_port */
unsigned char bind_address_no_port : 1;
/* sock.__sk_common.skc_reuse */
unsigned char reuseaddress : 1;
/* sock.__sk_common.skc_reuseport */
unsigned char reuseport : 1;
} fields;
};
#endif /* __BINDSNOOP_H */

95
12-bindsnoop/bindsnoop.md Normal file
View File

@@ -0,0 +1,95 @@
## eBPF 入门实践教程:编写 eBPF 程序 Bindsnoop 监控 socket 端口绑定事件
### 背景
Bindsnoop 会跟踪操作 socket 端口绑定的内核函数,并且在可能会影响端口绑定的系统调用发生之前,打印
现有的 socket 选项。
### 实现原理
Bindsnoop 通过 kprobe 实现。其主要挂载点为 inet_bind 和 inet6_bind。inet_bind 为处理 IPv4 类型
socket 端口绑定系统调用的接口,inet6_bind 为处理 IPv6 类型 socket 端口绑定系统调用的接口。
```c
SEC("kprobe/inet_bind")
int BPF_KPROBE(ipv4_bind_entry, struct socket *socket)
{
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
return 0;
return probe_entry(ctx, socket);
}
SEC("kretprobe/inet_bind")
int BPF_KRETPROBE(ipv4_bind_exit)
{
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
return 0;
return probe_exit(ctx, 4);
}
SEC("kprobe/inet6_bind")
int BPF_KPROBE(ipv6_bind_entry, struct socket *socket)
{
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
return 0;
return probe_entry(ctx, socket);
}
SEC("kretprobe/inet6_bind")
int BPF_KRETPROBE(ipv6_bind_exit)
{
if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
return 0;
return probe_exit(ctx, 6);
}
```
当系统试图进行socket端口绑定操作时, kprobe挂载的处理函数会被触发。在进入绑定函数时`probe_entry`会先被
调用,它会以 tid 为主键将 socket 信息存入 map 中。
```c
static int probe_entry(struct pt_regs *ctx, struct socket *socket)
{
__u64 pid_tgid = bpf_get_current_pid_tgid();
__u32 pid = pid_tgid >> 32;
__u32 tid = (__u32)pid_tgid;
if (target_pid && target_pid != pid)
return 0;
bpf_map_update_elem(&sockets, &tid, &socket, BPF_ANY);
return 0;
};
```
在执行完绑定函数后,`probe_exit` 函数会被调用。该函数会读取 tid 对应的 socket 信息,将其和其他信息一起
写入 event 结构体,并输出到用户态。
```c
struct bind_event {
unsigned __int128 addr;
__u64 ts_us;
__u32 pid;
__u32 bound_dev_if;
int ret;
__u16 port;
__u16 proto;
__u8 opts;
__u8 ver;
char task[TASK_COMM_LEN];
};
```
当用户停止该工具时,其用户态代码会读取存入的数据并按要求打印。
### Eunomia中使用方式
![result](../imgs/mountsnoop.jpg)
![result](../imgs/bindsnoop-prometheus.png)
### 总结
Bindsnoop 通过 kprobe 挂载点,实现了对 socket 端口的监视,增强了 Eunomia 的应用范围。