implement opensnoop and uprobe

This commit is contained in:
yunwei37
2022-12-04 23:59:55 +08:00
parent b160ff39d2
commit 40260892b4
16 changed files with 239 additions and 827 deletions

View File

@@ -1,263 +0,0 @@
## eBPF 入门实践教程:编写 eBPF 程序监控打开文件路径并使用 Prometheus 可视化
### 背景
通过对 open 系统调用的监测,`opensnoop`可以展现系统内所有调用了 open 系统调用的进程信息。
### 使用 ecli 一键运行
```console
$ # 下载安装 ecli 二进制
$ wget https://aka.pw/bpf-ecli -O ./ecli && chmod +x ./ecli
$ # 使用 url 一键运行
$ ./ecli run https://eunomia-bpf.github.io/eunomia-bpf/opensnoop/package.json
running and waiting for the ebpf events from perf event...
time ts pid uid ret flags comm fname
00:58:08 0 812 0 9 524288 vmtoolsd /etc/mtab
00:58:08 0 812 0 11 0 vmtoolsd /proc/devices
00:58:08 0 34351 0 24 524288 ecli /etc/localtime
00:58:08 0 812 0 9 0 vmtoolsd /sys/class/block/sda5/../device/../../../class
00:58:08 0 812 0 -2 0 vmtoolsd /sys/class/block/sda5/../device/../../../label
00:58:08 0 812 0 9 0 vmtoolsd /sys/class/block/sda1/../device/../../../class
00:58:08 0 812 0 -2 0 vmtoolsd /sys/class/block/sda1/../device/../../../label
00:58:08 0 812 0 9 0 vmtoolsd /run/systemd/resolve/resolv.conf
00:58:08 0 812 0 9 0 vmtoolsd /proc/net/route
00:58:08 0 812 0 9 0 vmtoolsd /proc/net/ipv6_route
```
### 实现
使用 eunomia-bpf 可以帮助你只需要编写内核态应用程序,不需要编写任何用户态辅助框架代码;需要编写的代码由两个部分组成:
- 头文件 opensnoop.h 里面定义需要导出的 C 语言结构体:
- 源文件 opensnoop.bpf.c 里面定义 BPF 代码:
头文件 opensnoop.h
```c
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#ifndef __OPENSNOOP_H
#define __OPENSNOOP_H
#define TASK_COMM_LEN 16
#define NAME_MAX 255
#define INVALID_UID ((uid_t)-1)
// used for export event
struct event {
/* user terminology for pid: */
unsigned long long ts;
int pid;
int uid;
int ret;
int flags;
char comm[TASK_COMM_LEN];
char fname[NAME_MAX];
};
#endif /* __OPENSNOOP_H */
```
`opensnoop` 的实现逻辑比较简单,它在 `sys_enter_open``sys_enter_openat` 这两个追踪点下
加了执行函数,当有 open 系统调用发生时,执行函数便会被触发。同样在,在对应的 `sys_exit_open`
`sys_exit_openat` 系统调用下,`opensnoop` 也加了执行函数。
源文件 opensnoop.bpf.c
```c
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
// Copyright (c) 2020 Netflix
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include "opensnoop.h"
struct args_t {
const char *fname;
int flags;
};
const volatile pid_t targ_pid = 0;
const volatile pid_t targ_tgid = 0;
const volatile uid_t targ_uid = 0;
const volatile bool targ_failed = false;
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 10240);
__type(key, u32);
__type(value, struct args_t);
} start SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(key_size, sizeof(u32));
__uint(value_size, sizeof(u32));
} events SEC(".maps");
static __always_inline bool valid_uid(uid_t uid) {
return uid != INVALID_UID;
}
static __always_inline
bool trace_allowed(u32 tgid, u32 pid)
{
u32 uid;
/* filters */
if (targ_tgid && targ_tgid != tgid)
return false;
if (targ_pid && targ_pid != pid)
return false;
if (valid_uid(targ_uid)) {
uid = (u32)bpf_get_current_uid_gid();
if (targ_uid != uid) {
return false;
}
}
return true;
}
SEC("tracepoint/syscalls/sys_enter_open")
int tracepoint__syscalls__sys_enter_open(struct trace_event_raw_sys_enter* ctx)
{
u64 id = bpf_get_current_pid_tgid();
/* use kernel terminology here for tgid/pid: */
u32 tgid = id >> 32;
u32 pid = id;
/* store arg info for later lookup */
if (trace_allowed(tgid, pid)) {
struct args_t args = {};
args.fname = (const char *)ctx->args[0];
args.flags = (int)ctx->args[1];
bpf_map_update_elem(&start, &pid, &args, 0);
}
return 0;
}
SEC("tracepoint/syscalls/sys_enter_openat")
int tracepoint__syscalls__sys_enter_openat(struct trace_event_raw_sys_enter* ctx)
{
u64 id = bpf_get_current_pid_tgid();
/* use kernel terminology here for tgid/pid: */
u32 tgid = id >> 32;
u32 pid = id;
/* store arg info for later lookup */
if (trace_allowed(tgid, pid)) {
struct args_t args = {};
args.fname = (const char *)ctx->args[1];
args.flags = (int)ctx->args[2];
bpf_map_update_elem(&start, &pid, &args, 0);
}
return 0;
}
static __always_inline
int trace_exit(struct trace_event_raw_sys_exit* ctx)
{
struct event event = {};
struct args_t *ap;
int ret;
u32 pid = bpf_get_current_pid_tgid();
ap = bpf_map_lookup_elem(&start, &pid);
if (!ap)
return 0; /* missed entry */
ret = ctx->ret;
if (targ_failed && ret >= 0)
goto cleanup; /* want failed only */
/* event data */
event.pid = bpf_get_current_pid_tgid() >> 32;
event.uid = bpf_get_current_uid_gid();
bpf_get_current_comm(&event.comm, sizeof(event.comm));
bpf_probe_read_user_str(&event.fname, sizeof(event.fname), ap->fname);
event.flags = ap->flags;
event.ret = ret;
/* emit event */
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
&event, sizeof(event));
cleanup:
bpf_map_delete_elem(&start, &pid);
return 0;
}
SEC("tracepoint/syscalls/sys_exit_open")
int tracepoint__syscalls__sys_exit_open(struct trace_event_raw_sys_exit* ctx)
{
return trace_exit(ctx);
}
SEC("tracepoint/syscalls/sys_exit_openat")
int tracepoint__syscalls__sys_exit_openat(struct trace_event_raw_sys_exit* ctx)
{
return trace_exit(ctx);
}
char LICENSE[] SEC("license") = "GPL";
```
在 enter 环节,`opensnoop` 会记录调用者的 pid, comm 等基本信息,并存入 map 中。在 exit 环节,`opensnoop`
会根据 pid 读出之前存入的数据,再结合捕获的其他数据,输出到用户态处理函数中,展现给用户。
完整示例代码请参考https://github.com/eunomia-bpf/eunomia-bpf/tree/master/examples/bpftools/opensnoop
把头文件和源文件放在独立的目录里面,编译运行:
```bash
$ # 使用容器进行编译,生成一个 package.json 文件,里面是已经编译好的代码和一些辅助信息
$ docker run -it -v /path/to/opensnoop:/src yunwei37/ebpm:latest
$ # 运行 eBPF 程序root shell
$ sudo ecli run package.json
```
### Prometheus 可视化
编写 yaml 配置文件:
```yaml
programs:
- name: opensnoop
metrics:
counters:
- name: eunomia_file_open_counter
description: test
labels:
- name: pid
- name: comm
- name: filename
from: fname
compiled_ebpf_filename: package.json
```
使用 eunomia-exporter 实现导出信息到 Prometheus
- 通过 https://github.com/eunomia-bpf/eunomia-bpf/releases 下载 eunomia-exporter
```console
$ ls
config.yaml eunomia-exporter package.json
$ sudo ./eunomia-exporter
Running ebpf program opensnoop takes 46 ms
Listening on http://127.0.0.1:8526
running and waiting for the ebpf events from perf event...
Receiving request at path /metrics
```
![result](../img/opensnoop_prometheus.png)
### 总结和参考资料
`opensnoop` 通过对 open 系统调用的追踪,使得用户可以较为方便地掌握目前系统中调用了 open 系统调用的进程信息。
参考资料:
- 源代码https://github.com/eunomia-bpf/eunomia-bpf/tree/master/examples/bpftools/opensnoop
- libbpf 参考代码https://github.com/iovisor/bcc/blob/master/libbpf-tools/opensnoop.bpf.c
- eunomia-bpf 手册https://eunomia-bpf.github.io/

View File

@@ -1,281 +1,86 @@
---
layout: post
title: opensnoop
date: 2022-10-10 16:18
category: bpftools
author: yunwei37
tags: [bpftools, syscall]
summary: opensnoop traces the open() syscall system-wide, and prints various details.
---
# eBPF 入门开发实践指南四:捕获进程打开文件的系统调用集合,使用全局变量在 eBPF 中过滤进程 pid
## origin
eBPF (Extended Berkeley Packet Filter) 是 Linux 内核上的一个强大的网络和性能分析工具,它允许开发者在内核运行时动态加载、更新和运行用户定义的代码。
The kernel code is origin from:
本文是 eBPF 入门开发实践指南的第四篇,主要介绍如何捕获进程打开文件的系统调用集合,并使用全局变量在 eBPF 中过滤进程 pid。
<https://github.com/iovisor/bcc/blob/master/libbpf-tools/opensnoop.bpf.c>
## 在 eBPF 中捕获进程打开文件的系统调用集合
result:
首先,我们需要编写一段 eBPF 程序来捕获进程打开文件的系统调用,具体实现如下:
```console
$ sudo ecli examples/bpftools/opensnoop/package.json -h
Usage: opensnoop_bpf [--help] [--version] [--verbose] [--pid_target VAR] [--tgid_target VAR] [--uid_target VAR] [--failed]
```c
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
// Copyright (c) 2020 Netflix
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include "opensnoop.h"
Trace open family syscalls.
Optional arguments:
-h, --help shows help message and exits
-v, --version prints version information and exits
--verbose prints libbpf debug information
--pid_target Process ID to trace
--tgid_target Thread ID to trace
--uid_target User ID to trace
-f, --failed trace only failed events
/// Process ID to trace
const volatile int pid_target = 0;
Built with eunomia-bpf framework.
See https://github.com/eunomia-bpf/eunomia-bpf for more information.
SEC("tracepoint/syscalls/sys_enter_open")
int tracepoint__syscalls__sys_enter_open(struct trace_event_raw_sys_enter* ctx)
{
u64 id = bpf_get_current_pid_tgid();
u32 pid = id;
$ sudo ecli examples/bpftools/opensnoop/package.json
TIME TS PID UID RET FLAGS COMM FNAME
20:31:50 0 1 0 51 524288 systemd /proc/614/cgroup
20:31:50 0 33182 0 25 524288 ecli /etc/localtime
20:31:53 0 754 0 6 0 irqbalance /proc/interrupts
20:31:53 0 754 0 6 0 irqbalance /proc/stat
20:32:03 0 754 0 6 0 irqbalance /proc/interrupts
20:32:03 0 754 0 6 0 irqbalance /proc/stat
20:32:03 0 632 0 7 524288 vmtoolsd /etc/mtab
20:32:03 0 632 0 9 0 vmtoolsd /proc/devices
if (pid_target && pid_target != pid)
return false;
// Use bpf_printk to print the process information
bpf_printk("Process ID: %d enter sys open\n", pid);
return 0;
}
$ sudo ecli examples/bpftools/opensnoop/package.json --pid_target 754
TIME TS PID UID RET FLAGS COMM FNAME
20:34:13 0 754 0 6 0 irqbalance /proc/interrupts
20:34:13 0 754 0 6 0 irqbalance /proc/stat
20:34:23 0 754 0 6 0 irqbalance /proc/interrupts
20:34:23 0 754 0 6 0 irqbalance /proc/stat
SEC("tracepoint/syscalls/sys_enter_openat")
int tracepoint__syscalls__sys_enter_openat(struct trace_event_raw_sys_enter* ctx)
{
u64 id = bpf_get_current_pid_tgid();
u32 pid = id;
if (pid_target && pid_target != pid)
return false;
// Use bpf_printk to print the process information
bpf_printk("Process ID: %d enter sys openat\n", pid);
return 0;
}
/// Trace open family syscalls.
char LICENSE[] SEC("license") = "GPL";
```
## Compile and Run
上面的 eBPF 程序通过定义两个函数 tracepoint__syscalls__sys_enter_open 和 tracepoint__syscalls__sys_enter_openat 并使用 SEC 宏把它们附加到 sys_enter_open 和 sys_enter_openat 两个 tracepoint即在进入 open 和 openat 系统调用时执行)。这两个函数通过使用 bpf_get_current_pid_tgid 函数获取调用 open 或 openat 系统调用的进程 ID并使用 bpf_printk 函数在内核日志中打印出来。
Compile with docker:
```shell
docker run -it -v `pwd`/:/src/ yunwei37/ebpm:latest
```
or compile with `ecc`:
编译运行上述代码:
```console
$ ecc opensnoop.bpf.c opensnoop.h
$ ecc fentry-link.bpf.c
Compiling bpf object...
Generating export types...
Packing ebpf object and config into package.json...
$ sudo ecli package.json
Runing eBPF program...
```
Run:
```shell
sudo ./ecli run examples/bpftools/opensnoop/package.json
```
## details in bcc
Demonstrations of opensnoop, the Linux eBPF/bcc version.
opensnoop traces the open() syscall system-wide, and prints various details.
Example output:
运行这段程序后,可以通过查看 /sys/kernel/debug/tracing/trace_pipe 文件来查看 eBPF 程序的输出:
```console
# ./opensnoop
PID COMM FD ERR PATH
17326 <...> 7 0 /sys/kernel/debug/tracing/trace_pipe
1576 snmpd 9 0 /proc/net/dev
1576 snmpd 11 0 /proc/net/if_inet6
1576 snmpd 11 0 /proc/sys/net/ipv4/neigh/eth0/retrans_time_ms
1576 snmpd 11 0 /proc/sys/net/ipv6/neigh/eth0/retrans_time_ms
1576 snmpd 11 0 /proc/sys/net/ipv6/conf/eth0/forwarding
1576 snmpd 11 0 /proc/sys/net/ipv6/neigh/eth0/base_reachable_time_ms
1576 snmpd 11 0 /proc/sys/net/ipv4/neigh/lo/retrans_time_ms
1576 snmpd 11 0 /proc/sys/net/ipv6/neigh/lo/retrans_time_ms
1576 snmpd 11 0 /proc/sys/net/ipv6/conf/lo/forwarding
1576 snmpd 11 0 /proc/sys/net/ipv6/neigh/lo/base_reachable_time_ms
1576 snmpd 9 0 /proc/diskstats
1576 snmpd 9 0 /proc/stat
1576 snmpd 9 0 /proc/vmstat
1956 supervise 9 0 supervise/status.new
1956 supervise 9 0 supervise/status.new
17358 run 3 0 /etc/ld.so.cache
17358 run 3 0 /lib/x86_64-linux-gnu/libtinfo.so.5
17358 run 3 0 /lib/x86_64-linux-gnu/libdl.so.2
17358 run 3 0 /lib/x86_64-linux-gnu/libc.so.6
17358 run -1 6 /dev/tty
17358 run 3 0 /proc/meminfo
17358 run 3 0 /etc/nsswitch.conf
17358 run 3 0 /etc/ld.so.cache
17358 run 3 0 /lib/x86_64-linux-gnu/libnss_compat.so.2
17358 run 3 0 /lib/x86_64-linux-gnu/libnsl.so.1
17358 run 3 0 /etc/ld.so.cache
17358 run 3 0 /lib/x86_64-linux-gnu/libnss_nis.so.2
17358 run 3 0 /lib/x86_64-linux-gnu/libnss_files.so.2
17358 run 3 0 /etc/passwd
17358 run 3 0 ./run
^C
``
While tracing, the snmpd process opened various /proc files (reading metrics),
and a "run" process read various libraries and config files (looks like it
was starting up: a new process).
opensnoop can be useful for discovering configuration and log files, if used
during application startup.
```console
The -p option can be used to filter on a PID, which is filtered in-kernel. Here
I've used it with -T to print timestamps:
./opensnoop -Tp 1956
TIME(s) PID COMM FD ERR PATH
0.000000000 1956 supervise 9 0 supervise/status.new
0.000289999 1956 supervise 9 0 supervise/status.new
1.023068000 1956 supervise 9 0 supervise/status.new
1.023381997 1956 supervise 9 0 supervise/status.new
2.046030000 1956 supervise 9 0 supervise/status.new
2.046363000 1956 supervise 9 0 supervise/status.new
3.068203997 1956 supervise 9 0 supervise/status.new
3.068544999 1956 supervise 9 0 supervise/status.new
$ sudo cat /sys/kernel/debug/tracing/trace_pipe
<...>-3840345 [010] d... 3220701.101143: bpf_trace_printk: Process ID: 3840345 enter sys open
<...>-3840345 [010] d... 3220701.101179: bpf_trace_printk: Process ID: 3840345 enter sys openat
<...>-3840345 [010] d... 3220702.157967: bpf_trace_printk: Process ID: 3840345 enter sys open
<...>-3840345 [010] d... 3220702.158000: bpf_trace_printk: Process ID: 3840345 enter sys openat
```
This shows the supervise process is opening the status.new file twice every
second.
此时,我们已经能够捕获进程打开文件的系统调用了。
The -U option include UID on output:
## 使用全局变量在 eBPF 中过滤进程 pid
```console
# ./opensnoop -U
UID PID COMM FD ERR PATH
0 27063 vminfo 5 0 /var/run/utmp
103 628 dbus-daemon -1 2 /usr/local/share/dbus-1/system-services
103 628 dbus-daemon 18 0 /usr/share/dbus-1/system-services
103 628 dbus-daemon -1 2 /lib/dbus-1/system-services
```
在上面的程序中,我们定义了一个全局变量 pid_target 来指定要捕获的进程的 pid。在 tracepoint__syscalls__sys_enter_open 和 tracepoint__syscalls__sys_enter_openat 函数中,我们可以使用这个全局变量来过滤输出,只输出指定的进程的信息。
The -u option filtering UID:
可以通过执行 ecli -h 命令来查看 opensnoop 的帮助信息:
```console
# ./opensnoop -Uu 1000
UID PID COMM FD ERR PATH
1000 30240 ls 3 0 /etc/ld.so.cache
1000 30240 ls 3 0 /lib/x86_64-linux-gnu/libselinux.so.1
1000 30240 ls 3 0 /lib/x86_64-linux-gnu/libc.so.6
1000 30240 ls 3 0 /lib/x86_64-linux-gnu/libpcre.so.3
1000 30240 ls 3 0 /lib/x86_64-linux-gnu/libdl.so.2
1000 30240 ls 3 0 /lib/x86_64-linux-gnu/libpthread.so.0
```
```c
The -x option only prints failed opens:
```console
# ./opensnoop -x
PID COMM FD ERR PATH
18372 run -1 6 /dev/tty
18373 run -1 6 /dev/tty
18373 multilog -1 13 lock
18372 multilog -1 13 lock
18384 df -1 2 /usr/share/locale/en_US.UTF-8/LC_MESSAGES/coreutils.mo
18384 df -1 2 /usr/share/locale/en_US.utf8/LC_MESSAGES/coreutils.mo
18384 df -1 2 /usr/share/locale/en_US/LC_MESSAGES/coreutils.mo
18384 df -1 2 /usr/share/locale/en.UTF-8/LC_MESSAGES/coreutils.mo
18384 df -1 2 /usr/share/locale/en.utf8/LC_MESSAGES/coreutils.mo
18384 df -1 2 /usr/share/locale/en/LC_MESSAGES/coreutils.mo
18385 run -1 6 /dev/tty
18386 run -1 6 /dev/tty
```
This caught a df command failing to open a coreutils.mo file, and trying from
different directories.
The ERR column is the system error number. Error number 2 is ENOENT: no such
file or directory.
A maximum tracing duration can be set with the -d option. For example, to trace
for 2 seconds:
```console
# ./opensnoop -d 2
PID COMM FD ERR PATH
2191 indicator-multi 11 0 /sys/block
2191 indicator-multi 11 0 /sys/block
2191 indicator-multi 11 0 /sys/block
2191 indicator-multi 11 0 /sys/block
2191 indicator-multi 11 0 /sys/block
```
The -n option can be used to filter on process name using partial matches:
```console
# ./opensnoop -n ed
PID COMM FD ERR PATH
2679 sed 3 0 /etc/ld.so.cache
2679 sed 3 0 /lib/x86_64-linux-gnu/libselinux.so.1
2679 sed 3 0 /lib/x86_64-linux-gnu/libc.so.6
2679 sed 3 0 /lib/x86_64-linux-gnu/libpcre.so.3
2679 sed 3 0 /lib/x86_64-linux-gnu/libdl.so.2
2679 sed 3 0 /lib/x86_64-linux-gnu/libpthread.so.0
2679 sed 3 0 /proc/filesystems
2679 sed 3 0 /usr/lib/locale/locale-archive
2679 sed -1 2
2679 sed 3 0 /usr/lib/x86_64-linux-gnu/gconv/gconv-modules.cache
2679 sed 3 0 /dev/null
2680 sed 3 0 /etc/ld.so.cache
2680 sed 3 0 /lib/x86_64-linux-gnu/libselinux.so.1
2680 sed 3 0 /lib/x86_64-linux-gnu/libc.so.6
2680 sed 3 0 /lib/x86_64-linux-gnu/libpcre.so.3
2680 sed 3 0 /lib/x86_64-linux-gnu/libdl.so.2
2680 sed 3 0 /lib/x86_64-linux-gnu/libpthread.so.0
2680 sed 3 0 /proc/filesystems
2680 sed 3 0 /usr/lib/locale/locale-archive
2680 sed -1 2
^C
```
This caught the 'sed' command because it partially matches 'ed' that's passed
to the '-n' option.
The -e option prints out extra columns; for example, the following output
contains the flags passed to open(2), in octal:
```console
# ./opensnoop -e
PID COMM FD ERR FLAGS PATH
28512 sshd 10 0 00101101 /proc/self/oom_score_adj
28512 sshd 3 0 02100000 /etc/ld.so.cache
28512 sshd 3 0 02100000 /lib/x86_64-linux-gnu/libwrap.so.0
28512 sshd 3 0 02100000 /lib/x86_64-linux-gnu/libaudit.so.1
28512 sshd 3 0 02100000 /lib/x86_64-linux-gnu/libpam.so.0
28512 sshd 3 0 02100000 /lib/x86_64-linux-gnu/libselinux.so.1
28512 sshd 3 0 02100000 /lib/x86_64-linux-gnu/libsystemd.so.0
28512 sshd 3 0 02100000 /usr/lib/x86_64-linux-gnu/libcrypto.so.1.0.2
28512 sshd 3 0 02100000 /lib/x86_64-linux-gnu/libutil.so.1
```
The -f option filters based on flags to the open(2) call, for example:
```console
# ./opensnoop -e -f O_WRONLY -f O_RDWR
PID COMM FD ERR FLAGS PATH
28084 clear_console 3 0 00100002 /dev/tty
28084 clear_console -1 13 00100002 /dev/tty0
28084 clear_console -1 13 00100001 /dev/tty0
28084 clear_console -1 13 00100002 /dev/console
28084 clear_console -1 13 00100001 /dev/console
28051 sshd 8 0 02100002 /var/run/utmp
28051 sshd 7 0 00100001 /var/log/wtmp
```
The --cgroupmap option filters based on a cgroup set. It is meant to be used
with an externally created map.
```console
# ./opensnoop --cgroupmap /sys/fs/bpf/test01
```
For more details, see docs/special_filtering.md
```

View File

@@ -1,12 +0,0 @@
programs:
- name: opensnoop
metrics:
counters:
- name: eunomia_file_open_counter
description: test
labels:
- name: pid
- name: comm
- name: filename
from: fname
compiled_ebpf_filename: package.json

View File

@@ -5,72 +5,20 @@
#include <bpf/bpf_helpers.h>
#include "opensnoop.h"
struct args_t {
const char *fname;
int flags;
};
/// Process ID to trace
const volatile int pid_target = 0;
/// Thread ID to trace
const volatile int tgid_target = 0;
/// @description User ID to trace
const volatile int uid_target = 0;
/// @cmdarg {"default": false, "short": "f", "long": "failed"}
/// @description trace only failed events
const volatile bool targ_failed = false;
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 10240);
__type(key, u32);
__type(value, struct args_t);
} start SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(key_size, sizeof(u32));
__uint(value_size, sizeof(u32));
} events SEC(".maps");
static __always_inline bool valid_uid(uid_t uid) {
return uid != INVALID_UID;
}
static __always_inline
bool trace_allowed(u32 tgid, u32 pid)
{
u32 uid;
/* filters */
if (tgid_target && tgid_target != tgid)
return false;
if (pid_target && pid_target != pid)
return false;
if (valid_uid(uid_target)) {
uid = (u32)bpf_get_current_uid_gid();
if (uid_target != uid) {
return false;
}
}
return true;
}
SEC("tracepoint/syscalls/sys_enter_open")
int tracepoint__syscalls__sys_enter_open(struct trace_event_raw_sys_enter* ctx)
{
u64 id = bpf_get_current_pid_tgid();
/* use kernel terminology here for tgid/pid: */
u32 tgid = id >> 32;
u32 pid = id;
/* store arg info for later lookup */
if (trace_allowed(tgid, pid)) {
struct args_t args = {};
args.fname = (const char *)ctx->args[0];
args.flags = (int)ctx->args[1];
bpf_map_update_elem(&start, &pid, &args, 0);
}
if (pid_target && pid_target != pid)
return false;
// Use bpf_printk to print the process information
bpf_printk("Process ID: %d enter sys open\n", pid);
return 0;
}
@@ -78,63 +26,14 @@ SEC("tracepoint/syscalls/sys_enter_openat")
int tracepoint__syscalls__sys_enter_openat(struct trace_event_raw_sys_enter* ctx)
{
u64 id = bpf_get_current_pid_tgid();
/* use kernel terminology here for tgid/pid: */
u32 tgid = id >> 32;
u32 pid = id;
/* store arg info for later lookup */
if (trace_allowed(tgid, pid)) {
struct args_t args = {};
args.fname = (const char *)ctx->args[1];
args.flags = (int)ctx->args[2];
bpf_map_update_elem(&start, &pid, &args, 0);
}
if (pid_target && pid_target != pid)
return false;
// Use bpf_printk to print the process information
bpf_printk("Process ID: %d enter sys openat\n", pid);
return 0;
}
static __always_inline
int trace_exit(struct trace_event_raw_sys_exit* ctx)
{
struct event event = {};
struct args_t *ap;
int ret;
u32 pid = bpf_get_current_pid_tgid();
ap = bpf_map_lookup_elem(&start, &pid);
if (!ap)
return 0; /* missed entry */
ret = ctx->ret;
if (targ_failed && ret >= 0)
goto cleanup; /* want failed only */
/* event data */
event.pid = bpf_get_current_pid_tgid() >> 32;
event.uid = bpf_get_current_uid_gid();
bpf_get_current_comm(&event.comm, sizeof(event.comm));
bpf_probe_read_user_str(&event.fname, sizeof(event.fname), ap->fname);
event.flags = ap->flags;
event.ret = ret;
/* emit event */
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
&event, sizeof(event));
cleanup:
bpf_map_delete_elem(&start, &pid);
return 0;
}
SEC("tracepoint/syscalls/sys_exit_open")
int tracepoint__syscalls__sys_exit_open(struct trace_event_raw_sys_exit* ctx)
{
return trace_exit(ctx);
}
SEC("tracepoint/syscalls/sys_exit_openat")
int tracepoint__syscalls__sys_exit_openat(struct trace_event_raw_sys_exit* ctx)
{
return trace_exit(ctx);
}
/// Trace open family syscalls.
char LICENSE[] SEC("license") = "GPL";