This commit is contained in:
victoryang00
2025-07-05 14:53:54 +00:00
parent 179fe4a49e
commit 1c9542cbc2
11 changed files with 1251 additions and 0 deletions

32
src/48-energy/.gitignore vendored Normal file
View File

@@ -0,0 +1,32 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
venv/
env/
ENV/
# Energy monitoring data
*.csv
*.json
energy_log_*
.vscode
package.json
*.o
*.skel.json
*.skel.yaml
package.yaml
ecli
bootstrap
# IDE
.vscode/
.idea/
*.swp
*.swo
# OS
.DS_Store
Thumbs.db

141
src/48-energy/Makefile Normal file
View File

@@ -0,0 +1,141 @@
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
# Directory that receives every generated artifact (objects, skeletons,
# static libraries, the bootstrap bpftool).
OUTPUT := .output
# Compiler used for the BPF objects; overridable from the environment or
# the command line.
CLANG ?= clang
# Vendored libbpf / bpftool sources and the places their builds are written.
LIBBPF_SRC := $(abspath ../third_party/libbpf/src)
BPFTOOL_SRC := $(abspath ../third_party/bpftool/src)
LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a)
BPFTOOL_OUTPUT ?= $(abspath $(OUTPUT)/bpftool)
BPFTOOL ?= $(BPFTOOL_OUTPUT)/bootstrap/bpftool
# Vendored blazesym sources, plus the static library and header that get
# copied into $(OUTPUT).
LIBBLAZESYM_SRC := $(abspath ../third_party/blazesym/)
LIBBLAZESYM_OBJ := $(abspath $(OUTPUT)/libblazesym.a)
LIBBLAZESYM_HEADER := $(abspath $(OUTPUT)/blazesym.h)
# Rewrite `uname -m` output into the architecture name used below for the
# vmlinux.h directory and for the -D__TARGET_ARCH_<arch> define.
ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \
			 | sed 's/arm.*/arm/' \
			 | sed 's/aarch64/arm64/' \
			 | sed 's/ppc64le/powerpc/' \
			 | sed 's/mips.*/mips/' \
			 | sed 's/riscv64/riscv/' \
			 | sed 's/loongarch64/loongarch/')
# Kernel type header for the selected architecture.
VMLINUX := ../third_party/vmlinux/$(ARCH)/vmlinux.h
# Use our own libbpf API headers and Linux UAPI headers distributed with
# libbpf to avoid dependency on system-wide headers, which could be missing or
# outdated
INCLUDES := -I$(OUTPUT) -I../third_party/libbpf/include/uapi -I$(dir $(VMLINUX))
# Flags for the user-space compile and link steps.
CFLAGS := -g -Wall
# Linker flags collected from the caller's LDFLAGS and EXTRA_LDFLAGS.
ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS)
# Programs built by `all`. Everything after '#' on this line is a comment,
# so only `bootstrap` is enabled.
APPS = bootstrap # minimal minimal_legacy uprobe kprobe fentry usdt sockfilter tc ksyscall
# blazesym-based apps are only added when `which cargo` finds a cargo binary.
CARGO ?= $(shell which cargo)
ifeq ($(strip $(CARGO)),)
BZS_APPS :=
else
# No blazesym-based app is enabled at the moment (`profile` is commented out).
BZS_APPS := # profile
APPS += $(BZS_APPS)
# Required by libblazesym
ALL_LDFLAGS += -lrt -ldl -lpthread -lm
endif
# Get Clang's default includes on this system. We'll explicitly add these dirs
# to the includes list when compiling with `-target bpf` because otherwise some
# architecture-specific dirs will be "missing" on some architectures/distros -
# headers such as asm/types.h, asm/byteorder.h, asm/socket.h, asm/sockios.h,
# sys/cdefs.h etc. might be missing.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
# build would have failed anyways.
CLANG_BPF_SYS_INCLUDES ?= $(shell $(CLANG) -v -E - </dev/null 2>&1 \
	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
# Verbosity control.
#   make V=1  -> Q and msg are empty, so make echoes every command in full.
#   otherwise -> Q prefixes commands with '@' (not echoed) and msg prints one
#                short line: a label ($(1)), a path ($(2)) with the absolute
#                $(OUTPUT) prefix removed, and an optional third field ($(3)).
ifeq ($(V),1)
  Q =
  msg =
else
  Q = @
  msg = @printf ' %-8s %s%s\n' \
	      "$(1)" \
	      "$(patsubst $(abspath $(OUTPUT))/%,%,$(2))" \
	      "$(if $(3), $(3))";
  MAKEFLAGS += --no-print-directory
endif
# allow-override: $(call allow-override,VAR,value)
# Assigns `VAR = value` unless VAR's origin is the environment or the command
# line, in which case the caller's value is left untouched. Used below so CC
# and LD default to the $(CROSS_COMPILE)-prefixed tools.
define allow-override
  $(if $(or $(findstring environment,$(origin $(1))),\
            $(findstring command line,$(origin $(1)))),,\
    $(eval $(1) = $(2)))
endef
$(call allow-override,CC,$(CROSS_COMPILE)cc)
$(call allow-override,LD,$(CROSS_COMPILE)ld)
# Default goal: build every program listed in APPS.
.PHONY: all
all: $(APPS)

# Remove the output directory and the application binaries.
.PHONY: clean
clean:
	$(call msg,CLEAN)
	$(Q)rm -rf $(OUTPUT) $(APPS)

# Create the output directories on demand; other rules list them as
# order-only prerequisites (after '|').
$(OUTPUT) $(OUTPUT)/libbpf $(BPFTOOL_OUTPUT):
	$(call msg,MKDIR,$@)
	$(Q)mkdir -p $@
# Build libbpf
# Runs libbpf's own Makefile with BUILD_STATIC_ONLY=1 and its `install`
# target, with DESTDIR pointing at the directory of $@ and empty
# INCLUDEDIR/LIBDIR/UAPIDIR. Re-runs when any libbpf .c/.h file or its
# Makefile is newer than the archive.
$(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf
	$(call msg,LIB,$@)
	$(Q)$(MAKE) -C $(LIBBPF_SRC) BUILD_STATIC_ONLY=1 \
		    OBJDIR=$(dir $@)/libbpf DESTDIR=$(dir $@) \
		    INCLUDEDIR= LIBDIR= UAPIDIR= \
		    install

# Build bpftool
# Builds only bpftool's `bootstrap` target. ARCH and CROSS_COMPILE are passed
# empty to the sub-make. The rule has no source prerequisites, so it runs only
# when $(BPFTOOL) does not exist yet.
$(BPFTOOL): | $(BPFTOOL_OUTPUT)
	$(call msg,BPFTOOL,$@)
	$(Q)$(MAKE) ARCH= CROSS_COMPILE= OUTPUT=$(BPFTOOL_OUTPUT)/ -C $(BPFTOOL_SRC) bootstrap
# Build blazesym with cargo. This is a double-colon rule without
# prerequisites, so its recipe runs every time the target is considered.
$(LIBBLAZESYM_SRC)/target/release/libblazesym.a::
	$(Q)cd $(LIBBLAZESYM_SRC) && $(CARGO) build --features=cheader,dont-generate-test-files --release

# Copy the static library produced by cargo into $(OUTPUT).
$(LIBBLAZESYM_OBJ): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT)
	$(call msg,LIB, $@)
	$(Q)cp $(LIBBLAZESYM_SRC)/target/release/libblazesym.a $@

# Copy blazesym.h from the cargo release directory into $(OUTPUT).
$(LIBBLAZESYM_HEADER): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT)
	$(call msg,LIB,$@)
	$(Q)cp $(LIBBLAZESYM_SRC)/target/release/blazesym.h $@
# Build BPF code
# Compiles <name>.bpf.c with clang for the bpf target into
# $(OUTPUT)/<name>.tmp.bpf.o, then runs `bpftool gen object` on it to produce
# the final $(OUTPUT)/<name>.bpf.o.
#
# Header tracking: prerequisites are expanded when the rule is read, before
# '%' is bound to a stem, so `$(wildcard %.h)` globs for a file literally
# named "%.h" and expands to nothing. `$(wildcard *.h)` lists every header in
# this directory instead, so editing any of them rebuilds the object. Only
# the .c prerequisite reaches the compiler because of the $(filter %.c,$^).
$(OUTPUT)/%.bpf.o: %.bpf.c $(LIBBPF_OBJ) $(wildcard *.h) $(VMLINUX) | $(OUTPUT) $(BPFTOOL)
	$(call msg,BPF,$@)
	$(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) \
		     $(INCLUDES) $(CLANG_BPF_SYS_INCLUDES) \
		     -c $(filter %.c,$^) -o $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
	$(Q)$(BPFTOOL) gen object $@ $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
# Generate BPF skeletons
# Writes the output of `bpftool gen skeleton` for $(OUTPUT)/<name>.bpf.o to
# $(OUTPUT)/<name>.skel.h.
$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(OUTPUT) $(BPFTOOL)
	$(call msg,GEN-SKEL,$@)
	$(Q)$(BPFTOOL) gen skeleton $< > $@
# Build user-space code
# Every app object depends on its generated skeleton header, so the BPF object
# and skeleton are built before the user-space source is compiled.
$(patsubst %,$(OUTPUT)/%.o,$(APPS)): %.o: %.skel.h

# Compile <name>.c into $(OUTPUT)/<name>.o.
# `$(wildcard *.h)` replaces `$(wildcard %.h)`: the latter is expanded when
# the rule is read, with a literal '%', and therefore matched no file, so
# header changes never triggered a rebuild. Only the .c prerequisite is passed
# to the compiler because of the $(filter %.c,$^).
$(OUTPUT)/%.o: %.c $(wildcard *.h) | $(OUTPUT)
	$(call msg,CC,$@)
	$(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@

# blazesym-based apps additionally need the blazesym header to compile and the
# blazesym static library to link.
$(patsubst %,$(OUTPUT)/%.o,$(BZS_APPS)): $(LIBBLAZESYM_HEADER)
$(BZS_APPS): $(LIBBLAZESYM_OBJ)
# Build application binary
# Links $(OUTPUT)/<app>.o with the static libbpf (and any other normal
# prerequisites in $^) plus -lelf and -lz into ./<app>.
$(APPS): %: $(OUTPUT)/%.o $(LIBBPF_OBJ) | $(OUTPUT)
	$(call msg,BINARY,$@)
	$(Q)$(CC) $(CFLAGS) $^ $(ALL_LDFLAGS) -lelf -lz -o $@

# delete failed targets
.DELETE_ON_ERROR:

# keep intermediate (.skel.h, .bpf.o, etc) targets
.SECONDARY:

73
src/48-energy/README.md Normal file
View File

@@ -0,0 +1,73 @@
# System Energy Monitoring with Intel RAPL
This project provides tools to monitor system energy consumption using Intel's Running Average Power Limit (RAPL) interface.
## Features
- Real-time power consumption monitoring
- Live terminal-based display of power usage across different domains (CPU, DRAM, etc.)
- Data logging to CSV or JSON formats
- Support for multiple Intel RAPL domains
- `debug_energy.py` uses only the Python standard library; `energy_monitor.py` also imports `matplotlib` for its plotting features
## Requirements
- Intel CPU with RAPL support
- Python 3.6+
- Root access or appropriate permissions for `/sys/class/powercap/intel-rapl`
## Installation
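`debug_energy.py` needs only the Python standard library. `energy_monitor.py` imports `matplotlib` at module level, so install it first (for example `pip install matplotlib`).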
## Usage
### Real-time Monitoring
```bash
sudo python3 energy_monitor.py
```
This displays real-time power consumption in the terminal:
- Power consumption for each domain (Package, DRAM, etc.)
- Total system power consumption
- Updates roughly every 0.1 seconds
### Logging Energy Data
```bash
sudo python3 energy_monitor.py -l -d 300 -i 0.5 -f csv -o my_energy_log
```
Options:
- `-d, --duration`: Monitoring duration in seconds (default: 60)
- `-i, --interval`: Sampling interval in seconds (default: 1.0)
- `-f, --format`: Output format - csv or json (default: csv)
- `-o, --output`: Output filename without extension
## Permissions
If you don't want to run with sudo, adjust permissions:
```bash
sudo chmod -R a+r /sys/class/powercap/intel-rapl
```
Note: This allows all users to read RAPL data but not modify power limits.
## RAPL Domains
Common domains include:
- `package-0`: Entire CPU package power
- `core`: CPU cores power
- `uncore`: CPU uncore components (cache, memory controller)
- `dram`: Memory power consumption
## Example Output
The logger provides a summary like:
```
Total samples: 300
Average power: 45.23 W
Total energy: 0.0377 Wh
```

View File

@@ -0,0 +1,112 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Copyright (c) 2020 Facebook */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
#include "bootstrap.h"
/* License string placed in the "license" section of the BPF object. */
char LICENSE[] SEC("license") = "Dual BSD/GPL";

/*
 * exec_start: hash map from PID to a u64 timestamp (up to 8192 entries).
 * handle_exec stores bpf_ktime_get_ns() under the current PID; handle_exit
 * looks the entry up to compute the process lifetime and then deletes it.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 8192);
	__type(key, pid_t);
	__type(value, u64);
} exec_start SEC(".maps");

/*
 * rb: ring buffer (max_entries = 256 * 1024) through which both programs
 * submit struct event samples.
 */
struct {
	__uint(type, BPF_MAP_TYPE_RINGBUF);
	__uint(max_entries, 256 * 1024);
} rb SEC(".maps");

/*
 * Minimum process duration in nanoseconds. When non-zero, handle_exec emits
 * no events and handle_exit drops processes with a shorter lifetime.
 */
const volatile unsigned long long min_duration_ns = 0;
/*
 * Tracepoint program for sched/sched_process_exec.
 *
 * Records the exec timestamp of the current PID in exec_start and, unless a
 * minimum duration is configured, submits an "exec" struct event (pid, ppid,
 * comm, filename) to the ring buffer. Always returns 0.
 */
SEC("tp/sched/sched_process_exec")
int handle_exec(struct trace_event_raw_sched_process_exec *ctx)
{
	struct task_struct *task;
	unsigned fname_off;
	struct event *e;
	pid_t pid;
	u64 ts;

	/* remember time exec() was executed for this PID */
	/* the upper 32 bits of bpf_get_current_pid_tgid() are used as the PID */
	pid = bpf_get_current_pid_tgid() >> 32;
	ts = bpf_ktime_get_ns();
	bpf_map_update_elem(&exec_start, &pid, &ts, BPF_ANY);

	/* don't emit exec events when minimum duration is specified */
	if (min_duration_ns)
		return 0;

	/* reserve sample from BPF ringbuf */
	e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
	if (!e)
		/* reservation failed: the event is dropped */
		return 0;

	/* fill out the sample with data */
	/* exit_code and duration_ns are not written for exec events */
	task = (struct task_struct *)bpf_get_current_task();
	e->exit_event = false;
	e->pid = pid;
	e->ppid = BPF_CORE_READ(task, real_parent, tgid);
	bpf_get_current_comm(&e->comm, sizeof(e->comm));

	/*
	 * the low 16 bits of __data_loc_filename are used as the byte offset
	 * of the filename string relative to the start of ctx
	 */
	fname_off = ctx->__data_loc_filename & 0xFFFF;
	bpf_probe_read_str(&e->filename, sizeof(e->filename), (void *)ctx + fname_off);

	/* successfully submit it to user-space for post-processing */
	bpf_ringbuf_submit(e, 0);
	return 0;
}
/*
 * Tracepoint program for sched/sched_process_exit.
 *
 * Ignores exits where PID != TID, computes the lifetime from the timestamp
 * stored in exec_start (0 when none was recorded), applies the
 * min_duration_ns filter and submits an "exit" struct event to the ring
 * buffer. Always returns 0.
 */
SEC("tp/sched/sched_process_exit")
int handle_exit(struct trace_event_raw_sched_process_template* ctx)
{
	struct task_struct *task;
	struct event *e;
	pid_t pid, tid;
	u64 id, ts, *start_ts, duration_ns = 0;

	/* get PID and TID of exiting thread/process */
	id = bpf_get_current_pid_tgid();
	pid = id >> 32;
	tid = (u32)id;

	/* ignore thread exits */
	if (pid != tid)
		return 0;

	/* if we recorded start of the process, calculate lifetime duration */
	start_ts = bpf_map_lookup_elem(&exec_start, &pid);
	if (start_ts)
		duration_ns = bpf_ktime_get_ns() - *start_ts;
	else if (min_duration_ns)
		/* no recorded start and a threshold is set: nothing to report */
		return 0;
	bpf_map_delete_elem(&exec_start, &pid);

	/* if process didn't live long enough, return early */
	if (min_duration_ns && duration_ns < min_duration_ns)
		return 0;

	/* reserve sample from BPF ringbuf */
	e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
	if (!e)
		/* reservation failed: the event is dropped */
		return 0;

	/* fill out the sample with data */
	/* filename is not written for exit events */
	task = (struct task_struct *)bpf_get_current_task();

	e->exit_event = true;
	e->duration_ns = duration_ns;
	e->pid = pid;
	e->ppid = BPF_CORE_READ(task, real_parent, tgid);
	/* report bits 8-15 of task->exit_code */
	e->exit_code = (BPF_CORE_READ(task, exit_code) >> 8) & 0xff;
	bpf_get_current_comm(&e->comm, sizeof(e->comm));

	/* send data to user-space for post-processing */
	bpf_ringbuf_submit(e, 0);
	return 0;
}

173
src/48-energy/bootstrap.c Normal file
View File

@@ -0,0 +1,173 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/* Copyright (c) 2020 Facebook */
#include <argp.h>
#include <signal.h>
#include <stdio.h>
#include <time.h>
#include <sys/resource.h>
#include <bpf/libbpf.h>
#include "bootstrap.h"
#include "bootstrap.skel.h"
/* Command-line settings filled in by parse_arg(). */
static struct env {
	bool verbose;		/* -v: also print libbpf debug-level messages */
	long min_duration_ms;	/* -d: minimum process duration in ms (0 = unset) */
} env;

/* Program metadata read by the argp library. */
const char *argp_program_version = "bootstrap 0.0";
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
const char argp_program_doc[] =
"BPF bootstrap demo application.\n"
"\n"
"It traces process start and exits and shows associated \n"
"information (filename, process duration, PID and PPID, etc).\n"
"\n"
"USAGE: ./bootstrap [-d <min-duration-ms>] [-v]\n";

/* Option table handled by parse_arg(); the empty entry terminates it. */
static const struct argp_option opts[] = {
	{ "verbose", 'v', NULL, 0, "Verbose debug output" },
	{ "duration", 'd', "DURATION-MS", 0, "Minimum process duration (ms) to report" },
	{},
};
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
switch (key) {
case 'v':
env.verbose = true;
break;
case 'd':
errno = 0;
env.min_duration_ms = strtol(arg, NULL, 10);
if (errno || env.min_duration_ms <= 0) {
fprintf(stderr, "Invalid duration: %s\n", arg);
argp_usage(state);
}
break;
case ARGP_KEY_ARG:
argp_usage(state);
break;
default:
return ARGP_ERR_UNKNOWN;
}
return 0;
}
/* argp descriptor passed to argp_parse() in main(): options, parser callback, help text. */
static const struct argp argp = {
	.options = opts,
	.parser = parse_arg,
	.doc = argp_program_doc,
};
/*
 * libbpf print callback: forwards messages to stderr, dropping debug-level
 * ones unless verbose output was requested. Returns vfprintf()'s result, or
 * 0 for a suppressed message.
 */
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
{
	bool suppressed = level == LIBBPF_DEBUG && !env.verbose;

	return suppressed ? 0 : vfprintf(stderr, format, args);
}
/*
 * Set by sig_handler() and polled by the main loop. volatile sig_atomic_t is
 * the object type the C standard allows a signal handler to write.
 */
static volatile sig_atomic_t exiting;

/* Signal handler: request that the polling loop stops. */
static void sig_handler(int sig)
{
	(void)sig;	/* the same action is taken for every handled signal */
	exiting = 1;
}
/*
 * Ring buffer callback: print one line per struct event.
 *
 * ctx and data_sz are unused; data points at the struct event sample.
 * The line starts with the current local time (HH:MM:SS), followed by the
 * EXEC or EXIT fields. Always returns 0.
 */
static int handle_event(void *ctx, void *data, size_t data_sz)
{
	const struct event *e = data;
	char ts[32];
	time_t now = time(NULL);

	strftime(ts, sizeof(ts), "%H:%M:%S", localtime(&now));

	if (!e->exit_event) {
		printf("%-8s %-5s %-16s %-7d %-7d %s\n",
		       ts, "EXEC", e->comm, e->pid, e->ppid, e->filename);
		return 0;
	}

	printf("%-8s %-5s %-16s %-7d %-7d [%u]",
	       ts, "EXIT", e->comm, e->pid, e->ppid, e->exit_code);
	/* the duration is appended only when it is non-zero */
	if (e->duration_ns)
		printf(" (%llums)", e->duration_ns / 1000000);
	printf("\n");

	return 0;
}
/*
 * Entry point: parse options, open/parameterize/load/attach the BPF
 * skeleton, then poll the ring buffer until a signal sets `exiting` or
 * polling fails.
 *
 * Returns the argp error code on bad arguments, 1 when the skeleton cannot be
 * opened, the negated error code when a later step fails, and 0 otherwise.
 */
int main(int argc, char **argv)
{
	struct ring_buffer *rb = NULL;
	struct bootstrap_bpf *skel;
	int err;

	/* Parse command line arguments */
	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
	if (err)
		return err;

	/* Set up libbpf errors and debug info callback */
	libbpf_set_print(libbpf_print_fn);

	/* Cleaner handling of Ctrl-C */
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);

	/* Open BPF application (loading happens below) */
	skel = bootstrap_bpf__open();
	if (!skel) {
		/* only the open step ran, so say so */
		fprintf(stderr, "Failed to open BPF skeleton\n");
		return 1;
	}

	/* Parameterize BPF code with minimum duration parameter */
	skel->rodata->min_duration_ns = env.min_duration_ms * 1000000ULL;

	/* Load & verify BPF programs */
	err = bootstrap_bpf__load(skel);
	if (err) {
		fprintf(stderr, "Failed to load and verify BPF skeleton\n");
		goto cleanup;
	}

	/* Attach tracepoints */
	err = bootstrap_bpf__attach(skel);
	if (err) {
		fprintf(stderr, "Failed to attach BPF skeleton\n");
		goto cleanup;
	}

	/* Set up ring buffer polling */
	rb = ring_buffer__new(bpf_map__fd(skel->maps.rb), handle_event, NULL, NULL);
	if (!rb) {
		err = -1;
		fprintf(stderr, "Failed to create ring buffer\n");
		goto cleanup;
	}

	/* Process events */
	printf("%-8s %-5s %-16s %-7s %-7s %s\n",
	       "TIME", "EVENT", "COMM", "PID", "PPID", "FILENAME/EXIT CODE");
	while (!exiting) {
		err = ring_buffer__poll(rb, 100 /* timeout, ms */);
		/* Ctrl-C will cause -EINTR */
		if (err == -EINTR) {
			err = 0;
			break;
		}
		if (err < 0) {
			/* diagnostics go to stderr; this is a ring buffer, not a perf buffer */
			fprintf(stderr, "Error polling ring buffer: %d\n", err);
			break;
		}
	}

cleanup:
	/* Clean up */
	ring_buffer__free(rb);
	bootstrap_bpf__destroy(skel);

	return err < 0 ? -err : 0;
}

19
src/48-energy/bootstrap.h Normal file
View File

@@ -0,0 +1,19 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/* Copyright (c) 2020 Facebook */
#ifndef __BOOTSTRAP_H
#define __BOOTSTRAP_H

/* Size in bytes of the comm buffer in struct event. */
#define TASK_COMM_LEN 16
/* Size in bytes of the filename buffer in struct event. */
#define MAX_FILENAME_LEN 127

/*
 * Sample passed from the BPF programs to user space through the ring buffer.
 *
 * NOTE(review): `bool` is not declared by this header; it relies on the
 * including file having it in scope already.
 */
struct event {
	int pid;			/* process ID */
	int ppid;			/* parent's tgid */
	unsigned exit_code;		/* written for exit events only */
	unsigned long long duration_ns;	/* written for exit events only */
	char comm[TASK_COMM_LEN];	/* command name of the current task */
	char filename[MAX_FILENAME_LEN];	/* written for exec events only */
	bool exit_event;		/* true for exit events, false for exec events */
};

#endif /* __BOOTSTRAP_H */

102
src/48-energy/debug_energy.py Executable file
View File

@@ -0,0 +1,102 @@
#!/usr/bin/env python3
"""
Debug script to check RAPL energy readings
"""
import os
import time
def check_rapl(rapl_base="/sys/class/powercap/intel-rapl", samples=10, interval=0.5):
    """Print a diagnostic report about the RAPL energy counters.

    Checks that the powercap directory exists, reports which top-level
    ``energy_uj`` files are readable, samples every discovered domain
    ``samples`` times with ``interval`` seconds between rounds, and prints the
    first/last reading, the number of distinct values and the average power.

    Args:
        rapl_base: Directory holding the ``intel-rapl:*`` domain directories.
        samples: Number of sampling rounds.
        interval: Seconds to sleep after each round.

    Returns:
        None. All results are printed.
    """
    print("Checking Intel RAPL availability...")
    print("=" * 50)

    if not os.path.exists(rapl_base):
        print(f"ERROR: {rapl_base} does not exist!")
        print("Intel RAPL may not be available on this system.")
        return

    # Check permissions
    print("\nChecking permissions...")
    for item in os.listdir(rapl_base):
        if item.startswith("intel-rapl:"):
            energy_file = os.path.join(rapl_base, item, "energy_uj")
            if os.path.exists(energy_file):
                readable = os.access(energy_file, os.R_OK)
                print(f"{energy_file}: {'readable' if readable else 'NOT readable'}")

    print("\n" + "=" * 50)
    # With the defaults this prints "over 5 seconds".
    print(f"Reading energy values over {samples * interval:g} seconds...")
    print("=" * 50)

    # Discover domains: map each domain's name to its energy counter file.
    domains = {}
    for item in os.listdir(rapl_base):
        path = os.path.join(rapl_base, item)
        if os.path.isdir(path) and item.startswith("intel-rapl:"):
            try:
                with open(os.path.join(path, "name"), "r") as f:
                    name = f.read().strip()
            except OSError as e:
                # Report the unreadable domain instead of hiding every error.
                print(f"Skipping {item}: {e}")
                continue
            energy_file = os.path.join(path, "energy_uj")
            if os.path.exists(energy_file):
                domains[name] = energy_file

    if not domains:
        print("ERROR: No RAPL domains found!")
        return

    print(f"Found domains: {', '.join(domains.keys())}\n")

    # Read energy values multiple times. Each reading is stored with the time
    # it was taken, so a failed read cannot skew the elapsed-time calculation.
    readings = {domain: [] for domain in domains}
    for _ in range(samples):
        for domain, energy_file in domains.items():
            try:
                with open(energy_file, "r") as f:
                    energy = int(f.read().strip())
                readings[domain].append((time.monotonic(), energy))
            except (OSError, ValueError) as e:
                print(f"Error reading {domain}: {e}")
        time.sleep(interval)

    # Analyze readings
    print("\nAnalysis:")
    print("-" * 50)
    for domain, series in readings.items():
        if len(series) < 2:
            continue

        values = [energy for _, energy in series]
        print(f"\n{domain}:")
        print(f"  First reading: {values[0]} µJ")
        print(f"  Last reading: {values[-1]} µJ")
        print(f"  Difference: {values[-1] - values[0]} µJ")

        # Check if values are changing
        unique_values = len(set(values))
        print(f"  Unique values: {unique_values}")

        if unique_values == 1:
            print("  ⚠️  WARNING: Energy values are not changing!")
        else:
            # Calculate average power over the measured time span.
            energy_diff = values[-1] - values[0]
            time_diff = series[-1][0] - series[0][0]
            if energy_diff > 0 and time_diff > 0:
                power = (energy_diff / 1e6) / time_diff
                print(f"  Average power: {power:.2f} W")
            elif energy_diff <= 0:
                # The readings changed but the last one is not above the first.
                print("  Counter wrapped around during sampling; average power not computed")

    print("\n" + "=" * 50)
    print("\nPossible issues if readings are zero:")
    print("1. The system is idle with very low power consumption")
    print("2. RAPL updates may be infrequent (try longer sampling intervals)")
    print("3. Permission issues (try running with sudo)")
    print("4. RAPL may not be fully supported on this CPU")


if __name__ == "__main__":
    check_rapl()

View File

@@ -0,0 +1,70 @@
Below is a quick-scan map of **public eBPF projects & papers that touch CPU power-management knobs (DVFS, idle, thermal) or pure energy accounting.**
I've grouped them so you can see where work already exists and where the gap still is.
---
## 1 Projects/papers that *try to control* DVFS / idle / thermal directly
| Name & date | What it does with eBPF | Sub-knobs covered | Status / notes |
| --------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------ | -------------------------------------------------------------------------------------------------------------- |
| **`cpufreq_ext` RFC (Zou, 2024)** | Hooks the cpufreq governor into a `bpf_struct_ops` table (`get_next_freq()` etc.) so a policy can be written in eBPF instead of C. Integrates with `sched_ext` to let a BPF scheduler and a BPF DVFS policy co-operate. | **DVFS** (per-policy frequency) | RFC on linux-pm & bpf lists. Compiles on ≥ 6.9 kernels; crude sample policy included. ([lwn.net][1]) |
| **eBPF CPU-Idle governor prototype (Eco-Compute summit, 2024)** | Replaces the “menu/TEO” cpuidle governor with a BPF hook so that idle-state choice and idle-injection can be decided in eBPF. | **Idle states** (C-states), idle injection | Academic prototype; slides only, but code expected to be released by the Eco-Compute students. ([jauu.net][2]) |
| **Early “power-driver” & BEAR lineage** | Molnar/Rasmussen's 2013 power-driver idea was to unify `go_faster/go_slower/enter_idle`. Our BEAR concept simply modernises this with eBPF. No public code yet, but it shows the *direction* the kernel community is discussing. | **DVFS + Idle + Thermal** (goal) | Design idea; opportunity for a full implementation (research gap). ([jauu.net][2], [lwn.net][1]) |
> **Reality check:** right now cpufreq\_ext is the *only* upstream-bound eBPF code that truly changes CPU frequency. Idle and thermal hooks are still research prototypes, so this area is wide-open if you want to publish.
---
## 2 eBPF projects focused on **energy telemetry / accounting**
*(These don't set DVFS or idle, but they give the per-process or per-container energy data you'd need to *drive* such policies.)*
| Name | Scope & technique | Why it matters |
| -------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
| **Wattmeter / *Energy-Aware Process Scheduling in Linux* (HotCarbon '24)** | Attaches an eBPF program to every context-switch to read RAPL MSRs in-kernel, giving millisecond-scale per-process joules with <1 µs overhead. Used to build energy-fair and energy-capped schedulers on top of ghOSt/sched\_ext. | Gives accurate, low-overhead energy numbers that could feed a DVFS/thermal policy. ([asafcidon.com][3]) |
| **Kepler (CNCF sandbox, 2023-)** | A Prometheus exporter for Kubernetes. Uses eBPF tracepoints + perf counters + RAPL/NVML to attribute energy to pods/containers; ships ML models for platforms that lack RAPL. | Quickly gaining traction in cloud-native stacks; good data source for cluster-level power orchestration. ([sustainable-computing.io][4]) |
| **DEEP-mon (Polimi, 2018)** | In-kernel eBPF aggregation of scheduler events to attribute power to Docker containers; <5 % runtime overhead on Phoronix & NPB. | Older but shows in-kernel aggregation trick to avoid 200 k sched-switch/sec user-space wake-ups. ([slideshare.net][5]) |
| **eBPF-energy-monitor (GitHub toy project)** | Minimal BCC script that latches on `sched_switch`, tracks CPU-time per PID, multiplies by per-core joules from RAPL. | Handy starting point if you just need a working demo. ([github.com][6]) |
| **DEEP-mon spin-offs: BitWatts, Scaphandre** | Both offer software-defined power meters; BitWatts focuses on VMs, Scaphandre on bare-metal & K8s. Scaphandre can optionally compile an eBPF sensor module for process attribution. | Good for comparing accuracy / overhead trade-offs vs. Kepler. ([github.com][7], [github.com][8]) |
---
### 3 Quick take-aways
* **Very little published work** uses eBPF to *control* DVFS/idle/thermal today — cpufreq\_ext is the main concrete code.
* **Telemetry is mature.** Kepler, Wattmeter and DEEP-mon already give fine-grained joule accounting that a governor could use as feedback.
* **Open research space:** wiring those telemetry sources into an eBPF-based unified policy (BEAR-style) that calls cpufreq\_ext + a future cpuidle\_bpf hook + thermal caps is still almost untouched.
If you need more detail on any specific project (code pointers, evaluation numbers, etc.) just tell me which one and I'll dig in.
[1]: https://lwn.net/Articles/991991/?utm_source=chatgpt.com "cpufreq_ext: Introduce cpufreq ext governor - LWN.net"
[2]: https://jauu.net/talks/eco-compute-linux-power-analysis.pdf?utm_source=chatgpt.com "[PDF] Linux Power Management Analysis for Embedded Systems"
[3]: https://www.asafcidon.com/uploads/5/9/7/0/59701649/energy-aware-ebpf.pdf "Energy-Aware Process Scheduling in Linux"
[4]: https://sustainable-computing.io/?utm_source=chatgpt.com "Kepler"
[5]: https://www.slideshare.net/necstlab/deepmon-dynamic-and-energy-efficient-power-monitoring-for-containerbased-infrastructures "DEEP-mon: Dynamic and Energy Efficient Power monitoring for container-based infrastructures | PPT"
[6]: https://github.com/fjebaker/eBPF-energy-monitor?utm_source=chatgpt.com "Monitoring energy usage with eBPF at process level granularity."
[7]: https://github.com/Spirals-Team/bitwatts?utm_source=chatgpt.com "BitWatts is a software-defined power meter for virtualized ... - GitHub"
[8]: https://github.com/hubblo-org/scaphandre?utm_source=chatgpt.com "hubblo-org/scaphandre - GitHub"
**为什么要在 eBPF 里“自己管” DVFS / idle**
| 典型诉求 | 传统做法 | eBPF 动态管控能带来的额外好处 | 什么时候“有必要” |
| ------------------------------ | -------------------------------------- | ------------------------------------------------------------------------------ | --------------------------------- |
| **降能耗 / 提电池** | 靠内核默认 governor（`schedutil`、`menu/TEO`） | 结合调度事件、负载特征、温度实时算最优 P/C-state；针对特定 App 可省 5-30 % 电（已在 Android 定制 governor 里见过） | 移动设备、电池供电 IoT；对续航敏感、负载模式单一（游戏、摄像） |
| **稳帧率 / 避免温度跳水** | 被动等热节流；温度超了再降频 | 提前预测热量，把频率慢慢收掉或注入 idle，平均 FPS 更稳；可把“突降”变成“缓降” | 连续长时间满载（录 4K、跑 LLM），且不能掉帧 |
| **按租户/容器分功耗预算** | 只能全机统一 RAPL / PL1 | eBPF 在 `sched_switch` 里实时累能，把 budget 切给高优租户；结合 `cpufreq_ext` 只降其他租户频率 | 多租户云、边缘节点需要功率隔离 |
| **实验 / 研究新策略** | 改 kernel 再重启 | eBPF 代码热插拔，5 秒换一套算法；和 `sched_ext` 一起做“联合调度+DVFS”实验快得多 | 学术/性能团队要 A/B 频繁试验 |
| **异构平台 (big.LITTLE, CPU+GPU)** | Vendor blob、用户态守护进程 | eBPF 可直接读 GPU 负载、温度 map，然后下调 CPU 频率让热 budget 让给 GPU——无 vendor 驱动也能做 | SoC 自己做系统集成、不想依赖私有 HAL |
---
### 真的“必要”吗?一张简表判断
* **工作负载简单、对能耗不敏感** → 默认 governor 足够，eBPF 只是锦上添花。
* **对每瓦性能或温度拐点有硬约束**（手游、电池无人机、5 U 机柜卡着 PDU）→ 自定策略往往能挖出 10-30 % 空间。
* **要做系统研究 / 定制产品** → eBPF 是当下最省事、最安全的内核内实验手段,比写 LKM / 改源省几个数量级的维护成本。
> **一句话**
> *“用不用 eBPF 管电源,看你在乎多少瓦、多少度,以及你改内核的代价能不能收回。”*
如果只是想看个大概功率曲线，powertop 就够；但要做细粒度、自适应、可热更新的功耗或温度控制，eBPF 给的“事件驱动 + 内核态汇总 + 安全热插拔”组合基本无可替代。

473
src/48-energy/energy_monitor.py Executable file
View File

@@ -0,0 +1,473 @@
#!/usr/bin/env python3
import os
import time
import json
import csv
from datetime import datetime
from collections import deque
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib.figure import Figure
class RAPLEnergyMonitor:
    """Sample RAPL energy counters from sysfs and convert them to watts.

    Domains are discovered once at construction. Each ``update_power()`` call
    reads every domain's ``energy_uj`` counter and, from the second call on,
    appends the average power since the previous call to ``power_data``.

    Attributes:
        rapl_base: Directory containing the ``intel-rapl:*`` domain directories.
        domains: Domain name -> dict with ``path``, ``energy_file``,
            ``max_energy``, ``last_energy`` and ``last_time``.
        timestamps: ``time.time()`` of the last 100 ``update_power()`` calls.
        power_data: Domain name -> deque of the last 100 power values in watts.
    """

    def __init__(self, rapl_base="/sys/class/powercap/intel-rapl"):
        """Discover the RAPL domains below ``rapl_base``.

        Raises:
            RuntimeError: If ``rapl_base`` does not exist.
        """
        self.rapl_base = rapl_base
        self.energy_data = {}
        self.timestamps = deque(maxlen=100)
        self.power_data = {}
        self.domains = self._discover_domains()

    def _domain_entry(self, path):
        """Build the bookkeeping record for the domain directory ``path``."""
        return {
            "path": path,
            "energy_file": os.path.join(path, "energy_uj"),
            "max_energy": self._read_max_energy(path),
            "last_energy": None,
            "last_time": None
        }

    @staticmethod
    def _read_name(path):
        """Return the stripped contents of ``path``/name."""
        with open(os.path.join(path, "name"), "r") as f:
            return f.read().strip()

    def _discover_domains(self):
        """Return the domain table and create one power deque per domain.

        Top-level ``intel-rapl:*`` directories are keyed by their name; nested
        ``intel-rapl:*`` directories are keyed as ``"<parent>:<name>"``.
        Domains whose name cannot be read are skipped.
        """
        domains = {}

        if not os.path.exists(self.rapl_base):
            raise RuntimeError("Intel RAPL not available. Are you running on Intel CPU with appropriate permissions?")

        for item in os.listdir(self.rapl_base):
            path = os.path.join(self.rapl_base, item)
            if not (os.path.isdir(path) and item.startswith("intel-rapl:")):
                continue
            try:
                name = self._read_name(path)
            except (OSError, ValueError):
                # Unreadable or undecodable name: skip this domain and its children.
                continue
            domains[name] = self._domain_entry(path)

            # Check for subdomains
            for subitem in os.listdir(path):
                subpath = os.path.join(path, subitem)
                if not (os.path.isdir(subpath) and subitem.startswith("intel-rapl:")):
                    continue
                try:
                    subname = self._read_name(subpath)
                except (OSError, ValueError):
                    continue
                domains[f"{name}:{subname}"] = self._domain_entry(subpath)

        for domain in domains:
            self.power_data[domain] = deque(maxlen=100)

        return domains

    def _read_max_energy(self, path):
        """Return the value in ``max_energy_range_uj``, or 2**32 if it cannot be read."""
        try:
            with open(os.path.join(path, "max_energy_range_uj"), "r") as f:
                return int(f.read().strip())
        except (OSError, ValueError):
            return 2**32

    def _read_energy(self, domain):
        """Return the current counter value for ``domain``, or None if it cannot be read."""
        try:
            with open(self.domains[domain]["energy_file"], "r") as f:
                return int(f.read().strip())
        except (OSError, ValueError):
            return None

    def update_power(self):
        """Take one sample of every domain and append the derived power.

        Power is the counter delta (microjoules) divided by the elapsed time.
        A counter lower than the previous reading is treated as one wraparound
        at ``max_energy``. When the counter did not advance, the previous
        power value (or 0.0 if there is none) is repeated.
        """
        current_time = time.time()

        for domain in self.domains:
            energy = self._read_energy(domain)
            if energy is None:
                continue

            domain_info = self.domains[domain]

            if domain_info["last_energy"] is not None:
                # Handle wraparound
                if energy < domain_info["last_energy"]:
                    energy_diff = (domain_info["max_energy"] - domain_info["last_energy"]) + energy
                else:
                    energy_diff = energy - domain_info["last_energy"]

                time_diff = current_time - domain_info["last_time"]

                if time_diff > 0 and energy_diff > 0:
                    # Convert from microjoules to watts
                    power = (energy_diff / 1e6) / time_diff
                    self.power_data[domain].append(power)
                elif time_diff > 0:
                    # No energy change, append last known power or 0
                    if len(self.power_data[domain]) > 0:
                        self.power_data[domain].append(self.power_data[domain][-1])
                    else:
                        self.power_data[domain].append(0.0)

            domain_info["last_energy"] = energy
            domain_info["last_time"] = current_time

        self.timestamps.append(current_time)

    def get_current_power(self):
        """Return domain name -> most recent power in watts (0 when no sample exists)."""
        result = {}
        for domain in self.domains:
            if len(self.power_data[domain]) > 0:
                result[domain] = self.power_data[domain][-1]
            else:
                result[domain] = 0
        return result

    def get_power_history(self):
        """Return domain name -> list copy of the stored power values."""
        return {domain: list(self.power_data[domain]) for domain in self.domains}

    def _aligned_history(self, domain):
        """Return ``(relative_times, powers)`` of equal length for ``domain``.

        The first ``update_power()`` call records a timestamp but no power
        value, so the series are matched from their newest ends: the latest
        power value belongs to the latest timestamp. Times are seconds
        relative to the oldest stored timestamp.
        """
        powers = list(self.power_data[domain])
        stamps = list(self.timestamps)
        count = min(len(stamps), len(powers))
        if count == 0:
            return [], []
        origin = stamps[0]
        return [t - origin for t in stamps[-count:]], powers[-count:]

    def plot_power_history(self, save_path=None, show=True):
        """Plot power consumption history for all domains.

        Uses the module-level matplotlib ``plt``.

        Args:
            save_path: If truthy, the figure is also saved to this path.
            show: If true, ``plt.show()`` is called.

        Returns:
            The figure, or None when fewer than two timestamps are stored.
        """
        # Check first so that no empty figure is created and left open.
        if len(self.timestamps) < 2:
            print("Not enough data to plot")
            return

        fig, ax = plt.subplots(figsize=(12, 8))

        # Plot each domain
        for domain in self.domains:
            time_points, powers = self._aligned_history(domain)
            if powers:
                ax.plot(time_points, powers, label=domain, linewidth=2)

        ax.set_xlabel('Time (seconds)', fontsize=12)
        ax.set_ylabel('Power (Watts)', fontsize=12)
        ax.set_title('System Power Consumption Over Time', fontsize=14)
        ax.grid(True, alpha=0.3)
        ax.legend()

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')

        if show:
            plt.show()

        return fig
class EnergyLogger:
    """Collect power samples from a RAPLEnergyMonitor and save or plot them.

    Attributes:
        monitor: The RAPLEnergyMonitor the samples are taken from.
        output_format: ``"csv"`` selects CSV output; any other value selects JSON.
        start_time: ``time.time()`` at construction; basis for ``elapsed_seconds``.
        log_data: List of sample dicts in the order they were taken.
    """

    def __init__(self, output_format="csv"):
        self.monitor = RAPLEnergyMonitor()
        self.output_format = output_format
        self.start_time = time.time()
        self.log_data = []

    def log_sample(self):
        """Take one sample, append it to ``log_data`` and return it.

        The sample holds ``timestamp``, ``elapsed_seconds``, ``total_power``
        and one ``power_<domain>`` entry per domain.
        NOTE(review): ``total_power`` adds up every discovered domain,
        sub-domains included -- confirm that this is the intended total.
        """
        self.monitor.update_power()
        readings = self.monitor.get_current_power()

        sample = {
            "timestamp": datetime.now().isoformat(),
            "elapsed_seconds": time.time() - self.start_time,
            "total_power": sum(readings.values())
        }
        sample.update({f"power_{name}": watts for name, watts in readings.items()})

        self.log_data.append(sample)
        return sample

    def save_csv(self, filename):
        """Write ``log_data`` to ``filename`` as CSV; does nothing when there is no data."""
        rows = self.log_data
        if not rows:
            return

        with open(filename, 'w', newline='') as out:
            # The column set is taken from the first sample.
            table = csv.DictWriter(out, fieldnames=rows[0].keys())
            table.writeheader()
            for row in rows:
                table.writerow(row)

    def save_json(self, filename):
        """Write ``log_data`` to ``filename`` as indented JSON."""
        with open(filename, 'w') as out:
            json.dump(self.log_data, out, indent=2)

    def save(self, filename=None):
        """Save the log as ``<filename>.csv`` or ``<filename>.json``.

        Args:
            filename: Output name without extension; a time-stamped
                ``energy_log_...`` name is generated when omitted.

        Returns:
            The name without extension.
        """
        stem = filename
        if stem is None:
            stem = f"energy_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        if self.output_format == "csv":
            self.save_csv(f"{stem}.csv")
        else:
            self.save_json(f"{stem}.json")

        return stem

    def plot_log_data(self, save_path=None, show=True):
        """Plot logged energy data.

        Draws total power in the upper axes and one line per ``power_<domain>``
        column in the lower axes. Returns the figure, or None when there is no
        data.
        """
        if not self.log_data:
            print("No data to plot")
            return

        fig, (total_ax, domain_ax) = plt.subplots(2, 1, figsize=(12, 10))

        # Extract data
        elapsed = []
        totals = []
        for sample in self.log_data:
            elapsed.append(sample['elapsed_seconds'])
            totals.append(sample['total_power'])

        # Plot total power
        total_ax.plot(elapsed, totals, 'b-', linewidth=2, label='Total Power')
        total_ax.set_xlabel('Time (seconds)', fontsize=12)
        total_ax.set_ylabel('Power (Watts)', fontsize=12)
        total_ax.set_title('Total System Power Consumption', fontsize=14)
        total_ax.grid(True, alpha=0.3)
        total_ax.legend()

        # Plot individual domains: every key of the first sample that starts
        # with 'power_' and is longer than that prefix.
        for column in self.log_data[0].keys():
            if not column.startswith('power_') or column == 'power_':
                continue
            series = [sample.get(column, 0) for sample in self.log_data]
            domain_ax.plot(elapsed, series, linewidth=2,
                           label=column.replace('power_', ''))

        domain_ax.set_xlabel('Time (seconds)', fontsize=12)
        domain_ax.set_ylabel('Power (Watts)', fontsize=12)
        domain_ax.set_title('Power Consumption by Domain', fontsize=14)
        domain_ax.grid(True, alpha=0.3)
        domain_ax.legend()

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')

        if show:
            plt.show()

        return fig
def monitor_realtime(duration=60, visualize=False):
    """Print live power readings on one terminal line for ``duration`` seconds.

    Args:
        duration: How long to monitor, in seconds.
        visualize: When true, hand over to ``monitor_realtime_visual`` and
            return its result.

    A RuntimeError from the monitor is reported as ``Error: ...`` and Ctrl+C
    ends the loop with a message; neither propagates to the caller.
    """
    if visualize:
        return monitor_realtime_visual(duration)

    separator = "=" * 50
    print("Real-time Energy Monitor")
    print(separator)

    try:
        monitor = RAPLEnergyMonitor()
        print(f"Monitoring domains: {', '.join(monitor.domains.keys())}")
        print(f"Duration: {duration} seconds")
        print(separator)

        start_time = time.time()

        while time.time() - start_time < duration:
            monitor.update_power()
            power = monitor.get_current_power()

            # Return to the start of the line and rewrite the current values
            status = "\r" + f"[{int(time.time() - start_time):3d}s] "
            for domain, watts in power.items():
                status += f"{domain}: {watts:6.2f}W "
            status += f"Total: {sum(power.values()):6.2f}W"
            print(status, end="", flush=True)

            time.sleep(0.1)

        print("\n" + separator)
        print("Monitoring complete!")

    except RuntimeError as e:
        print(f"Error: {e}")
    except KeyboardInterrupt:
        print("\n\nMonitoring stopped by user.")
def monitor_realtime_visual(duration=60):
    """Monitor power for ``duration`` seconds while updating a live plot.

    The upper axes track the sum of all domain readings; the lower axes
    draw one line per key of ``monitor.domains``. Once the sampling loop
    ends, the figure is written to ``energy_plot_<YYYYmmdd_HHMMSS>.png``
    (relative to the current working directory) and then shown.

    Args:
        duration: How long to keep sampling, in seconds.

    ``RuntimeError`` and ``KeyboardInterrupt`` are reported on stdout and
    not re-raised. Interactive mode is switched off and the figure is
    closed on every exit path, including unexpected exceptions.
    """
    plt.ion()
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))

    try:
        monitor = RAPLEnergyMonitor()
        domains = list(monitor.domains.keys())

        # Setup total power plot
        ax1.set_xlabel('Time (seconds)')
        ax1.set_ylabel('Power (Watts)')
        ax1.set_title('Total System Power Consumption')
        ax1.grid(True, alpha=0.3)
        total_line, = ax1.plot([], [], 'b-', linewidth=2, label='Total Power')
        ax1.legend()

        # Setup domain power plot: one (initially empty) line per domain
        ax2.set_xlabel('Time (seconds)')
        ax2.set_ylabel('Power (Watts)')
        ax2.set_title('Power Consumption by Domain')
        ax2.grid(True, alpha=0.3)
        domain_lines = {}
        for domain in domains:
            domain_lines[domain], = ax2.plot([], [], linewidth=2, label=domain)
        ax2.legend()

        # Data storage
        times = []
        total_powers = []
        domain_powers = {domain: [] for domain in domains}

        start_time = time.time()
        print(f"Monitoring for {duration} seconds... Press Ctrl+C to stop early.")

        while time.time() - start_time < duration:
            monitor.update_power()
            power = monitor.get_current_power()

            # Update data
            times.append(time.time() - start_time)
            total_powers.append(sum(power.values()))
            for domain in domains:
                # A domain missing from this reading is plotted as 0.
                domain_powers[domain].append(power.get(domain, 0))

            # Update plots; relim/autoscale_view make the axes follow the
            # data that was just pushed into the existing line objects.
            total_line.set_data(times, total_powers)
            ax1.relim()
            ax1.autoscale_view()

            for domain in domains:
                domain_lines[domain].set_data(times, domain_powers[domain])
            ax2.relim()
            ax2.autoscale_view()

            plt.draw()
            plt.pause(0.05)

        # Leave interactive mode before the final show().
        plt.ioff()

        # Save final plot
        save_path = f"energy_plot_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"\nPlot saved to: {save_path}")

        # Show final plot
        plt.show()
    except RuntimeError as e:
        print(f"Error: {e}")
    except KeyboardInterrupt:
        print("\n\nMonitoring stopped by user.")
    finally:
        # Always restore non-interactive mode and release this figure,
        # whichever way the block above was left.
        plt.ioff()
        plt.close(fig)
def main():
    """Command-line entry point for the energy monitor.

    The mode is chosen from the parsed command-line flags:

    * ``-p FILE``: load samples from a ``.csv`` or ``.json`` file, plot
      them through ``EnergyLogger.plot_log_data`` and save the figure as
      ``<FILE without extension>_plot.png``.
    * ``-l``: take a sample every ``--interval`` seconds for
      ``--duration`` seconds, save the log, print a summary and, with
      ``-v``, also save a plot of the logged data.
    * otherwise: call ``monitor_realtime`` (with live plotting if ``-v``).

    Empty data sets are reported with a message instead of reaching the
    averaging/plotting code, which cannot handle zero samples.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Monitor system energy consumption")
    parser.add_argument("-d", "--duration", type=int, default=60,
                        help="Duration to monitor in seconds (default: 60)")
    parser.add_argument("-l", "--log", action="store_true",
                        help="Log data to file instead of real-time display")
    parser.add_argument("-i", "--interval", type=float, default=1.0,
                        help="Sampling interval for logging (default: 1.0)")
    parser.add_argument("-f", "--format", choices=["csv", "json"], default="csv",
                        help="Output format for logging (default: csv)")
    parser.add_argument("-o", "--output", type=str,
                        help="Output filename for logging")
    parser.add_argument("-v", "--visualize", action="store_true",
                        help="Enable real-time visualization")
    parser.add_argument("-p", "--plot", type=str,
                        help="Plot saved data from CSV/JSON file")

    args = parser.parse_args()

    # Handle plotting existing data
    if args.plot:
        print(f"Loading data from: {args.plot}")

        if args.plot.endswith('.csv'):
            # Load CSV data (pandas is imported only on this branch)
            import pandas as pd
            df = pd.read_csv(args.plot)
            log_data = df.to_dict('records')
        elif args.plot.endswith('.json'):
            # Load JSON data
            with open(args.plot, 'r') as f:
                log_data = json.load(f)
        else:
            print("Error: Plot file must be .csv or .json")
            return

        # The plotting code indexes the first sample, so an empty data
        # set has to be rejected here.
        if not log_data:
            print(f"Error: No samples found in {args.plot}")
            return

        # Create a temporary logger to use its plotting method
        logger = EnergyLogger()
        logger.log_data = log_data

        plot_path = args.plot.rsplit('.', 1)[0] + '_plot.png'
        logger.plot_log_data(save_path=plot_path)
        print(f"Plot saved to: {plot_path}")
        return

    if args.log:
        # Logging mode
        print(f"Starting energy logging for {args.duration} seconds...")
        print(f"Sampling interval: {args.interval} seconds")
        print(f"Output format: {args.format}")

        # Stays None if construction fails or is interrupted, so the
        # KeyboardInterrupt handler knows whether there is anything to save.
        logger = None
        try:
            logger = EnergyLogger(output_format=args.format)

            start_time = time.time()
            sample_count = 0

            while time.time() - start_time < args.duration:
                sample = logger.log_sample()
                sample_count += 1

                print(f"\rSamples: {sample_count} | Total Power: {sample['total_power']:.2f} W",
                      end='', flush=True)

                time.sleep(args.interval)

            # A non-positive duration yields zero samples; the average
            # below would divide by zero, so stop here.
            if not logger.log_data:
                print("\n\nNo samples were collected; nothing to save.")
                return

            print("\n\nSaving data...")
            filename = logger.save(args.output)
            print(f"Data saved to: {filename}.{args.format}")

            # Print summary
            avg_power = sum(s['total_power'] for s in logger.log_data) / len(logger.log_data)
            print("\nSummary:")
            print(f" Total samples: {len(logger.log_data)}")
            print(f" Average power: {avg_power:.2f} W")
            # Watts * seconds / 3600 -> watt-hours
            print(f" Total energy: {avg_power * args.duration / 3600:.4f} Wh")

            # Generate plot if visualization is enabled
            if args.visualize:
                plot_filename = (args.output or filename) + "_plot.png"
                logger.plot_log_data(save_path=plot_filename)
                print(f" Plot saved to: {plot_filename}")

        except RuntimeError as e:
            print(f"Error: {e}")
        except KeyboardInterrupt:
            print("\n\nLogging interrupted. Saving partial data...")
            if logger is not None and logger.log_data:
                filename = logger.save(args.output)
                print(f"Partial data saved to: {filename}.{args.format}")
    else:
        # Real-time monitoring mode
        monitor_realtime(args.duration, visualize=args.visualize)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()

Binary file not shown.

After

Width:  |  Height:  |  Size: 158 KiB

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python3
"""
Test script to demonstrate energy monitor visualization features
"""
import subprocess
import sys
import os
def _run_step(cmd):
    """Echo ``cmd``, wait for Enter, run it, and report a non-zero exit status.

    Returns the exit status of the child process.
    """
    print(f"Running: {' '.join(cmd)}")
    input("Press Enter to start...")
    result = subprocess.run(cmd)
    if result.returncode != 0:
        print(f"✗ Command exited with status {result.returncode}")
    return result.returncode


def test_visualization():
    """Interactively walk through the visualization modes of energy_monitor.py.

    Runs three steps, each started by pressing Enter: live plotting for
    10 s, logging for 15 s with plot generation, and plotting the CSV file
    written by the second step. Returns early with an install hint when
    matplotlib cannot be imported.

    ``energy_monitor.py`` is looked up next to this file, so the script
    can be launched from any working directory; data files are still read
    and written relative to the current working directory.
    """
    print("Energy Monitor Visualization Test")
    print("=" * 50)

    # Check if we can import matplotlib
    try:
        import matplotlib  # imported only to probe availability
        print("✓ matplotlib is installed")
    except ImportError:
        print("✗ matplotlib is not installed")
        print("Please install with: pip install matplotlib")
        return

    # Resolve the monitor relative to this file rather than the cwd.
    monitor_script = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "energy_monitor.py"
    )

    # Test 1: Real-time monitoring with visualization
    print("\nTest 1: Real-time monitoring with visualization (10 seconds)")
    print("This will show a live updating plot of power consumption")
    _run_step([sys.executable, monitor_script, "-d", "10", "-v"])

    # Test 2: Logging with plot generation
    print("\n\nTest 2: Logging data and generating plot (15 seconds)")
    _run_step([sys.executable, monitor_script,
               "-l", "-d", "15", "-i", "0.5", "-v", "-o", "test_energy"])

    # Test 3: Plot from saved data
    print("\n\nTest 3: Plotting from saved CSV file")
    if os.path.exists("test_energy.csv"):
        _run_step([sys.executable, monitor_script, "-p", "test_energy.csv"])
    else:
        print("No saved data file found from Test 2")

    print("\n" + "=" * 50)
    print("Visualization tests complete!")
    print("\nUsage examples:")
    print(" Real-time monitoring with plot: python energy_monitor.py -v")
    print(" Log data and generate plot: python energy_monitor.py -l -v")
    print(" Plot from existing data: python energy_monitor.py -p data.csv")
# Start the interactive walkthrough only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    test_visualization()