diff --git a/src/48-energy/.gitignore b/src/48-energy/.gitignore new file mode 100644 index 0000000..725f0a1 --- /dev/null +++ b/src/48-energy/.gitignore @@ -0,0 +1,32 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +venv/ +env/ +ENV/ + +# Energy monitoring data +*.csv +*.json +energy_log_* +.vscode +package.json +*.o +*.skel.json +*.skel.yaml +package.yaml +ecli +bootstrap + +# IDE +.vscode/ +.idea/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db \ No newline at end of file diff --git a/src/48-energy/Makefile b/src/48-energy/Makefile new file mode 100644 index 0000000..7437c1a --- /dev/null +++ b/src/48-energy/Makefile @@ -0,0 +1,141 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +OUTPUT := .output +CLANG ?= clang +LIBBPF_SRC := $(abspath ../third_party/libbpf/src) +BPFTOOL_SRC := $(abspath ../third_party/bpftool/src) +LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a) +BPFTOOL_OUTPUT ?= $(abspath $(OUTPUT)/bpftool) +BPFTOOL ?= $(BPFTOOL_OUTPUT)/bootstrap/bpftool +LIBBLAZESYM_SRC := $(abspath ../third_party/blazesym/) +LIBBLAZESYM_OBJ := $(abspath $(OUTPUT)/libblazesym.a) +LIBBLAZESYM_HEADER := $(abspath $(OUTPUT)/blazesym.h) +ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \ + | sed 's/arm.*/arm/' \ + | sed 's/aarch64/arm64/' \ + | sed 's/ppc64le/powerpc/' \ + | sed 's/mips.*/mips/' \ + | sed 's/riscv64/riscv/' \ + | sed 's/loongarch64/loongarch/') +VMLINUX := ../third_party/vmlinux/$(ARCH)/vmlinux.h +# Use our own libbpf API headers and Linux UAPI headers distributed with +# libbpf to avoid dependency on system-wide headers, which could be missing or +# outdated +INCLUDES := -I$(OUTPUT) -I../third_party/libbpf/include/uapi -I$(dir $(VMLINUX)) +CFLAGS := -g -Wall +ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS) + +APPS = bootstrap # minimal minimal_legacy uprobe kprobe fentry usdt sockfilter tc ksyscall + +CARGO ?= $(shell which cargo) +ifeq ($(strip $(CARGO)),) +BZS_APPS := +else +BZS_APPS := # profile +APPS += $(BZS_APPS) +# Required by libblazesym 
+ALL_LDFLAGS += -lrt -ldl -lpthread -lm +endif + +# Get Clang's default includes on this system. We'll explicitly add these dirs +# to the includes list when compiling with `-target bpf` because otherwise some +# architecture-specific dirs will be "missing" on some architectures/distros - +# headers such as asm/types.h, asm/byteorder.h, asm/socket.h, asm/sockios.h, +# sys/cdefs.h etc. might be missing. +# +# Use '-idirafter': Don't interfere with include mechanics except where the +# build would have failed anyways. +CLANG_BPF_SYS_INCLUDES ?= $(shell $(CLANG) -v -E - </dev/null 2>&1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') + +ifeq ($(V),1) + Q = + msg = +else + Q = @ + msg = @printf ' %-8s %s%s\n' \ + "$(1)" \ + "$(patsubst $(abspath $(OUTPUT))/%,%,$(2))" \ + "$(if $(3), $(3))"; + MAKEFLAGS += --no-print-directory +endif + +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +$(call allow-override,CC,$(CROSS_COMPILE)cc) +$(call allow-override,LD,$(CROSS_COMPILE)ld) + +.PHONY: all +all: $(APPS) + +.PHONY: clean +clean: + $(call msg,CLEAN) + $(Q)rm -rf $(OUTPUT) $(APPS) + +$(OUTPUT) $(OUTPUT)/libbpf $(BPFTOOL_OUTPUT): + $(call msg,MKDIR,$@) + $(Q)mkdir -p $@ + +# Build libbpf +$(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf + $(call msg,LIB,$@) + $(Q)$(MAKE) -C $(LIBBPF_SRC) BUILD_STATIC_ONLY=1 \ + OBJDIR=$(dir $@)/libbpf DESTDIR=$(dir $@) \ + INCLUDEDIR= LIBDIR= UAPIDIR= \ + install + +# Build bpftool +$(BPFTOOL): | $(BPFTOOL_OUTPUT) + $(call msg,BPFTOOL,$@) + $(Q)$(MAKE) ARCH= CROSS_COMPILE= OUTPUT=$(BPFTOOL_OUTPUT)/ -C $(BPFTOOL_SRC) bootstrap + + +$(LIBBLAZESYM_SRC)/target/release/libblazesym.a:: + $(Q)cd $(LIBBLAZESYM_SRC) && $(CARGO) build --features=cheader,dont-generate-test-files --release + +$(LIBBLAZESYM_OBJ): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT) + 
$(call msg,LIB, $@) + $(Q)cp $(LIBBLAZESYM_SRC)/target/release/libblazesym.a $@ + +$(LIBBLAZESYM_HEADER): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT) + $(call msg,LIB,$@) + $(Q)cp $(LIBBLAZESYM_SRC)/target/release/blazesym.h $@ + +# Build BPF code +$(OUTPUT)/%.bpf.o: %.bpf.c $(LIBBPF_OBJ) $(wildcard %.h) $(VMLINUX) | $(OUTPUT) $(BPFTOOL) + $(call msg,BPF,$@) + $(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) \ + $(INCLUDES) $(CLANG_BPF_SYS_INCLUDES) \ + -c $(filter %.c,$^) -o $(patsubst %.bpf.o,%.tmp.bpf.o,$@) + $(Q)$(BPFTOOL) gen object $@ $(patsubst %.bpf.o,%.tmp.bpf.o,$@) + +# Generate BPF skeletons +$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(OUTPUT) $(BPFTOOL) + $(call msg,GEN-SKEL,$@) + $(Q)$(BPFTOOL) gen skeleton $< > $@ + +# Build user-space code +$(patsubst %,$(OUTPUT)/%.o,$(APPS)): %.o: %.skel.h + +$(OUTPUT)/%.o: %.c $(wildcard %.h) | $(OUTPUT) + $(call msg,CC,$@) + $(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@ + +$(patsubst %,$(OUTPUT)/%.o,$(BZS_APPS)): $(LIBBLAZESYM_HEADER) + +$(BZS_APPS): $(LIBBLAZESYM_OBJ) + +# Build application binary +$(APPS): %: $(OUTPUT)/%.o $(LIBBPF_OBJ) | $(OUTPUT) + $(call msg,BINARY,$@) + $(Q)$(CC) $(CFLAGS) $^ $(ALL_LDFLAGS) -lelf -lz -o $@ + +# delete failed targets +.DELETE_ON_ERROR: + +# keep intermediate (.skel.h, .bpf.o, etc) targets +.SECONDARY: diff --git a/src/48-energy/README.md b/src/48-energy/README.md new file mode 100644 index 0000000..c075e2c --- /dev/null +++ b/src/48-energy/README.md @@ -0,0 +1,73 @@ +# System Energy Monitoring with Intel RAPL + +This project provides tools to monitor system energy consumption using Intel's Running Average Power Limit (RAPL) interface. + +## Features + +- Real-time power consumption monitoring +- Live terminal-based display of power usage across different domains (CPU, DRAM, etc.) 
+- Data logging to CSV or JSON formats +- Support for multiple Intel RAPL domains +- Minimal dependencies - the Python standard library plus matplotlib for plotting + +## Requirements + +- Intel CPU with RAPL support +- Python 3.6+ +- Root access or appropriate permissions for `/sys/class/powercap/intel-rapl` + +## Installation + +`energy_monitor.py` imports matplotlib at startup, so install it first (for example `pip install matplotlib`); `debug_energy.py` uses only the Python standard library. + +## Usage + +### Real-time Monitoring + +```bash +sudo python3 energy_monitor.py +``` + +This displays real-time power consumption in the terminal: +- Power consumption for each domain (Package, DRAM, etc.) +- Total system power consumption +- Updates about every 0.1 seconds + +### Logging Energy Data + +```bash +sudo python3 energy_monitor.py -l -d 300 -i 0.5 -f csv -o my_energy_log +``` + +Options: +- `-d, --duration`: Monitoring duration in seconds (default: 60) +- `-i, --interval`: Sampling interval in seconds (default: 1.0) +- `-f, --format`: Output format - csv or json (default: csv) +- `-o, --output`: Output filename without extension + +## Permissions + +If you don't want to run with sudo, adjust permissions: + +```bash +sudo chmod -R a+r /sys/class/powercap/intel-rapl +``` + +Note: This allows all users to read RAPL data but not modify power limits. 
+ +## RAPL Domains + +Common domains include: +- `package-0`: Entire CPU package power +- `core`: CPU cores power +- `uncore`: CPU uncore components (cache, memory controller) +- `dram`: Memory power consumption + +## Example Output + +The logger provides a summary like: +``` +Total samples: 300 +Average power: 45.23 W +Total energy: 0.0377 Wh +``` \ No newline at end of file diff --git a/src/48-energy/bootstrap.bpf.c b/src/48-energy/bootstrap.bpf.c new file mode 100644 index 0000000..54e33e7 --- /dev/null +++ b/src/48-energy/bootstrap.bpf.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2020 Facebook */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> +#include "bootstrap.h" + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 8192); + __type(key, pid_t); + __type(value, u64); +} exec_start SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 256 * 1024); +} rb SEC(".maps"); + +const volatile unsigned long long min_duration_ns = 0; + +SEC("tp/sched/sched_process_exec") +int handle_exec(struct trace_event_raw_sched_process_exec *ctx) +{ + struct task_struct *task; + unsigned fname_off; + struct event *e; + pid_t pid; + u64 ts; + + /* remember time exec() was executed for this PID */ + pid = bpf_get_current_pid_tgid() >> 32; + ts = bpf_ktime_get_ns(); + bpf_map_update_elem(&exec_start, &pid, &ts, BPF_ANY); + + /* don't emit exec events when minimum duration is specified */ + if (min_duration_ns) + return 0; + + /* reserve sample from BPF ringbuf */ + e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0); + if (!e) + return 0; + + /* fill out the sample with data */ + task = (struct task_struct *)bpf_get_current_task(); + + e->exit_event = false; + e->pid = pid; + e->ppid = BPF_CORE_READ(task, real_parent, tgid); + bpf_get_current_comm(&e->comm, sizeof(e->comm)); + + fname_off = ctx->__data_loc_filename & 0xFFFF; + 
bpf_probe_read_str(&e->filename, sizeof(e->filename), (void *)ctx + fname_off); + + /* successfully submit it to user-space for post-processing */ + bpf_ringbuf_submit(e, 0); + return 0; +} + +SEC("tp/sched/sched_process_exit") +int handle_exit(struct trace_event_raw_sched_process_template* ctx) +{ + struct task_struct *task; + struct event *e; + pid_t pid, tid; + u64 id, *start_ts, duration_ns = 0; + + /* get PID and TID of exiting thread/process */ + id = bpf_get_current_pid_tgid(); + pid = id >> 32; + tid = (u32)id; + + /* ignore thread exits */ + if (pid != tid) + return 0; + + /* if we recorded start of the process, calculate lifetime duration */ + start_ts = bpf_map_lookup_elem(&exec_start, &pid); + if (start_ts) + duration_ns = bpf_ktime_get_ns() - *start_ts; + else if (min_duration_ns) + return 0; + bpf_map_delete_elem(&exec_start, &pid); + + /* if process didn't live long enough, return early */ + if (min_duration_ns && duration_ns < min_duration_ns) + return 0; + + /* reserve sample from BPF ringbuf */ + e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0); + if (!e) + return 0; + + /* fill out the sample with data */ + task = (struct task_struct *)bpf_get_current_task(); + + e->exit_event = true; + e->duration_ns = duration_ns; + e->pid = pid; + e->ppid = BPF_CORE_READ(task, real_parent, tgid); + e->exit_code = (BPF_CORE_READ(task, exit_code) >> 8) & 0xff; + bpf_get_current_comm(&e->comm, sizeof(e->comm)); + + /* send data to user-space for post-processing */ + bpf_ringbuf_submit(e, 0); + return 0; +} + diff --git a/src/48-energy/bootstrap.c b/src/48-energy/bootstrap.c new file mode 100644 index 0000000..f023df6a --- /dev/null +++ b/src/48-energy/bootstrap.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2020 Facebook */ +#include <argp.h> +#include <signal.h> +#include <stdio.h> +#include <time.h> +#include <sys/resource.h> +#include <bpf/libbpf.h> +#include "bootstrap.h" +#include "bootstrap.skel.h" + +static struct env { + bool verbose; + long min_duration_ms; +} env; + +const char 
*argp_program_version = "bootstrap 0.0"; +const char *argp_program_bug_address = "<bpf@vger.kernel.org>"; +const char argp_program_doc[] = +"BPF bootstrap demo application.\n" +"\n" +"It traces process start and exits and shows associated \n" +"information (filename, process duration, PID and PPID, etc).\n" +"\n" +"USAGE: ./bootstrap [-d <min-duration-ms>] [-v]\n"; + +static const struct argp_option opts[] = { + { "verbose", 'v', NULL, 0, "Verbose debug output" }, + { "duration", 'd', "DURATION-MS", 0, "Minimum process duration (ms) to report" }, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case 'v': + env.verbose = true; + break; + case 'd': + errno = 0; + env.min_duration_ms = strtol(arg, NULL, 10); + if (errno || env.min_duration_ms <= 0) { + fprintf(stderr, "Invalid duration: %s\n", arg); + argp_usage(state); + } + break; + case ARGP_KEY_ARG: + argp_usage(state); + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, +}; + +static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) +{ + if (level == LIBBPF_DEBUG && !env.verbose) + return 0; + return vfprintf(stderr, format, args); +} + +static volatile bool exiting = false; + +static void sig_handler(int sig) +{ + exiting = true; +} + +static int handle_event(void *ctx, void *data, size_t data_sz) +{ + const struct event *e = data; + struct tm *tm; + char ts[32]; + time_t t; + + time(&t); + tm = localtime(&t); + strftime(ts, sizeof(ts), "%H:%M:%S", tm); + + if (e->exit_event) { + printf("%-8s %-5s %-16s %-7d %-7d [%u]", + ts, "EXIT", e->comm, e->pid, e->ppid, e->exit_code); + if (e->duration_ns) + printf(" (%llums)", e->duration_ns / 1000000); + printf("\n"); + } else { + printf("%-8s %-5s %-16s %-7d %-7d %s\n", + ts, "EXEC", e->comm, e->pid, e->ppid, e->filename); + } + + return 0; +} + +int main(int argc, char **argv) +{ + struct 
ring_buffer *rb = NULL; + struct bootstrap_bpf *skel; + int err; + + /* Parse command line arguments */ + err = argp_parse(&argp, argc, argv, 0, NULL, NULL); + if (err) + return err; + + /* Set up libbpf errors and debug info callback */ + libbpf_set_print(libbpf_print_fn); + + /* Cleaner handling of Ctrl-C */ + signal(SIGINT, sig_handler); + signal(SIGTERM, sig_handler); + + /* Load and verify BPF application */ + skel = bootstrap_bpf__open(); + if (!skel) { + fprintf(stderr, "Failed to open and load BPF skeleton\n"); + return 1; + } + + /* Parameterize BPF code with minimum duration parameter */ + skel->rodata->min_duration_ns = env.min_duration_ms * 1000000ULL; + + /* Load & verify BPF programs */ + err = bootstrap_bpf__load(skel); + if (err) { + fprintf(stderr, "Failed to load and verify BPF skeleton\n"); + goto cleanup; + } + + /* Attach tracepoints */ + err = bootstrap_bpf__attach(skel); + if (err) { + fprintf(stderr, "Failed to attach BPF skeleton\n"); + goto cleanup; + } + + /* Set up ring buffer polling */ + rb = ring_buffer__new(bpf_map__fd(skel->maps.rb), handle_event, NULL, NULL); + if (!rb) { + err = -1; + fprintf(stderr, "Failed to create ring buffer\n"); + goto cleanup; + } + + /* Process events */ + printf("%-8s %-5s %-16s %-7s %-7s %s\n", + "TIME", "EVENT", "COMM", "PID", "PPID", "FILENAME/EXIT CODE"); + while (!exiting) { + err = ring_buffer__poll(rb, 100 /* timeout, ms */); + /* Ctrl-C will cause -EINTR */ + if (err == -EINTR) { + err = 0; + break; + } + if (err < 0) { + printf("Error polling perf buffer: %d\n", err); + break; + } + } + +cleanup: + /* Clean up */ + ring_buffer__free(rb); + bootstrap_bpf__destroy(skel); + + return err < 0 ? 
-err : 0; +} diff --git a/src/48-energy/bootstrap.h b/src/48-energy/bootstrap.h new file mode 100644 index 0000000..34e765a --- /dev/null +++ b/src/48-energy/bootstrap.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2020 Facebook */ +#ifndef __BOOTSTRAP_H +#define __BOOTSTRAP_H + +#define TASK_COMM_LEN 16 +#define MAX_FILENAME_LEN 127 + +struct event { + int pid; + int ppid; + unsigned exit_code; + unsigned long long duration_ns; + char comm[TASK_COMM_LEN]; + char filename[MAX_FILENAME_LEN]; + bool exit_event; +}; + +#endif /* __BOOTSTRAP_H */ diff --git a/src/48-energy/debug_energy.py b/src/48-energy/debug_energy.py new file mode 100755 index 0000000..e857aed --- /dev/null +++ b/src/48-energy/debug_energy.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +""" +Debug script to check RAPL energy readings +""" + +import os +import time + +def check_rapl(): + rapl_base = "/sys/class/powercap/intel-rapl" + + print("Checking Intel RAPL availability...") + print("=" * 50) + + if not os.path.exists(rapl_base): + print(f"ERROR: {rapl_base} does not exist!") + print("Intel RAPL may not be available on this system.") + return + + # Check permissions + print("\nChecking permissions...") + for item in os.listdir(rapl_base): + if item.startswith("intel-rapl:"): + energy_file = os.path.join(rapl_base, item, "energy_uj") + if os.path.exists(energy_file): + readable = os.access(energy_file, os.R_OK) + print(f"{energy_file}: {'readable' if readable else 'NOT readable'}") + + print("\n" + "=" * 50) + print("Reading energy values over 5 seconds...") + print("=" * 50) + + # Discover domains + domains = {} + for item in os.listdir(rapl_base): + path = os.path.join(rapl_base, item) + if os.path.isdir(path) and item.startswith("intel-rapl:"): + try: + with open(os.path.join(path, "name"), "r") as f: + name = f.read().strip() + energy_file = os.path.join(path, "energy_uj") + if os.path.exists(energy_file): + domains[name] = energy_file + except: 
+ pass + + if not domains: + print("ERROR: No RAPL domains found!") + return + + print(f"Found domains: {', '.join(domains.keys())}\n") + + # Read energy values multiple times + readings = {domain: [] for domain in domains} + + for i in range(10): + for domain, energy_file in domains.items(): + try: + with open(energy_file, "r") as f: + energy = int(f.read().strip()) + readings[domain].append(energy) + except Exception as e: + print(f"Error reading {domain}: {e}") + + time.sleep(0.5) + + # Analyze readings + print("\nAnalysis:") + print("-" * 50) + + for domain, values in readings.items(): + if len(values) < 2: + continue + + print(f"\n{domain}:") + print(f" First reading: {values[0]} µJ") + print(f" Last reading: {values[-1]} µJ") + print(f" Difference: {values[-1] - values[0]} µJ") + + # Check if values are changing + unique_values = len(set(values)) + print(f" Unique values: {unique_values}") + + if unique_values == 1: + print(" ⚠️ WARNING: Energy values are not changing!") + else: + # Calculate average power + energy_diff = values[-1] - values[0] + time_diff = 0.5 * (len(values) - 1) + if energy_diff > 0: + power = (energy_diff / 1e6) / time_diff + print(f" Average power: {power:.2f} W") + + print("\n" + "=" * 50) + print("\nPossible issues if readings are zero:") + print("1. The system is idle with very low power consumption") + print("2. RAPL updates may be infrequent (try longer sampling intervals)") + print("3. Permission issues (try running with sudo)") + print("4. 
RAPL may not be fully supported on this CPU") + +if __name__ == "__main__": + check_rapl() \ No newline at end of file diff --git a/src/48-energy/ebpf-energy.md b/src/48-energy/ebpf-energy.md new file mode 100644 index 0000000..ecdebc6 --- /dev/null +++ b/src/48-energy/ebpf-energy.md @@ -0,0 +1,70 @@ +Below is a quick-scan map of **public eBPF projects & papers that touch CPU power-management knobs (DVFS, idle, thermal) or pure energy accounting.** +I’ve grouped them so you can see where work already exists and where the gap still is. + +--- + +## 1 Projects/papers that *try to control* DVFS / idle / thermal directly + +| Name & date | What it does with eBPF | Sub-knobs covered | Status / notes | +| --------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------ | -------------------------------------------------------------------------------------------------------------- | +| **`cpufreq_ext` RFC (Zou, 2024)** | Hooks the cpufreq governor into a `bpf_struct_ops` table (`get_next_freq()` etc.) so a policy can be written in eBPF instead of C. Integrates with `sched_ext` to let a BPF scheduler and a BPF DVFS policy co-operate. | **DVFS** (per-policy frequency) | RFC on linux-pm & bpf lists. Compiles on ≥ 6.9 kernels; crude sample policy included. ([lwn.net][1]) | +| **eBPF CPU-Idle governor prototype (Eco-Compute summit, 2024)** | Replaces the “menu/TEO” cpuidle governor with a BPF hook so that idle-state choice and idle-injection can be decided in eBPF. | **Idle states** (C-states), idle injection | Academic prototype; slides only, but code expected to be released by the Eco-Compute students. 
([jauu.net][2]) | +| **Early “power-driver” & BEAR lineage** | Molnar/Rasmussen’s 2013 power-driver idea was to unify `go_faster/go_slower/enter_idle`. Our BEAR concept simply modernises this with eBPF. No public code yet, but it shows the *direction* the kernel community is discussing. | **DVFS + Idle + Thermal** (goal) | Design idea; opportunity for a full implementation (research gap). ([jauu.net][2], [lwn.net][1]) | + +> **Reality check:** right now cpufreq\_ext is the *only* upstream-bound eBPF code that truly changes CPU frequency. Idle and thermal hooks are still research prototypes, so this area is wide-open if you want to publish. + +--- + +## 2 eBPF projects focused on **energy telemetry / accounting** + +*(These don’t set DVFS or idle, but they give the per-process or per-container energy data you’d need to *drive* such policies.)* + +| Name | Scope & technique | Why it matters | +| -------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | +| **Wattmeter / *Energy-Aware Process Scheduling in Linux* (HotCarbon ’24)** | Attaches an eBPF program to every context-switch to read RAPL MSRs in-kernel, giving millisecond-scale per-process joules with <1 µs overhead. Used to build energy-fair and energy-capped schedulers on top of ghOSt/sched\_ext. | Gives accurate, low-overhead energy numbers that could feed a DVFS/thermal policy. ([asafcidon.com][3]) | +| **Kepler (CNCF sandbox, 2023-)** | A Prometheus exporter for Kubernetes. Uses eBPF tracepoints + perf counters + RAPL/NVML to attribute energy to pods/containers; ships ML models for platforms that lack RAPL. 
| Quickly gaining traction in cloud-native stacks; good data source for cluster-level power orchestration. ([sustainable-computing.io][4]) | +| **DEEP-mon (Polimi, 2018)** | In-kernel eBPF aggregation of scheduler events to attribute power to Docker containers; <5 % runtime overhead on Phoronix & NPB. | Older but shows in-kernel aggregation trick to avoid 200 k sched-switch/sec user-space wake-ups. ([slideshare.net][5]) | +| **eBPF-energy-monitor (GitHub toy project)** | Minimal BCC script that latches on `sched_switch`, tracks CPU-time per PID, multiplies by per-core joules from RAPL. | Handy starting point if you just need a working demo. ([github.com][6]) | +| **DEEP-mon spin-offs: BitWatts, Scaphandre** | Both offer software-defined power meters; BitWatts focuses on VMs, Scaphandre on bare-metal & K8s. Scaphandre can optionally compile an eBPF sensor module for process attribution. | Good for comparing accuracy / overhead trade-offs vs. Kepler. ([github.com][7], [github.com][8]) | + +--- + +### 3 Quick take-aways + +* **Very little published work** uses eBPF to *control* DVFS/idle/thermal today – cpufreq\_ext is the main concrete code. +* **Telemetry is mature.** Kepler, Wattmeter and DEEP-mon already give fine-grained joule accounting that a governor could use as feedback. +* **Open research space:** wiring those telemetry sources into an eBPF-based unified policy (BEAR-style) that calls cpufreq\_ext + a future cpuidle\_bpf hook + thermal caps is still almost untouched. + +If you need more detail on any specific project (code pointers, evaluation numbers, etc.) just tell me which one and I’ll dig in. 
+ +[1]: https://lwn.net/Articles/991991/?utm_source=chatgpt.com "cpufreq_ext: Introduce cpufreq ext governor - LWN.net" +[2]: https://jauu.net/talks/eco-compute-linux-power-analysis.pdf?utm_source=chatgpt.com "[PDF] Linux Power Management Analysis for Embedded Systems" +[3]: https://www.asafcidon.com/uploads/5/9/7/0/59701649/energy-aware-ebpf.pdf "Energy-Aware Process Scheduling in Linux" +[4]: https://sustainable-computing.io/?utm_source=chatgpt.com "Kepler" +[5]: https://www.slideshare.net/necstlab/deepmon-dynamic-and-energy-efficient-power-monitoring-for-containerbased-infrastructures "DEEP-mon: Dynamic and Energy Efficient Power monitoring for container-based infrastructures | PPT" +[6]: https://github.com/fjebaker/eBPF-energy-monitor?utm_source=chatgpt.com "Monitoring energy usage with eBPF at process level granularity." +[7]: https://github.com/Spirals-Team/bitwatts?utm_source=chatgpt.com "BitWatts is a software-defined power meter for virtualized ... - GitHub" +[8]: https://github.com/hubblo-org/scaphandre?utm_source=chatgpt.com "hubblo-org/scaphandre - GitHub" + +**为什么要在 eBPF 里“自己管” DVFS / idle?** + +| 典型诉求 | 传统做法 | eBPF 动态管控能带来的额外好处 | 什么时候“有必要” | +| ------------------------------ | -------------------------------------- | ------------------------------------------------------------------------------ | --------------------------------- | +| **降能耗 / 提电池** | 靠内核默认 governor(`schedutil`、`menu/TEO`) | 结合调度事件、负载特征、温度实时算最优 P/C-state;针对特定 App 可省 5-30 % 电(已在 Android 定制 governor 里见过) | 移动设备、电池供电 IoT;对续航敏感、负载模式单一(游戏、摄像) | +| **稳帧率 / 避免温度跳水** | 被动等热节流;温度超了再降频 | 提前预测热量,把频率慢慢收掉或注入 idle,平均 FPS 更稳;可把“突降”变成“缓降” | 连续长时间满载(录 4K、跑 LLM)且不能掉帧 | +| **按租户/容器分功耗预算** | 只能全机统一 RAPL / PL1 | eBPF 在 `sched_switch` 里实时累能,把 budget 切给高优租户;结合 `cpufreq_ext` 只降其他租户频率 | 多租户云、边缘节点需要功率隔离 | +| **实验 / 研究新策略** | 改 kernel 再重启 | eBPF 代码热插拔,5 秒换一套算法;和 `sched_ext` 一起做“联合调度+DVFS”实验快得多 | 学术/性能团队要 A/B 频繁试验 | +| **异构平台 (big.LITTLE, CPU+GPU)** | Vendor blob、用户态守护进程 | eBPF 可直接读 GPU 负载、温度 map,然后下调 CPU 频率让热 
budget 让给 GPU——无 vendor 驱动也能做 | SoC 自己做系统集成、不想依赖私有 HAL | + +--- + +### 真的“必要”吗?一张简表判断 + +* **工作负载简单、对能耗不敏感** → 默认 governor 足够,eBPF 只是锦上添花。 +* **对每瓦性能或温度拐点有硬约束**(手游、电池无人机、5 U 机柜卡着 PDU)→ 自定策略往往能挖出 10-30 % 空间。 +* **要做系统研究 / 定制产品** → eBPF 是当下最省事、最安全的内核内实验手段,比写 LKM / 改源省几个数量级的维护成本。 + +> **一句话**: +> *“用不用 eBPF 管电源,看你在乎多少瓦、多少度,以及你改内核的代价能不能收回。”* + +如果只是想看个大概功率曲线,powertop 就够;但要做细粒度、自适应、可热更新的功耗或温度控制,eBPF 给的“事件驱动 + 内核态汇总 + 安全热插拔”组合基本无可替代。 diff --git a/src/48-energy/energy_monitor.py b/src/48-energy/energy_monitor.py new file mode 100755 index 0000000..09d199c --- /dev/null +++ b/src/48-energy/energy_monitor.py @@ -0,0 +1,473 @@ +#!/usr/bin/env python3 + +import os +import time +import json +import csv +from datetime import datetime +from collections import deque +import matplotlib.pyplot as plt +import matplotlib.animation as animation +from matplotlib.figure import Figure + +class RAPLEnergyMonitor: + def __init__(self): + self.rapl_base = "/sys/class/powercap/intel-rapl" + self.energy_data = {} + self.timestamps = deque(maxlen=100) + self.power_data = {} + self.domains = self._discover_domains() + + def _discover_domains(self): + domains = {} + if not os.path.exists(self.rapl_base): + raise RuntimeError("Intel RAPL not available. 
Are you running on Intel CPU with appropriate permissions?") + + for item in os.listdir(self.rapl_base): + path = os.path.join(self.rapl_base, item) + if os.path.isdir(path) and item.startswith("intel-rapl:"): + try: + with open(os.path.join(path, "name"), "r") as f: + name = f.read().strip() + domains[name] = { + "path": path, + "energy_file": os.path.join(path, "energy_uj"), + "max_energy": self._read_max_energy(path), + "last_energy": None, + "last_time": None + } + except: + continue + + # Check for subdomains + for subitem in os.listdir(path): + subpath = os.path.join(path, subitem) + if os.path.isdir(subpath) and subitem.startswith("intel-rapl:"): + try: + with open(os.path.join(subpath, "name"), "r") as f: + subname = f.read().strip() + domains[f"{name}:{subname}"] = { + "path": subpath, + "energy_file": os.path.join(subpath, "energy_uj"), + "max_energy": self._read_max_energy(subpath), + "last_energy": None, + "last_time": None + } + except: + continue + + for domain in domains: + self.power_data[domain] = deque(maxlen=100) + + return domains + + def _read_max_energy(self, path): + try: + with open(os.path.join(path, "max_energy_range_uj"), "r") as f: + return int(f.read().strip()) + except: + return 2**32 + + def _read_energy(self, domain): + try: + with open(self.domains[domain]["energy_file"], "r") as f: + return int(f.read().strip()) + except: + return None + + def update_power(self): + current_time = time.time() + + for domain in self.domains: + energy = self._read_energy(domain) + if energy is None: + continue + + domain_info = self.domains[domain] + + if domain_info["last_energy"] is not None: + # Handle wraparound + if energy < domain_info["last_energy"]: + energy_diff = (domain_info["max_energy"] - domain_info["last_energy"]) + energy + else: + energy_diff = energy - domain_info["last_energy"] + + time_diff = current_time - domain_info["last_time"] + + if time_diff > 0 and energy_diff > 0: + # Convert from microjoules to watts + power = 
(energy_diff / 1e6) / time_diff + self.power_data[domain].append(power) + elif time_diff > 0: + # No energy change, append last known power or 0 + if len(self.power_data[domain]) > 0: + self.power_data[domain].append(self.power_data[domain][-1]) + else: + self.power_data[domain].append(0.0) + + domain_info["last_energy"] = energy + domain_info["last_time"] = current_time + + self.timestamps.append(current_time) + + def get_current_power(self): + result = {} + for domain in self.domains: + if len(self.power_data[domain]) > 0: + result[domain] = self.power_data[domain][-1] + else: + result[domain] = 0 + return result + + def get_power_history(self): + return {domain: list(self.power_data[domain]) for domain in self.domains} + + def plot_power_history(self, save_path=None, show=True): + """Plot power consumption history for all domains""" + fig, ax = plt.subplots(figsize=(12, 8)) + + # Get timestamps relative to start + if len(self.timestamps) < 2: + print("Not enough data to plot") + return + + start_time = self.timestamps[0] + time_points = [(t - start_time) for t in self.timestamps] + + # Plot each domain + for domain in self.domains: + if len(self.power_data[domain]) > 0: + # Ensure we have matching lengths + data_len = min(len(time_points), len(self.power_data[domain])) + ax.plot(time_points[:data_len], + list(self.power_data[domain])[:data_len], + label=domain, linewidth=2) + + ax.set_xlabel('Time (seconds)', fontsize=12) + ax.set_ylabel('Power (Watts)', fontsize=12) + ax.set_title('System Power Consumption Over Time', fontsize=14) + ax.grid(True, alpha=0.3) + ax.legend() + + plt.tight_layout() + + if save_path: + plt.savefig(save_path, dpi=300, bbox_inches='tight') + + if show: + plt.show() + + return fig + +class EnergyLogger: + def __init__(self, output_format="csv"): + self.monitor = RAPLEnergyMonitor() + self.output_format = output_format + self.start_time = time.time() + self.log_data = [] + + def log_sample(self): + self.monitor.update_power() + 
current_power = self.monitor.get_current_power() + + sample = { + "timestamp": datetime.now().isoformat(), + "elapsed_seconds": time.time() - self.start_time, + "total_power": sum(current_power.values()) + } + + for domain, power in current_power.items(): + sample[f"power_{domain}"] = power + + self.log_data.append(sample) + return sample + + def save_csv(self, filename): + if not self.log_data: + return + + with open(filename, 'w', newline='') as f: + writer = csv.DictWriter(f, fieldnames=self.log_data[0].keys()) + writer.writeheader() + writer.writerows(self.log_data) + + def save_json(self, filename): + with open(filename, 'w') as f: + json.dump(self.log_data, f, indent=2) + + def save(self, filename=None): + if filename is None: + filename = f"energy_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}" + + if self.output_format == "csv": + self.save_csv(f"{filename}.csv") + else: + self.save_json(f"{filename}.json") + + return filename + + def plot_log_data(self, save_path=None, show=True): + """Plot logged energy data""" + if not self.log_data: + print("No data to plot") + return + + fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10)) + + # Extract data + timestamps = [sample['elapsed_seconds'] for sample in self.log_data] + total_power = [sample['total_power'] for sample in self.log_data] + + # Plot total power + ax1.plot(timestamps, total_power, 'b-', linewidth=2, label='Total Power') + ax1.set_xlabel('Time (seconds)', fontsize=12) + ax1.set_ylabel('Power (Watts)', fontsize=12) + ax1.set_title('Total System Power Consumption', fontsize=14) + ax1.grid(True, alpha=0.3) + ax1.legend() + + # Plot individual domains + domain_names = [key for key in self.log_data[0].keys() + if key.startswith('power_') and key != 'power_'] + + for domain_key in domain_names: + domain_power = [sample.get(domain_key, 0) for sample in self.log_data] + domain_name = domain_key.replace('power_', '') + ax2.plot(timestamps, domain_power, linewidth=2, label=domain_name) + + 
def monitor_realtime(duration=60, visualize=False):
    """Print live per-domain power readings on a single terminal line.

    When ``visualize`` is true the work is delegated to
    ``monitor_realtime_visual(duration)`` and its result is returned.
    Otherwise readings are printed roughly every 0.1 s until ``duration``
    seconds have elapsed. ``RuntimeError`` and ``KeyboardInterrupt`` are
    caught and reported instead of propagating.
    """
    if visualize:
        return monitor_realtime_visual(duration)

    rule = "=" * 50
    print("Real-time Energy Monitor")
    print(rule)

    try:
        monitor = RAPLEnergyMonitor()
        domain_list = ', '.join(monitor.domains.keys())
        print(f"Monitoring domains: {domain_list}")
        print(f"Duration: {duration} seconds")
        print(rule)

        began = time.time()

        while time.time() - began < duration:
            monitor.update_power()
            readings = monitor.get_current_power()

            # Build the whole status line first; the leading carriage return
            # moves the cursor back so the line overwrites the previous one.
            pieces = ["\r", f"[{int(time.time() - began):3d}s] "]
            pieces += [f"{name}: {value:6.2f}W " for name, value in readings.items()]
            pieces.append(f"Total: {sum(readings.values()):6.2f}W")
            print("".join(pieces), end="", flush=True)

            time.sleep(0.1)

        print("\n" + rule)
        print("Monitoring complete!")

    except RuntimeError as err:
        print(f"Error: {err}")
    except KeyboardInterrupt:
        print("\n\nMonitoring stopped by user.")
def _plot_saved_log(path):
    """Load a previously saved .csv/.json log and render it to ``<stem>_plot.png``."""
    print(f"Loading data from: {path}")

    if path.endswith('.csv'):
        # pandas is imported lazily so it is only needed when replaying CSV.
        import pandas as pd
        log_data = pd.read_csv(path).to_dict('records')
    elif path.endswith('.json'):
        with open(path, 'r') as f:
            log_data = json.load(f)
    else:
        print("Error: Plot file must be .csv or .json")
        return

    # Only EnergyLogger.plot_log_data is needed here, and it reads nothing but
    # ``log_data``. Calling EnergyLogger() would also construct a
    # RAPLEnergyMonitor, so plotting an existing file would depend on live
    # RAPL access (the other modes guard that constructor with
    # ``except RuntimeError``). Bypass __init__ instead.
    logger = EnergyLogger.__new__(EnergyLogger)
    logger.log_data = log_data

    plot_path = path.rsplit('.', 1)[0] + '_plot.png'
    logger.plot_log_data(save_path=plot_path)
    print(f"Plot saved to: {plot_path}")


def _run_logging(args):
    """Sample power for ``args.duration`` seconds, save the log and print a summary."""
    print(f"Starting energy logging for {args.duration} seconds...")
    print(f"Sampling interval: {args.interval} seconds")
    print(f"Output format: {args.format}")

    # Explicit sentinel instead of probing ``locals()`` in the interrupt handler.
    logger = None
    try:
        logger = EnergyLogger(output_format=args.format)

        start_time = time.time()
        sample_count = 0

        while time.time() - start_time < args.duration:
            sample = logger.log_sample()
            sample_count += 1

            print(f"\rSamples: {sample_count} | Total Power: {sample['total_power']:.2f} W",
                  end='', flush=True)

            time.sleep(args.interval)

        # The loop overshoots the requested duration by up to one interval, so
        # use the measured wall time for the energy estimate.
        elapsed = time.time() - start_time

        print("\n\nSaving data...")
        filename = logger.save(args.output)
        print(f"Data saved to: {filename}.{args.format}")

        # Print summary; guard against an empty log (e.g. ``-d 0``), which
        # previously raised ZeroDivisionError.
        samples = logger.log_data
        if samples:
            avg_power = sum(s['total_power'] for s in samples) / len(samples)
        else:
            avg_power = 0.0
        print("\nSummary:")
        print(f" Total samples: {len(samples)}")
        print(f" Average power: {avg_power:.2f} W")
        print(f" Total energy: {avg_power * elapsed / 3600:.4f} Wh")

        # Generate plot if visualization is enabled
        if args.visualize:
            plot_filename = (args.output or filename) + "_plot.png"
            logger.plot_log_data(save_path=plot_filename)
            print(f" Plot saved to: {plot_filename}")

    except RuntimeError as e:
        print(f"Error: {e}")
    except KeyboardInterrupt:
        print("\n\nLogging interrupted. Saving partial data...")
        if logger is not None:
            filename = logger.save(args.output)
            print(f"Partial data saved to: {filename}.{args.format}")


def main():
    """Command-line entry point for the energy monitor.

    Modes, in order of precedence:
      * ``-p FILE``: plot a previously saved .csv/.json log and exit.
      * ``-l``: sample periodically, save the log, print a summary
        (and a plot when ``-v`` is also given).
      * otherwise: live terminal monitoring (live plot with ``-v``).
    """
    import argparse

    parser = argparse.ArgumentParser(description="Monitor system energy consumption")
    parser.add_argument("-d", "--duration", type=int, default=60,
                        help="Duration to monitor in seconds (default: 60)")
    parser.add_argument("-l", "--log", action="store_true",
                        help="Log data to file instead of real-time display")
    parser.add_argument("-i", "--interval", type=float, default=1.0,
                        help="Sampling interval for logging (default: 1.0)")
    parser.add_argument("-f", "--format", choices=["csv", "json"], default="csv",
                        help="Output format for logging (default: csv)")
    parser.add_argument("-o", "--output", type=str,
                        help="Output filename for logging")
    parser.add_argument("-v", "--visualize", action="store_true",
                        help="Enable real-time visualization")
    parser.add_argument("-p", "--plot", type=str,
                        help="Plot saved data from CSV/JSON file")

    args = parser.parse_args()

    # Handle plotting existing data
    if args.plot:
        _plot_saved_log(args.plot)
        return

    if args.log:
        # Logging mode
        _run_logging(args)
    else:
        # Real-time monitoring mode
        monitor_realtime(args.duration, visualize=args.visualize)
data and generating plot (15 seconds)") + cmd2 = [sys.executable, "energy_monitor.py", "-l", "-d", "15", "-i", "0.5", "-v", "-o", "test_energy"] + print(f"Running: {' '.join(cmd2)}") + input("Press Enter to start...") + subprocess.run(cmd2) + + # Test 3: Plot from saved data + print("\n\nTest 3: Plotting from saved CSV file") + if os.path.exists("test_energy.csv"): + cmd3 = [sys.executable, "energy_monitor.py", "-p", "test_energy.csv"] + print(f"Running: {' '.join(cmd3)}") + input("Press Enter to start...") + subprocess.run(cmd3) + else: + print("No saved data file found from Test 2") + + print("\n" + "=" * 50) + print("Visualization tests complete!") + print("\nUsage examples:") + print(" Real-time monitoring with plot: python energy_monitor.py -v") + print(" Log data and generate plot: python energy_monitor.py -l -v") + print(" Plot from existing data: python energy_monitor.py -p data.csv") + +if __name__ == "__main__": + test_visualization() \ No newline at end of file