mirror of
https://github.com/eunomia-bpf/bpf-developer-tutorial.git
synced 2026-02-12 22:56:28 +08:00
Enhance Flamegraph Documentation and GPU Profiling Scripts
- Added an example flamegraph for Qwen3 LLM inference, highlighting key insights and performance bottlenecks.
- Updated README.md to include detailed explanations of CPU and GPU profiling results, emphasizing the correlation between CPU stacks and GPU kernels.
- Modified gpuperf.py to ensure absolute paths are used for output files, improving reliability across different working directories.
- Enhanced merge_gpu_cpu_trace.py to strip ANSI escape sequences from CPU stack traces, ensuring cleaner output for analysis.
- Introduced a new SVG file for the Qwen3 flamegraph, providing a visual representation of profiling data with interactive features.
This commit is contained in:
@@ -59,7 +59,8 @@ class GPUPerf:
|
||||
if not cpu_output_file:
|
||||
cpu_output_file = f"cpu_profile_{pid if pid else 'cuda'}.txt"
|
||||
|
||||
self.profiler_output = cpu_output_file
|
||||
# Convert to absolute path to handle working directory changes
|
||||
self.profiler_output = str(Path(cpu_output_file).absolute())
|
||||
|
||||
# Find CUDA runtime library if not specified
|
||||
if not cuda_lib_path:
|
||||
@@ -131,14 +132,15 @@ class GPUPerf:
|
||||
trace_file = None
|
||||
if do_gpu_profiling:
|
||||
if output_trace:
|
||||
trace_file = output_trace
|
||||
# Convert to absolute path to handle target process changing directories
|
||||
trace_file = str(Path(output_trace).absolute())
|
||||
else:
|
||||
# Create temporary file for trace output
|
||||
fd, trace_file = tempfile.mkstemp(suffix=".txt", prefix="gpuperf_trace_")
|
||||
os.close(fd)
|
||||
self.temp_trace_file = trace_file
|
||||
atexit.register(self.cleanup_temp_files)
|
||||
|
||||
|
||||
# Set up environment variables
|
||||
env = os.environ.copy()
|
||||
env['CUDA_INJECTION64_PATH'] = str(self.injection_lib)
|
||||
|
||||
Reference in New Issue
Block a user