This commit is contained in:
yunwei37
2024-10-03 03:31:55 +00:00
parent 252625d27c
commit 6f7bc37a6a
353 changed files with 165801 additions and 0 deletions

2
third_party/blazesym/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
/target
Cargo.lock

1
third_party/blazesym/.rustfmt.toml vendored Normal file
View File

@@ -0,0 +1 @@
blank_lines_upper_bound = 2

38
third_party/blazesym/Cargo.toml vendored Normal file
View File

@@ -0,0 +1,38 @@
[package]
name = "blazesym"
description = "BlazeSym is a library that symbolizes addresses where symbol names, source file names, and line numbers can be acquired."
version = "0.1.0"
authors = ["Kui-Feng <thinker.li@gmail.com>"]
license-file = "LICENSE"
repository = "https://github.com/libbpf/blazesym"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "blazesym"
crate-type = ["cdylib", "rlib", "staticlib"]
[package.metadata.docs.rs]
features = ["dont-generate-test-files"]
[dependencies]
nix = "0.24"
regex = "1.6"
crossbeam-channel = "0.5"
libc = "0.2.137"
[build-dependencies]
anyhow = "1.0.68"
cbindgen = {version = "0.24", optional = true}
[features]
cheader = ["cbindgen"]
# Enable this feature to opt out of the generation of test files. That may be
# useful when certain utilities are not installed or when there is no intention
# to run tests.
dont-generate-test-files = []
# Enable code paths requiring a nightly toolchain. This feature is only meant to
# be used for testing and benchmarking purposes, not for the core library, which
# is expected to work on stable.
nightly = []

29
third_party/blazesym/LICENSE vendored Normal file
View File

@@ -0,0 +1,29 @@
BSD 3-Clause License
Copyright (c) 2022, Kuifeng Lee
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

153
third_party/blazesym/build.rs vendored Normal file
View File

@@ -0,0 +1,153 @@
use std::env;
use std::ffi::OsStr;
use std::ffi::OsString;
use std::ops::Deref as _;
use std::path::Path;
use std::process::Command;
use std::process::Stdio;
use anyhow::bail;
use anyhow::Context as _;
use anyhow::Result;
/// Render `command` followed by each of `args` as one space-separated string.
///
/// The result is only meant for diagnostics: components that are not valid
/// UTF-8 are converted lossily and no shell quoting is applied.
fn format_command<C, A, S>(command: C, args: A) -> String
where
    C: AsRef<OsStr>,
    A: IntoIterator<Item = S>,
    S: AsRef<OsStr>,
{
    let mut rendered = command.as_ref().to_string_lossy().into_owned();
    for arg in args {
        rendered.push(' ');
        rendered.push_str(&arg.as_ref().to_string_lossy());
    }
    rendered
}
/// Execute `command` with `args` and wait for it to finish.
///
/// Standard input and standard output of the child are connected to the null
/// device; standard error is captured so it can be included in the error.
///
/// # Errors
///
/// Fails if the process could not be spawned or if it exited unsuccessfully.
/// In the latter case the error contains the exit code (or a note that the
/// process was terminated by a signal) and any trailing-trimmed stderr output.
fn run<C, A, S>(command: C, args: A) -> Result<()>
where
    C: AsRef<OsStr>,
    A: IntoIterator<Item = S> + Clone,
    S: AsRef<OsStr>,
{
    let output = Command::new(command.as_ref())
        .stdin(Stdio::null())
        .stdout(Stdio::null())
        .args(args.clone())
        .output()
        .with_context(|| {
            format!(
                "failed to run `{}`",
                format_command(command.as_ref(), args.clone())
            )
        })?;

    if output.status.success() {
        return Ok(());
    }

    // `code()` yields nothing when the child was killed by a signal.
    let code = match output.status.code() {
        Some(code) => format!(" ({code})"),
        None => " (terminated by signal)".to_string(),
    };
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stderr = match stderr.trim_end() {
        "" => String::new(),
        text => format!(": {text}"),
    };

    bail!(
        "`{}` reported non-zero exit-status{code}{stderr}",
        format_command(command, args),
    )
}
/// Compile the C file `src` with the system `cc`, passing `options`.
///
/// The output is written next to `src` under the file name `dst`. Both paths
/// are announced to Cargo as `rerun-if-changed` inputs.
///
/// # Panics
///
/// Panics if `cc` cannot be run or reports failure.
fn cc(src: &Path, dst: &str, options: &[&str]) {
    let dst = src.with_file_name(dst);
    println!("cargo:rerun-if-changed={}", src.display());
    println!("cargo:rerun-if-changed={}", dst.display());

    // Ideally we'd use the `cc` crate here, but it seemingly can't be convinced
    // to create binaries.
    let mut args: Vec<&OsStr> = options.iter().map(OsStr::new).collect();
    args.extend([src.as_os_str(), OsStr::new("-o"), dst.as_os_str()]);

    run("cc", args).expect("failed to run `cc`")
}
/// Convert the debug information of the binary `src` into a GSYM file.
///
/// The output is written next to `src` under the file name `dst`. The tool
/// used is taken from the `LLVM_GSYMUTIL` environment variable and defaults to
/// `llvm-gsymutil`. Both paths are announced to Cargo as `rerun-if-changed`
/// inputs.
///
/// # Panics
///
/// Panics if the tool cannot be run or reports failure.
fn gsym(src: &Path, dst: &str) {
    let dst = src.with_file_name(dst);
    println!("cargo:rerun-if-changed={}", src.display());
    println!("cargo:rerun-if-changed={}", dst.display());

    let gsymutil = match env::var_os("LLVM_GSYMUTIL") {
        Some(tool) => tool,
        None => OsString::from("llvm-gsymutil"),
    };
    let args = [
        Path::new("--convert"),
        src,
        Path::new("--out-file"),
        dst.as_path(),
    ];

    run(gsymutil, args).expect("failed to run `llvm-gsymutil`")
}
/// Build every test binary from the C sources in `<crate_root>/data`.
///
/// * `test.c` is compiled twice: without debug information and with DWARF v4.
/// * `test-gsym.c` is linked with the `test-gsym.ld` linker script and then
///   converted into `test.gsym`.
///
/// # Panics
///
/// Panics if the linker script path is not valid UTF-8 or if any of the
/// external tools fails.
fn build_test_bins(crate_root: &Path) {
    let data_dir = crate_root.join("data");

    let test_src = data_dir.join("test.c");
    cc(&test_src, "test-no-debug.bin", &["-g0"]);
    cc(&test_src, "test-dwarf-v4.bin", &["-gdwarf-4"]);

    let gsym_src = data_dir.join("test-gsym.c");
    let ld_script = data_dir.join("test-gsym.ld");
    let ld_script = ld_script.to_str().unwrap();
    println!("cargo:rerun-if-changed={ld_script}");

    let gsym_options = [
        "-gdwarf-4",
        "-T",
        ld_script,
        "-Wl,--build-id=none",
        "-O0",
        "-nostdlib",
    ];
    cc(&gsym_src, "test-gsym.bin", &gsym_options);

    let gsym_bin = data_dir.join("test-gsym.bin");
    gsym(&gsym_bin, "test.gsym");
}
fn main() {
let crate_dir = env!("CARGO_MANIFEST_DIR");
if !cfg!(feature = "dont-generate-test-files") {
build_test_bins(crate_dir.as_ref());
}
#[cfg(feature = "cheader")]
{
let build_type = env::var("PROFILE").unwrap();
let target_path = Path::new(&crate_dir).join("target").join(build_type);
cbindgen::Builder::new()
.with_crate(crate_dir)
.with_config(cbindgen::Config::from_root_or_default(crate_dir))
.generate()
.expect("Unable to generate bindings")
.write_to_file(target_path.join("blazesym.h"));
}
}

22
third_party/blazesym/cbindgen.toml vendored Normal file
View File

@@ -0,0 +1,22 @@
language = "C"
include_guard = "__blazesym_h_"
[export]
item_types = ["globals", "enums", "structs", "unions", "typedefs", "opaque", "functions"]
[fn]
args = "Vertical"
rename_args = "GeckoCase"
[struct]
associated_constants_in_body = true
derive_eq = true
derive_ostream = true
[enum]
add_sentinel = false
derive_helper_methods = true
derive_ostream = true
[macro_expansion]
bitflags = true

BIN
third_party/blazesym/data/dwarf-example vendored Executable file

Binary file not shown.

22
third_party/blazesym/data/test-gsym.c vendored Normal file
View File

@@ -0,0 +1,22 @@
/* The sample program is used to generate test.gsym.
*
* Chosen functions are placed in dedicated sections to allow for controlled placement.
*/
__attribute__((section(".text.factorial"))) unsigned int
factorial(unsigned int n) { /* Recursively compute n!; emitted into the .text.factorial section. */
if (n == 0)
return 1; /* Base case: 0! == 1. */
return factorial(n - 1) * n;
}
static inline void
factorial_inline_wrapper() { /* Static inline wrapper around factorial(); the result is discarded. */
factorial(5);
}
__attribute__((section(".text.main"))) int
main(int argc, const char *argv[]) { /* Entry point; emitted into the .text.main section. Arguments are unused. */
factorial_inline_wrapper();
return 0;
}

40
third_party/blazesym/data/test-gsym.ld vendored Normal file
View File

@@ -0,0 +1,40 @@
/* Linker script passed via `-T` when test-gsym.bin is linked. It pins the
code of test-gsym.c to fixed addresses. */
SECTIONS {
/* Code starts at 0x2000000; the contents of .text.main come first. */
.text (0x2000000) : {
*(.text.main)
*(.text)
/* Move the location counter so that .text.factorial starts at 0x2000100. */
. = ABSOLUTE(0x2000100);
*(.text.factorial)
}
.data : {
*(.data)
}
.bss : {
*(.bss)
}
/* DWARF debug sections.
Symbols in the DWARF debugging sections are relative to the beginning
of the section so we begin them at 0.
*/
/* DWARF 1. */
.debug 0 : { *(.debug) }
.line 0 : { *(.line) }
/* GNU DWARF 1 extensions. */
.debug_srcinfo 0 : { *(.debug_srcinfo) }
.debug_sfnames 0 : { *(.debug_sfnames) }
/* DWARF 1.1 and DWARF 2. */
.debug_aranges 0 : { *(.debug_aranges) }
.debug_pubnames 0 : { *(.debug_pubnames) }
/* DWARF 2. */
.debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
.debug_abbrev 0 : { *(.debug_abbrev) }
.debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) }
.debug_frame 0 : { *(.debug_frame) }
.debug_str 0 : { *(.debug_str) }
.debug_loc 0 : { *(.debug_loc) }
.debug_macinfo 0 : { *(.debug_macinfo) }
/* Drop every remaining input section whose name matches `.*`. */
/DISCARD/ : {
*(.*)
}
}

22
third_party/blazesym/data/test.c vendored Normal file
View File

@@ -0,0 +1,22 @@
/*
* The sample program is used to generate test.bin.
*/
#include <stdio.h>
static
unsigned int fibonacci(unsigned int n) { /* Naive recursive Fibonacci: fib(0) = 0, fib(1) = 1. */
if (n <= 1)
return n; /* Base cases. */
return fibonacci(n - 1) + fibonacci(n - 2);
}
/* Read an integer n from stdin and print fibonacci(n).
 *
 * Returns 0 on success and 1 if no integer could be read.
 */
int
main() {
  int i;

  printf("calculate fibonacci(n); n = ");
  /* Without this check `i` would be read uninitialized on malformed input. */
  if (scanf("%d", &i) != 1)
    return 1;
  /* fibonacci() returns an unsigned int, hence %u for the result. */
  printf("fibonacci(%d) = %u\n", i, fibonacci(i));
  return 0;
}

View File

@@ -0,0 +1,44 @@
extern crate blazesym;
use blazesym::{BlazeSymbolizer, SymbolSrcCfg};
use std::env;
use std::path;
/// Print a one-line usage message, using the program name from `argv[0]`.
fn show_usage() {
    let argv = env::args().collect::<Vec<String>>();
    let program = &argv[0];
    println!("Usage: {program} <file> <address>");
}
/// Symbolize one address in an ELF file and print the result.
///
/// Expects exactly two arguments: the path of the ELF file and a hexadecimal
/// address, optionally prefixed with `0x`. Prints the usage message otherwise.
///
/// # Panics
///
/// Panics if the address is not a valid hexadecimal number or if the
/// symbolizer cannot be created.
fn main() {
    let args: Vec<String> = env::args().collect();
    if args.len() != 3 {
        show_usage();
        return;
    }

    let bin_name = &args[1];
    let addr_arg = args[2].as_str();
    // `strip_prefix` cannot panic, unlike slicing `[0..2]`, which fails for
    // addresses shorter than two bytes such as `5`.
    let addr_str = addr_arg.strip_prefix("0x").unwrap_or(addr_arg);
    let addr = u64::from_str_radix(addr_str, 16).unwrap();

    let sym_srcs = [SymbolSrcCfg::Elf {
        file_name: path::PathBuf::from(bin_name),
        base_address: 0x0,
    }];
    let resolver = BlazeSymbolizer::new().unwrap();
    let results = resolver.symbolize(&sym_srcs, &[addr]);

    if results.len() == 1 && !results[0].is_empty() {
        // Only the first symbol found for the address is reported.
        let result = &results[0][0];
        println!(
            "0x{:x} @ {} {}:{}",
            addr, result.symbol, result.path, result.line_no
        );
    } else {
        println!("0x{addr:x} is not found");
    }
}

View File

@@ -0,0 +1,54 @@
extern crate blazesym;
use blazesym::{BlazeSymbolizer, SymbolSrcCfg, SymbolizedResult};
use std::env;
/// Print the usage message, using the program name from `argv[0]`.
fn show_usage() {
    let argv = env::args().collect::<Vec<String>>();
    let program = &argv[0];
    println!("Usage: {program} <pid> <address>");
    println!("Resolve an address in the process of the given pid, and");
    println!("print its symbol, the file name of the source, and the line number.");
}
/// Symbolize one address in a running process and print the result.
///
/// Expects exactly two arguments: the PID of the process and a hexadecimal
/// address, optionally prefixed with `0x`. Prints the usage message otherwise.
///
/// # Panics
///
/// Panics if the PID or the address cannot be parsed or if the symbolizer
/// cannot be created.
fn main() {
    let args: Vec<String> = env::args().collect();
    if args.len() != 3 {
        show_usage();
        return;
    }

    let pid = args[1].parse::<u32>().unwrap();
    println!("PID: {pid}");

    let addr_arg = args[2].as_str();
    // `strip_prefix` works on whole characters, so it cannot panic on a
    // non-ASCII argument the way byte slicing can.
    let addr_str = addr_arg.strip_prefix("0x").unwrap_or(addr_arg);
    let addr = u64::from_str_radix(addr_str, 16).unwrap();

    let sym_files = [SymbolSrcCfg::Process { pid: Some(pid) }];
    let resolver = BlazeSymbolizer::new().unwrap();
    let symlist = resolver.symbolize(&sym_files, &[addr]);

    // Report the first symbol of the first (and only) address; an empty result
    // list is treated the same as "no symbol found" instead of panicking.
    match symlist.first().and_then(|syms| syms.first()) {
        Some(SymbolizedResult {
            symbol,
            start_address,
            path,
            line_no,
            column: _,
        }) => {
            println!(
                "0x{:x} {}@0x{:x}+{} {}:{}",
                addr,
                symbol,
                start_address,
                addr - start_address,
                path,
                line_no
            );
        }
        None => println!("0x{addr:x} is not found"),
    }
}

View File

@@ -0,0 +1,196 @@
'''Generate one page API document from rustdoc.
This script parses HTML files generated by rustdoc and generates a single-page
API document.
'''
from html.parser import HTMLParser
# rustdoc output files to merge, in the order they appear in the generated
# document. Each name is resolved relative to the 'blazesym/' directory.
pages = ['index.html',
'struct.BlazeSymbolizer.html',
'struct.SymbolizedResult.html',
'enum.SymbolSrcCfg.html',
'enum.SymbolizerFeature.html',
'fn.blazesym_new.html',
'fn.blazesym_free.html',
'fn.blazesym_symbolize.html',
'fn.blazesym_result_free.html',
'struct.blazesym.html',
'struct.sym_src_cfg.html',
'enum.blazesym_src_type.html',
'union.ssc_params.html',
'struct.ssc_elf.html',
'struct.ssc_kernel.html',
'struct.ssc_process.html',
'struct.blazesym_result.html',
'struct.blazesym_entry.html',
'struct.blazesym_csym.html',
]
def replace_text(src, replace, start, stop):
    '''Replace the span between two positions of `src` with `replace`.

    `start` and `stop` are (row, column) pairs with 0-based rows into the
    lines of `src`. When the span covers several rows, the remainder of the
    first row and the beginning of the last row are dropped and everything
    in between is replaced by a single row holding `replace`.
    '''
    rows = src.split('\n')
    first_row, first_col = start[0], start[1]
    last_row, last_col = stop[0], stop[1]

    if first_row == last_row:
        row = rows[first_row]
        rows[first_row] = row[:first_col] + replace + row[last_col:]
        return '\n'.join(rows)

    rows[first_row] = rows[first_row][:first_col]
    rows[last_row] = rows[last_row][last_col:]
    rows[first_row + 1:last_row] = [replace]
    return '\n'.join(rows)
class MyParser(HTMLParser):
    '''Extract the documentation section of a single rustdoc page.

    The caller must assign the raw HTML of the page to `raw_data` before
    calling `feed()`: positions reported by the parser are mapped back onto
    that text. Extracted sections are printed to stdout.

    State is kept in attributes that only exist while they are relevant:

    - `doc_type`: last word of the `class` attribute of `<body>`.
    - `start_pos`: position of the `main-content` section.
    - `depth`: nesting depth inside the element that ends the section.
    - `replace_depth`, `replacing`, `replace_start`: set while inside an
      element whose content is replaced by a placeholder.
    - `replaces`: list of (placeholder, start, stop) replacements.
    '''

    def get_doc_range(self, start, end):
        '''Return the raw HTML between the parser positions `start` and `end`.

        Positions are (line, column) pairs as returned by `getpos()`, i.e.
        with 1-based line numbers. All recorded replacements are applied to
        the returned text.
        '''
        lines = self.raw_data.split('\n')
        lines = lines[start[0] - 1: end[0]]
        # Truncate the end first: both positions may be on the same line.
        lines[-1] = lines[-1][:end[1]]
        lines[0] = lines[0][start[1]:]
        content = '\n'.join(lines)
        if hasattr(self, 'replaces'):
            # Apply from last to first so earlier positions stay valid.
            for replace, rstart, rstop in reversed(self.replaces):
                # Make the positions relative to the extracted range.
                rstart = list(rstart)
                rstop = list(rstop)
                rstart[0] -= start[0]
                rstop[0] -= start[0]
                if rstart[0] == 0:
                    rstart[1] -= start[1]
                if rstop[0] == 0:
                    rstop[1] -= start[1]
                content = replace_text(content, replace, rstart, rstop)
        return content

    def get_pos_end_tag(self):
        '''Return the position just past the `>` closing the current tag.'''
        pos = self.getpos()
        lines = self.raw_data.split('\n')
        line = lines[pos[0] - 1]
        col = line.find('>', pos[1]) + 1
        return (pos[0], col)

    def handle_starttag(self, tag, attrs):
        '''Track the page type, the section start and replaced elements.'''
        attrs = dict(attrs)
        if tag == 'body':
            self.doc_type = attrs['class'].split()[-1]
        elif tag == 'section' and 'id' in attrs and attrs['id'] == 'main-content':
            if self.doc_type == 'crate':
                self.start_pos = self.getpos()
            elif self.doc_type in ('struct', 'enum', 'union'):
                if not hasattr(self, 'start_pos'):
                    self.start_pos = self.getpos()
            elif self.doc_type == 'fn':
                if not hasattr(self, 'start_pos'):
                    self.start_pos = self.getpos()
        elif 'class' in attrs and 'example-wrap' in attrs['class'].split():
            self.replacing = '&nbsp;&nbsp;......&lt;&lt;EXAMPLE&gt;&gt;......<br/>'
            self.replace_depth = 0
            self.replace_start = self.getpos()
        elif 'class' in attrs and 'item-decl' in attrs['class'].split():
            self.replacing = '&nbsp;&nbsp;......&lt;&lt;DECLARATION&gt;&gt;......<br/>'
            self.replace_depth = 0
            self.replace_start = self.getpos()
        elif hasattr(self, 'replace_depth'):
            self.replace_depth += 1
        elif hasattr(self, 'depth'):
            self.depth += 1
        elif hasattr(self, 'start_pos'):
            if self.doc_type in ('struct', 'enum', 'union'):
                # The section of a type ends where its implementations begin.
                if 'id' in attrs and attrs['id'].endswith('implementations'):
                    print('%s\n</section>' % self.get_doc_range(self.start_pos, self.getpos()))
                    self.doc_type = ''
            elif self.doc_type == 'fn' and not hasattr(self, 'depth'):
                self.depth = 0
            elif self.doc_type == 'crate' and 'class' in attrs and attrs['class'].endswith('top-doc'):
                self.depth = 0

    def handle_endtag(self, tag):
        '''Close replaced elements and print the section once it ends.'''
        if hasattr(self, 'replace_depth'):
            if self.replace_depth > 0:
                self.replace_depth -= 1
            else:
                # The replaced element itself is closed: record its extent.
                del self.replace_depth
                if not hasattr(self, 'replaces'):
                    self.replaces = []
                self.replaces.append((self.replacing, self.replace_start, self.get_pos_end_tag()))
        elif hasattr(self, 'depth'):
            if self.depth > 0:
                self.depth -= 1
            else:
                del self.depth
                if self.doc_type == 'crate':
                    content = self.get_doc_range(self.start_pos, self.get_pos_end_tag())
                    print('%s\n</section>' % content)
                elif self.doc_type == 'fn':
                    print('%s\n</section>' % self.get_doc_range(self.start_pos, self.get_pos_end_tag()))
                    self.doc_type = ''
# Emit the document head: font preloads and stylesheets referenced relative to
# the parent directory, followed by inline CSS.
print('<html>')
print('<head>')
print('<link rel="preload" as="font" type="font/woff2" crossorigin href="../SourceSerif4-Regular.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../FiraSans-Regular.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../FiraSans-Medium.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../SourceCodePro-Regular.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../SourceSerif4-Bold.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../SourceCodePro-Semibold.ttf.woff2">')
print('<link rel="stylesheet" type="text/css" href="../normalize.css"><link rel="stylesheet" type="text/css" href="../rustdoc.css" id="mainThemeStyle"><link rel="stylesheet" type="text/css" href="../ayu.css" disabled><link rel="stylesheet" type="text/css" href="../dark.css" disabled><link rel="stylesheet" type="text/css" href="../light.css" id="themeStyle">')
print('<link rel="stylesheet" href="../noscript.css">')
# The inline CSS hides the `.tooltip.ignore` and `.out-of-band` elements and
# clips `.example-wrap` / `.item-decl` blocks to 30px with a placeholder label.
print('''<style>
.tooltip.ignore {
visibility: hidden;
}
.out-of-band {
visibility: hidden;
}
.example-wrap {
display: block;
height: 30px;
overflow: hidden;
user-select: none;
}
.example-wrap::before {
content: '......<<EXAMPLE>>......';
}
.item-decl {
display: block;
height: 30px;
overflow: hidden;
user-select: none;
}
.item-decl::before {
content: '......<<CODE BLOCK>>......';
}
</style>
''')
print('</head>')
print('<body>')
# Run every rustdoc page through a fresh parser. The parser prints the
# sections it extracts; each page is wrapped in a <page> element here.
for page in pages:
    print('<page filename="%s">' % page)
    p = MyParser()
    # Close the file as soon as it has been read instead of leaking the handle.
    with open('blazesym/' + page, 'r') as fo:
        data = fo.read()
    # The parser maps positions back onto the raw text, so it has to be set
    # before feed() is called.
    p.raw_data = data
    p.feed(data)
    print('</page>')
print('</body>')
print('</html>')

948
third_party/blazesym/src/c_api.rs vendored Normal file
View File

@@ -0,0 +1,948 @@
use std::alloc::{alloc, dealloc, Layout};
use std::ffi::CStr;
use std::ffi::OsStr;
use std::mem;
use std::os::raw::c_char;
use std::os::unix::ffi::OsStrExt as _;
use std::path::PathBuf;
use std::ptr;
use std::u64;
use crate::BlazeSymbolizer;
use crate::FindAddrFeature;
use crate::SymbolInfo;
use crate::SymbolSrcCfg;
use crate::SymbolType;
use crate::SymbolizedResult;
use crate::SymbolizerFeature;
/// Types of symbol sources and debug information for C API.
///
/// The value stored in `sym_src_cfg.src_type` selects which member of
/// [`ssc_params`] is read.
#[repr(C)]
#[allow(non_camel_case_types, unused)]
pub enum blazesym_src_type {
/// Symbols and debug information from an ELF file.
///
/// Uses `ssc_params.elf`.
SRC_T_ELF,
/// Symbols and debug information from a kernel image and its kallsyms.
///
/// Uses `ssc_params.kernel`.
SRC_T_KERNEL,
/// Symbols and debug information from a process, including loaded object files.
///
/// Uses `ssc_params.process`.
SRC_T_PROCESS,
}
/// The parameters to load symbols and debug information from an ELF.
///
/// Describes the path and address of an ELF file loaded in a
/// process.
#[repr(C)]
pub struct ssc_elf {
/// The file name of an ELF file.
///
/// It can be an executable or shared object.
/// For example, passing "/bin/sh" will load symbols and debug information from `sh`,
/// whereas passing "/lib/libc.so.xxx" will load symbols and debug information from the libc.
///
/// Must point to a NUL-terminated string.
pub file_name: *const c_char,
/// The base address is where the file's executable segment(s) is loaded.
///
/// It should be the address in the process that maps to the first byte
/// of the executable segment.
/// For example, in `/proc/<pid>/maps`
///
/// ```text
/// 7fe1b2dc4000-7fe1b2f80000 r-xp 00000000 00:1d 71695032 /usr/lib64/libc-2.28.so
/// 7fe1b2f80000-7fe1b3180000 ---p 001bc000 00:1d 71695032 /usr/lib64/libc-2.28.so
/// 7fe1b3180000-7fe1b3184000 r--p 001bc000 00:1d 71695032 /usr/lib64/libc-2.28.so
/// 7fe1b3184000-7fe1b3186000 rw-p 001c0000 00:1d 71695032 /usr/lib64/libc-2.28.so
/// ```
///
/// It reveals that the executable segment of libc-2.28.so was
/// loaded at 0x7fe1b2dc4000. This base address is used to
/// translate an address in the segment to the corresponding
/// address in the ELF file.
///
/// A loader would load an executable segment with the permission of `x`
/// (executable). For example, the first block is with the
/// permission of `r-xp`.
pub base_address: u64,
}
/// The parameters to load symbols and debug information from a kernel.
///
/// Use a kernel image and a snapshot of its kallsyms as a source of symbols and
/// debug information.
#[repr(C)]
pub struct ssc_kernel {
/// The path of a copy of kallsyms.
///
/// It can be `"/proc/kallsyms"` for the running kernel on the
/// device. However, you can make copies for later use. In that situation,
/// you should give the path of a copy.
/// Passing `NULL` results in the default, `"/proc/kallsyms"`.
pub kallsyms: *const c_char,
/// The path of a kernel image.
///
/// The path of a kernel image should be, for instance,
/// `"/boot/vmlinux-xxxx"`. For a `NULL` value, it will locate the
/// kernel image of the running kernel in `"/boot/"` or
/// `"/usr/lib/debug/boot/"`.
pub kernel_image: *const c_char,
}
/// The parameters to load symbols and debug information from a process.
///
/// Load all ELF files in a process as the sources of symbols and debug
/// information.
#[repr(C)]
pub struct ssc_process {
/// The PID of the process to symbolize.
///
/// BlazeSym will parse `/proc/<pid>/maps` and load all the object
/// files.
///
/// A value of 0 is passed on to the symbolizer as "no PID given".
pub pid: u32,
}
/// Parameters of a symbol source.
///
/// Only the member that matches `sym_src_cfg.src_type` is read.
#[repr(C)]
pub union ssc_params {
/// The variant for SRC_T_ELF
pub elf: mem::ManuallyDrop<ssc_elf>,
/// The variant for SRC_T_KERNEL
pub kernel: mem::ManuallyDrop<ssc_kernel>,
/// The variant for SRC_T_PROCESS
pub process: mem::ManuallyDrop<ssc_process>,
}
/// Description of a source of symbols and debug information for C API.
#[repr(C)]
pub struct sym_src_cfg {
/// A type of symbol source.
pub src_type: blazesym_src_type,
/// The parameters of the source; the member to use is selected by `src_type`.
pub params: ssc_params,
}
/// Names of the BlazeSym features.
#[repr(C)]
#[allow(non_camel_case_types, unused)]
pub enum blazesym_feature_name {
/// Enable or disable returning line numbers of addresses.
///
/// Users should set `blazesym_feature.params.enable` to enable or
/// disable the feature.
LINE_NUMBER_INFO,
/// Enable or disable loading symbols from DWARF.
///
/// Users should set `blazesym_feature.params.enable` to enable or
/// disable the feature. This feature is disabled by default.
DEBUG_INFO_SYMBOLS,
}
/// Parameters of a BlazeSym feature.
///
/// Every feature currently uses the `enable` member.
#[repr(C)]
pub union blazesym_feature_params {
/// Whether the feature is switched on.
enable: bool,
}
/// Setting of a blazesym feature.
///
/// Contains the parameters to enable, disable, or customize a feature.
#[repr(C)]
pub struct blazesym_feature {
/// The feature being configured.
pub feature: blazesym_feature_name,
/// The parameters applied to `feature`.
pub params: blazesym_feature_params,
}
/// A placeholder symbolizer for C API.
///
/// It is returned by [`blazesym_new()`] and should be freed by
/// [`blazesym_free()`].
#[repr(C)]
pub struct blazesym {
/// The Rust symbolizer, created with `Box::into_raw()` and released again
/// in [`blazesym_free()`].
symbolizer: *mut BlazeSymbolizer,
}
/// The result of symbolization of an address for C API.
///
/// A `blazesym_csym` is the information of a symbol found for an
/// address. One address may result in several symbols.
#[repr(C)]
pub struct blazesym_csym {
/// The name of the symbol that the given address belongs to.
///
/// A NUL-terminated string.
pub symbol: *const c_char,
/// The address (i.e., the first byte) where the symbol is located.
///
/// The address is already relocated to the address space of
/// the process.
pub start_address: u64,
/// The path of the source file that defines the symbol.
///
/// A NUL-terminated string.
pub path: *const c_char,
/// The line number in the source code of the instruction at the address.
pub line_no: usize,
/// The column reported together with `line_no`.
pub column: usize,
}
/// `blazesym_entry` is the output of symbolization for an address for C API.
///
/// Every address has a `blazesym_entry` in
/// [`blazesym_result::entries`] to collect symbols found by BlazeSym.
#[repr(C)]
pub struct blazesym_entry {
/// The number of symbols found for an address.
pub size: usize,
/// All symbols found.
///
/// `syms` is an array of `blazesym_csym` with `size` elements.
pub syms: *const blazesym_csym,
}
/// `blazesym_result` is the result of symbolization for C API.
///
/// The instances of blazesym_result are returned from
/// [`blazesym_symbolize()`]. They should be freed by calling
/// [`blazesym_result_free()`].
#[repr(C)]
pub struct blazesym_result {
/// The number of addresses being symbolized.
pub size: usize,
/// The entries for addresses.
///
/// Symbolization occurs based on the order of addresses.
/// Therefore, every address must have an entry here in the same
/// order.
///
/// Declared with length 0; the `size` entries are stored directly
/// behind the header.
pub entries: [blazesym_entry; 0],
}
/// Create a `PathBuf` from a pointer to a C string.
///
/// The bytes of the string are taken over unchanged. Paths are arbitrary byte
/// sequences on Unix, so they do not have to be valid UTF-8.
///
/// # Safety
///
/// `cstr` must be non-null and point to a string that is terminated with a
/// null byte and stays valid for the duration of the call.
///
unsafe fn from_cstr(cstr: *const c_char) -> PathBuf {
    // SAFETY: The caller guarantees that `cstr` points to a valid
    //         NUL-terminated string.
    let bytes = unsafe { CStr::from_ptr(cstr) }.to_bytes();
    // Going through `OsStr` instead of `str` avoids a panic (and with it an
    // unwind across the FFI boundary) on non-UTF-8 paths.
    PathBuf::from(OsStr::from_bytes(bytes))
}
unsafe fn symbolsrccfg_to_rust(cfg: *const sym_src_cfg, cfg_len: u32) -> Option<Vec<SymbolSrcCfg>> {
let mut cfg_rs = Vec::<SymbolSrcCfg>::with_capacity(cfg_len as usize);
for i in 0..cfg_len {
let c = unsafe { cfg.offset(i as isize) };
match unsafe { &(*c).src_type } {
blazesym_src_type::SRC_T_ELF => {
cfg_rs.push(SymbolSrcCfg::Elf {
file_name: unsafe { from_cstr((*c).params.elf.file_name) },
base_address: unsafe { (*c).params.elf.base_address },
});
}
blazesym_src_type::SRC_T_KERNEL => {
let kallsyms = unsafe { (*c).params.kernel.kallsyms };
let kernel_image = unsafe { (*c).params.kernel.kernel_image };
cfg_rs.push(SymbolSrcCfg::Kernel {
kallsyms: if !kallsyms.is_null() {
Some(unsafe { from_cstr(kallsyms) })
} else {
None
},
kernel_image: if !kernel_image.is_null() {
Some(unsafe { from_cstr(kernel_image) })
} else {
None
},
});
}
blazesym_src_type::SRC_T_PROCESS => {
let pid = unsafe { (*c).params.process.pid };
cfg_rs.push(SymbolSrcCfg::Process {
pid: if pid > 0 { Some(pid) } else { None },
});
}
}
}
Some(cfg_rs)
}
/// Create a symbolizer with default settings for the C API.
///
/// Returns `NULL` if the symbolizer could not be created.
///
/// # Safety
///
/// Free the pointer with [`blazesym_free()`].
///
#[no_mangle]
pub unsafe extern "C" fn blazesym_new() -> *mut blazesym {
    match BlazeSymbolizer::new() {
        // Both the symbolizer and its C wrapper are moved to the heap and
        // handed out as raw pointers.
        Ok(symbolizer) => Box::into_raw(Box::new(blazesym {
            symbolizer: Box::into_raw(Box::new(symbolizer)),
        })),
        Err(_) => ptr::null_mut(),
    }
}
/// Create a symbolizer for the C API with the given feature settings.
///
/// `features` is an array of `nfeatures` settings. A `NULL` pointer or a
/// count of 0 selects no settings at all. Returns `NULL` if the symbolizer
/// could not be created.
///
/// # Safety
///
/// Unless it is `NULL`, `features` must point to `nfeatures` initialized
/// [`blazesym_feature`] objects that stay valid for the duration of the call.
/// Free the returned pointer with [`blazesym_free()`].
///
#[no_mangle]
pub unsafe extern "C" fn blazesym_new_opts(
    features: *const blazesym_feature,
    nfeatures: usize,
) -> *mut blazesym {
    // The array is owned by the caller and was not allocated by Rust, so it
    // is only borrowed as a slice; `Vec::from_raw_parts()` must not be used
    // on such memory.
    let features_c: &[blazesym_feature] = if features.is_null() || nfeatures == 0 {
        &[]
    } else {
        // SAFETY: The caller guarantees that `features` points to
        //         `nfeatures` initialized objects.
        unsafe { std::slice::from_raw_parts(features, nfeatures) }
    };

    let features_r: Vec<SymbolizerFeature> = features_c
        .iter()
        .map(|x| match x.feature {
            blazesym_feature_name::LINE_NUMBER_INFO => {
                SymbolizerFeature::LineNumberInfo(unsafe { x.params.enable })
            }
            blazesym_feature_name::DEBUG_INFO_SYMBOLS => {
                SymbolizerFeature::DebugInfoSymbols(unsafe { x.params.enable })
            }
        })
        .collect();

    let symbolizer = match BlazeSymbolizer::new_opt(&features_r) {
        Ok(s) => s,
        Err(_) => return ptr::null_mut(),
    };

    Box::into_raw(Box::new(blazesym {
        symbolizer: Box::into_raw(Box::new(symbolizer)),
    }))
}
/// Free a symbolizer created for the C API.
///
/// Passing `NULL` is allowed and does nothing.
///
/// # Safety
///
/// The pointer must be returned by [`blazesym_new()`].
///
#[no_mangle]
pub unsafe extern "C" fn blazesym_free(symbolizer: *mut blazesym) {
    if symbolizer.is_null() {
        return;
    }

    // Release the wrapped Rust symbolizer first, then the wrapper itself.
    let inner = unsafe { (*symbolizer).symbolizer };
    unsafe {
        drop(Box::from_raw(inner));
        drop(Box::from_raw(symbolizer));
    }
}
/// Convert SymbolizedResults to blazesym_results.
///
/// Everything is stored in one allocation with the following layout:
///
/// ```text
/// u64 size | blazesym_result | blazesym_entry... | blazesym_csym... | strings
/// ```
///
/// The returned pointer refers to the `blazesym_result` part. The leading
/// `u64` holds the size of everything behind it. Returns `NULL` if the
/// allocation fails.
///
/// # Safety
///
/// The returned pointer should be freed by [`blazesym_result_free()`].
///
unsafe fn convert_symbolizedresults_to_c(
results: Vec<Vec<SymbolizedResult>>,
) -> *const blazesym_result {
// Allocate a buffer to contain a blazesym_result, all
// blazesym_csym, and C strings of symbol and path.
// Every result contributes two strings, each with a terminating NUL byte.
let strtab_size = results.iter().flatten().fold(0, |acc, result| {
acc + result.symbol.len() + result.path.len() + 2
});
let all_csym_size = results.iter().flatten().count();
let buf_size = strtab_size
+ mem::size_of::<blazesym_result>()
+ mem::size_of::<blazesym_entry>() * results.len()
+ mem::size_of::<blazesym_csym>() * all_csym_size;
let raw_buf_with_sz =
unsafe { alloc(Layout::from_size_align(buf_size + mem::size_of::<u64>(), 8).unwrap()) };
if raw_buf_with_sz.is_null() {
return ptr::null();
}
// prepend an u64 to keep the size of the buffer.
unsafe { *(raw_buf_with_sz as *mut u64) = buf_size as u64 };
let raw_buf = unsafe { raw_buf_with_sz.add(mem::size_of::<u64>()) };
let result_ptr = raw_buf as *mut blazesym_result;
// Cursors to the next unused entry, symbol and string byte respectively.
let mut entry_last = unsafe { &mut (*result_ptr).entries as *mut blazesym_entry };
let mut csym_last = unsafe {
raw_buf.add(
mem::size_of::<blazesym_result>() + mem::size_of::<blazesym_entry>() * results.len(),
)
} as *mut blazesym_csym;
let mut cstr_last = unsafe {
raw_buf.add(
mem::size_of::<blazesym_result>()
+ mem::size_of::<blazesym_entry>() * results.len()
+ mem::size_of::<blazesym_csym>() * all_csym_size,
)
} as *mut c_char;
// Copy `src` into the string area as a NUL-terminated string and return
// a pointer to the copy.
let mut make_cstr = |src: &str| {
let cstr = cstr_last;
unsafe { ptr::copy(src.as_ptr(), cstr as *mut u8, src.len()) };
unsafe { *cstr.add(src.len()) = 0 };
cstr_last = unsafe { cstr_last.add(src.len() + 1) };
cstr
};
unsafe { (*result_ptr).size = results.len() };
// Convert all SymbolizedResults to blazesym_entrys and blazesym_csyms
for entry in results {
unsafe { (*entry_last).size = entry.len() };
// The symbols of one entry are stored contiguously, starting here.
unsafe { (*entry_last).syms = csym_last };
entry_last = unsafe { entry_last.add(1) };
for r in entry {
let symbol_ptr = make_cstr(&r.symbol);
let path_ptr = make_cstr(&r.path);
let csym_ref = unsafe { &mut *csym_last };
csym_ref.symbol = symbol_ptr;
csym_ref.start_address = r.start_address;
csym_ref.path = path_ptr;
csym_ref.line_no = r.line_no;
csym_ref.column = r.column;
csym_last = unsafe { csym_last.add(1) };
}
}
result_ptr
}
/// Symbolize addresses with the sources of symbols and debug info.
///
/// Return a [`blazesym_result`] that contains one entry per input address,
/// in the order of the input. `NULL` is returned if `symbolizer` is `NULL`,
/// if the symbolizer produced no results, or if memory could not be
/// allocated. The caller should free the returned object by calling
/// [`blazesym_result_free()`].
///
/// # Safety
///
/// `symbolizer` must be `NULL` or a pointer returned by [`blazesym_new()`] or
/// [`blazesym_new_opts()`]. `sym_srcs` must point to `sym_srcs_len` valid
/// objects and, unless it is `NULL`, `addrs` must point to `addr_cnt`
/// addresses. The returned pointer should be freed by
/// [`blazesym_result_free()`].
///
#[no_mangle]
pub unsafe extern "C" fn blazesym_symbolize(
    symbolizer: *mut blazesym,
    sym_srcs: *const sym_src_cfg,
    sym_srcs_len: u32,
    addrs: *const u64,
    addr_cnt: usize,
) -> *const blazesym_result {
    if symbolizer.is_null() {
        #[cfg(debug_assertions)]
        eprintln!("blazesym_symbolize() called without a symbolizer");
        return ptr::null();
    }

    let sym_srcs_rs = match unsafe { symbolsrccfg_to_rust(sym_srcs, sym_srcs_len) } {
        Some(sym_srcs_rs) => sym_srcs_rs,
        None => {
            #[cfg(debug_assertions)]
            eprintln!("Fail to transform configurations of symbolizer from C to Rust");
            return ptr::null();
        }
    };

    let symbolizer = unsafe { &*(*symbolizer).symbolizer };

    // The addresses are owned by the caller and were not allocated by Rust,
    // so they are only borrowed as a slice; `Vec::from_raw_parts()` must not
    // be used on such memory.
    let addresses: &[u64] = if addrs.is_null() || addr_cnt == 0 {
        &[]
    } else {
        // SAFETY: The caller guarantees that `addrs` points to `addr_cnt`
        //         addresses.
        unsafe { std::slice::from_raw_parts(addrs, addr_cnt) }
    };

    let results = symbolizer.symbolize(&sym_srcs_rs, addresses);
    if results.is_empty() {
        #[cfg(debug_assertions)]
        eprintln!("Empty result while request for {addr_cnt}");
        return ptr::null();
    }

    unsafe { convert_symbolizedresults_to_c(results) }
}
/// Free a result returned by blazesym_symbolize.
///
/// Passing `NULL` is allowed and does nothing (apart from a diagnostic in
/// debug builds).
///
/// # Safety
///
/// The pointer must be returned by [`blazesym_symbolize()`].
///
#[no_mangle]
pub unsafe extern "C" fn blazesym_result_free(results: *const blazesym_result) {
    if results.is_null() {
        #[cfg(debug_assertions)]
        eprintln!("blazesym_result_free(null)");
        return;
    }

    // The allocation starts one `u64` before the result; that `u64` holds the
    // size of everything behind it.
    let header_sz = mem::size_of::<u64>();
    let alloc_start = unsafe { (results as *mut u8).sub(header_sz) };
    let payload_sz = unsafe { *(alloc_start as *mut u64) } as usize;
    let layout = Layout::from_size_align(payload_sz + header_sz, 8).unwrap();
    unsafe { dealloc(alloc_start, layout) };
}
/// Information about a symbol for C API.
///
/// An object with `name` set to `NULL` terminates an array of these objects.
#[repr(C)]
pub struct blazesym_sym_info {
/// The name of the symbol as a NUL-terminated string.
name: *const u8,
/// Copied from `SymbolInfo::address`.
address: u64,
/// Copied from `SymbolInfo::size`.
size: u64,
/// Whether the symbol is a function, a variable, or of unknown type.
sym_type: blazesym_sym_type,
/// Copied from `SymbolInfo::file_offset`.
file_offset: u64,
/// The name of the object file as a NUL-terminated string, or `NULL` if
/// it is not known.
obj_file_name: *const u8,
}
/// Convert SymbolInfos returned by BlazeSymbolizer::find_addresses() to a C array.
///
/// Everything is stored in one allocation with the following layout:
///
/// ```text
/// u64 size | pointer per list (padded to 8 bytes) | blazesym_sym_info... | strings
/// ```
///
/// The returned pointer refers to the pointer array, which has one element
/// per list in `syms_list`. Every element points to the symbols of that list,
/// terminated by an object whose `name` is `NULL`. The leading `u64` holds
/// the size of everything behind it. Returns `NULL` if the allocation fails.
///
/// # Safety
///
/// The returned memory has to be released with the layout it was allocated
/// with, i.e. the stored size plus the size of the leading `u64`, aligned to
/// 8 bytes.
unsafe fn convert_syms_list_to_c(
    syms_list: Vec<Vec<SymbolInfo>>,
) -> *const *const blazesym_sym_info {
    let mut sym_cnt = 0;
    let mut str_buf_sz = 0;
    for syms in &syms_list {
        // One additional object per list for the terminator.
        sym_cnt += syms.len() + 1;
        for sym in syms {
            str_buf_sz += sym.name.len() + 1;
            if let Some(fname) = sym.obj_file_name.as_ref() {
                str_buf_sz += AsRef::<OsStr>::as_ref(fname).as_bytes().len() + 1;
            }
        }
    }

    // Size of the pointer array, rounded up to a multiple of the size of a
    // `u64` so that the symbols behind it are properly aligned. This has to
    // be a division: a remainder yields a constant size regardless of the
    // number of lists, and the array would overflow into the symbols.
    let array_sz = ((mem::size_of::<*const u64>() * syms_list.len() + mem::size_of::<u64>() - 1)
        / mem::size_of::<u64>())
        * mem::size_of::<u64>();
    let sym_buf_sz = mem::size_of::<blazesym_sym_info>() * sym_cnt;
    let buf_size = array_sz + sym_buf_sz + str_buf_sz;

    let raw_buf_with_sz =
        unsafe { alloc(Layout::from_size_align(buf_size + mem::size_of::<u64>(), 8).unwrap()) };
    if raw_buf_with_sz.is_null() {
        return ptr::null();
    }
    // Prepend a u64 to keep the size of the buffer.
    unsafe { *(raw_buf_with_sz as *mut u64) = buf_size as u64 };

    let raw_buf = unsafe { raw_buf_with_sz.add(mem::size_of::<u64>()) };
    // Cursors to the next unused pointer, symbol and string byte respectively.
    let mut syms_ptr = raw_buf as *mut *mut blazesym_sym_info;
    let mut sym_ptr = unsafe { raw_buf.add(array_sz) } as *mut blazesym_sym_info;
    let mut str_ptr = unsafe { raw_buf.add(array_sz + sym_buf_sz) } as *mut u8;

    for syms in syms_list {
        unsafe { *syms_ptr = sym_ptr };
        for SymbolInfo {
            name,
            address,
            size,
            sym_type,
            file_offset,
            obj_file_name,
        } in syms
        {
            // Copy the name as a NUL-terminated string.
            let name_ptr = str_ptr as *const u8;
            unsafe { ptr::copy_nonoverlapping(name.as_ptr(), str_ptr, name.len()) };
            str_ptr = unsafe { str_ptr.add(name.len()) };
            unsafe { *str_ptr = 0 };
            str_ptr = unsafe { str_ptr.add(1) };

            // Copy the object file name, if any, as a NUL-terminated string.
            let obj_file_name = if let Some(fname) = obj_file_name.as_ref() {
                let fname = AsRef::<OsStr>::as_ref(fname).as_bytes();
                let obj_fname_ptr = str_ptr;
                unsafe { ptr::copy_nonoverlapping(fname.as_ptr(), str_ptr, fname.len()) };
                str_ptr = unsafe { str_ptr.add(fname.len()) };
                unsafe { *str_ptr = 0 };
                str_ptr = unsafe { str_ptr.add(1) };
                obj_fname_ptr
            } else {
                ptr::null()
            };

            unsafe {
                (*sym_ptr) = blazesym_sym_info {
                    name: name_ptr,
                    address,
                    size,
                    sym_type: match sym_type {
                        SymbolType::Function => blazesym_sym_type::SYM_T_FUNC,
                        SymbolType::Variable => blazesym_sym_type::SYM_T_VAR,
                        _ => blazesym_sym_type::SYM_T_UNKNOWN,
                    },
                    file_offset,
                    obj_file_name,
                }
            };
            sym_ptr = unsafe { sym_ptr.add(1) };
        }

        // Terminate the symbols of this list.
        unsafe {
            (*sym_ptr) = blazesym_sym_info {
                name: ptr::null(),
                address: 0,
                size: 0,
                sym_type: blazesym_sym_type::SYM_T_UNKNOWN,
                file_offset: 0,
                obj_file_name: ptr::null(),
            }
        };
        sym_ptr = unsafe { sym_ptr.add(1) };
        syms_ptr = unsafe { syms_ptr.add(1) };
    }

    raw_buf as *const *const blazesym_sym_info
}
/// Convert SymbolInfos returned by BlazeSymbolizer::find_address_regex() to a C array.
///
/// The result lives in a single heap allocation laid out as follows:
///
/// 1. a `u64` header holding the size in bytes of everything after it (the
///    returned pointer points just past this header),
/// 2. `syms.len() + 1` `blazesym_sym_info` entries, the last one being a
///    terminating entry whose `name` is null,
/// 3. the NUL-terminated symbol names and object file names.
///
/// Aborts through [`std::alloc::handle_alloc_error`] if the allocation fails.
///
/// # Safety
///
/// The returned pointer must be released with `blazesym_syms_free()`, which
/// relies on the size header written here.
unsafe fn convert_syms_to_c(syms: Vec<SymbolInfo>) -> *const blazesym_sym_info {
    let mut str_buf_sz = 0;

    for sym in &syms {
        str_buf_sz += sym.name.len() + 1;
        if let Some(fname) = sym.obj_file_name.as_ref() {
            str_buf_sz += AsRef::<OsStr>::as_ref(fname).as_bytes().len() + 1;
        }
    }

    // One extra entry for the terminator.
    let sym_buf_sz = mem::size_of::<blazesym_sym_info>() * (syms.len() + 1);
    let buf_size = sym_buf_sz + str_buf_sz;

    let layout = Layout::from_size_align(buf_size + mem::size_of::<u64>(), 8).unwrap();
    let raw_buf_with_sz = unsafe { alloc(layout) };
    if raw_buf_with_sz.is_null() {
        // Writing through a null pointer would be undefined behavior.
        std::alloc::handle_alloc_error(layout);
    }
    unsafe { *(raw_buf_with_sz as *mut u64) = buf_size as u64 };

    let raw_buf = unsafe { raw_buf_with_sz.add(mem::size_of::<u64>()) };
    let mut sym_ptr = raw_buf as *mut blazesym_sym_info;
    let mut str_ptr = unsafe { raw_buf.add(sym_buf_sz) } as *mut u8;

    for sym in syms {
        let SymbolInfo {
            name,
            address,
            size,
            sym_type,
            file_offset,
            obj_file_name,
        } = sym;

        // Copy the symbol name into the string area and NUL-terminate it.
        let name_ptr = str_ptr as *const u8;
        unsafe { ptr::copy_nonoverlapping(name.as_ptr(), str_ptr, name.len()) };
        str_ptr = unsafe { str_ptr.add(name.len()) };
        unsafe { *str_ptr = 0 };
        str_ptr = unsafe { str_ptr.add(1) };

        // Same for the object file name, if there is one.
        let obj_file_name = if let Some(fname) = obj_file_name.as_ref() {
            let fname = AsRef::<OsStr>::as_ref(fname).as_bytes();
            let obj_fname_ptr = str_ptr;
            unsafe { ptr::copy_nonoverlapping(fname.as_ptr(), str_ptr, fname.len()) };
            str_ptr = unsafe { str_ptr.add(fname.len()) };
            unsafe { *str_ptr = 0 };
            str_ptr = unsafe { str_ptr.add(1) };
            obj_fname_ptr
        } else {
            ptr::null()
        };

        unsafe {
            (*sym_ptr) = blazesym_sym_info {
                name: name_ptr,
                address,
                size,
                sym_type: match sym_type {
                    SymbolType::Function => blazesym_sym_type::SYM_T_FUNC,
                    SymbolType::Variable => blazesym_sym_type::SYM_T_VAR,
                    _ => blazesym_sym_type::SYM_T_UNKNOWN,
                },
                file_offset,
                obj_file_name,
            }
        };
        sym_ptr = unsafe { sym_ptr.add(1) };
    }

    // Terminating entry.
    unsafe {
        (*sym_ptr) = blazesym_sym_info {
            name: ptr::null(),
            address: 0,
            size: 0,
            sym_type: blazesym_sym_type::SYM_T_UNKNOWN,
            file_offset: 0,
            obj_file_name: ptr::null(),
        }
    };

    raw_buf as *const blazesym_sym_info
}
/// The types of symbols.
///
/// This type is used both to choose which type of symbols to look for
/// and to report the type of the symbols that were found.
#[repr(C)]
#[allow(non_camel_case_types, unused)]
#[derive(Copy, Clone)]
pub enum blazesym_sym_type {
/// Invalid type
SYM_T_INVALID,
/// You want to find a symbol of any type.
SYM_T_UNKNOWN,
/// The returned symbol is a function, or you want to find a function.
SYM_T_FUNC,
/// The returned symbol is a variable, or you want to find a variable.
SYM_T_VAR,
}
/// The kinds of features that can be configured when looking up the
/// addresses of symbols.
///
/// The name in parentheses on each variant is the field of
/// `blazesym_faf_param` that carries the setting for that feature.
#[repr(C)]
#[allow(non_camel_case_types, unused)]
pub enum blazesym_faf_type {
/// Invalid type
FAF_T_INVALID,
/// Return the offset in the file. (enable)
FAF_T_OFFSET_IN_FILE,
/// Return the file name of the shared object. (enable)
FAF_T_OBJ_FILE_NAME,
/// Return symbols having the given type. (sym_type)
FAF_T_SYMBOL_TYPE,
}
/// The parameter part of `blazesym_faddr_feature`.
///
/// Which field is meaningful depends on the accompanying
/// `blazesym_faf_type` value.
#[repr(C)]
pub union blazesym_faf_param {
/// Read for `FAF_T_OFFSET_IN_FILE` and `FAF_T_OBJ_FILE_NAME`.
enable: bool,
/// Read for `FAF_T_SYMBOL_TYPE`.
sym_type: blazesym_sym_type,
}
/// Switches and settings of features of looking up addresses of
/// symbols.
///
/// See [`FindAddrFeature`] for details.
#[repr(C)]
pub struct blazesym_faddr_feature {
/// Selects the feature and thereby which field of `param` is read.
ftype: blazesym_faf_type,
/// The setting for the selected feature.
param: blazesym_faf_param,
}
/// Translate a C array of `blazesym_faddr_feature` into the equivalent list
/// of [`FindAddrFeature`] values, preserving order.
///
/// # Panics
///
/// Panics when an entry carries `FAF_T_INVALID` or when a
/// `FAF_T_SYMBOL_TYPE` entry carries `SYM_T_INVALID`.
///
/// # Safety
///
/// `features` must point to at least `num_features` readable entries (it is
/// not dereferenced when `num_features` is zero), and the union field
/// matching each entry's `ftype` must be initialized.
unsafe fn convert_find_addr_features(
    features: *const blazesym_faddr_feature,
    num_features: usize,
) -> Vec<FindAddrFeature> {
    let mut converted = Vec::new();

    for idx in 0..num_features {
        let entry = unsafe { features.add(idx) };

        let feature = match unsafe { &(*entry).ftype } {
            blazesym_faf_type::FAF_T_OFFSET_IN_FILE => {
                FindAddrFeature::OffsetInFile(unsafe { (*entry).param.enable })
            }
            blazesym_faf_type::FAF_T_OBJ_FILE_NAME => {
                FindAddrFeature::ObjFileName(unsafe { (*entry).param.enable })
            }
            blazesym_faf_type::FAF_T_SYMBOL_TYPE => {
                let sym_type = match unsafe { (*entry).param.sym_type } {
                    blazesym_sym_type::SYM_T_UNKNOWN => SymbolType::Unknown,
                    blazesym_sym_type::SYM_T_FUNC => SymbolType::Function,
                    blazesym_sym_type::SYM_T_VAR => SymbolType::Variable,
                    _ => {
                        panic!("Invalid symbol type");
                    }
                };
                FindAddrFeature::SymbolType(sym_type)
            }
            _ => {
                panic!("Unknown find_address feature type");
            }
        };

        converted.push(feature);
    }

    converted
}
/// Find the addresses of symbols matching a pattern.
///
/// Return an array of `blazesym_sym_info` terminated by an entry whose
/// `name` is null. The caller should free the returned array by calling
/// [`blazesym_syms_free()`].
///
/// It works the same as [`blazesym_find_address_regex()`] with
/// additional controls on features.
///
/// A null pointer is returned when `symbolizer` or `pattern` is null, when
/// the symbol source configuration cannot be converted, when `pattern` is
/// not valid UTF-8, or when the lookup itself yields nothing.
///
/// # Safety
///
/// * `symbolizer` must be null or point to a live `blazesym` instance.
/// * `sym_srcs` must point to `sym_srcs_len` valid configurations.
/// * `pattern` must be null or a NUL-terminated C string.
/// * `features` must point to `num_features` valid entries.
///
/// The returned pointer should be free by [`blazesym_syms_free()`].
///
#[no_mangle]
pub unsafe extern "C" fn blazesym_find_address_regex_opt(
    symbolizer: *mut blazesym,
    sym_srcs: *const sym_src_cfg,
    sym_srcs_len: u32,
    pattern: *const c_char,
    features: *const blazesym_faddr_feature,
    num_features: usize,
) -> *const blazesym_sym_info {
    if symbolizer.is_null() || pattern.is_null() {
        #[cfg(debug_assertions)]
        eprintln!("blazesym_find_address_regex_opt: null symbolizer or pattern");
        return ptr::null();
    }

    let sym_srcs_rs = match unsafe { symbolsrccfg_to_rust(sym_srcs, sym_srcs_len) } {
        Some(sym_srcs_rs) => sym_srcs_rs,
        None => {
            #[cfg(debug_assertions)]
            eprintln!("Fail to transform configurations of symbolizer from C to Rust");
            return ptr::null();
        }
    };

    let symbolizer = unsafe { &*(*symbolizer).symbolizer };

    // A panic must not unwind across the C boundary, so a pattern that is
    // not valid UTF-8 is reported as a failed lookup instead of unwrapping.
    let pattern = match unsafe { CStr::from_ptr(pattern) }.to_str() {
        Ok(pattern) => pattern,
        Err(_) => {
            #[cfg(debug_assertions)]
            eprintln!("The pattern is not a valid UTF-8 string");
            return ptr::null();
        }
    };

    let features = unsafe { convert_find_addr_features(features, num_features) };

    match symbolizer.find_address_regex_opt(&sym_srcs_rs, pattern, features) {
        Some(syms) => unsafe { convert_syms_to_c(syms) },
        None => ptr::null(),
    }
}
/// Find the addresses of symbols matching a pattern.
///
/// Return an array of `blazesym_sym_info` terminated by an entry whose
/// `name` is null. The caller should free the returned array by calling
/// [`blazesym_syms_free()`].
///
/// This forwards to [`blazesym_find_address_regex_opt()`] with no features.
///
/// # Safety
///
/// The returned pointer should be freed by [`blazesym_syms_free()`].
///
#[no_mangle]
pub unsafe extern "C" fn blazesym_find_address_regex(
symbolizer: *mut blazesym,
sym_srcs: *const sym_src_cfg,
sym_srcs_len: u32,
pattern: *const c_char,
) -> *const blazesym_sym_info {
unsafe {
blazesym_find_address_regex_opt(symbolizer, sym_srcs, sym_srcs_len, pattern, ptr::null(), 0)
}
}
/// Release an array produced by [`blazesym_find_address_regex()`] or
/// [`blazesym_find_address_regex_opt()`].
///
/// A null pointer is ignored (with a diagnostic in debug builds).
///
/// # Safety
///
/// `syms` must be null or a pointer obtained from one of the
/// `blazesym_find_address_regex*` functions that has not been freed yet.
#[no_mangle]
pub unsafe extern "C" fn blazesym_syms_free(syms: *const blazesym_sym_info) {
    if syms.is_null() {
        #[cfg(debug_assertions)]
        eprintln!("blazesym_sym_info_free(null)");
        return;
    }

    let header_sz = mem::size_of::<u64>();
    // The `u64` stored directly in front of `syms` holds the payload size;
    // the allocation being released covers that header plus the payload.
    unsafe {
        let base = (syms as *mut u8).sub(header_sz);
        let total_sz = *(base as *mut u64) as usize + header_sz;
        dealloc(base, Layout::from_size_align(total_sz, 8).unwrap());
    }
}
/// Find the addresses of a list of symbols.
///
/// Return an array of `*const blazesym_sym_info` with one element per
/// input name. The caller should free the returned array by calling
/// [`blazesym_syms_list_free()`].
///
/// Every name in the input name list may have more than one address.
/// The respective element of the returned array points to an array of
/// `blazesym_sym_info` holding all matches, terminated by an entry whose
/// `name` is null.
///
/// A null pointer is returned when `symbolizer` is null, when `names` is
/// null while `name_cnt` is non-zero, when the symbol source configuration
/// cannot be converted, or when a name is null or not valid UTF-8.
///
/// # Safety
///
/// * `symbolizer` must be null or point to a live `blazesym` instance.
/// * `sym_srcs` must point to `sym_srcs_len` valid configurations.
/// * `names` must point to `name_cnt` pointers, each null or a
///   NUL-terminated C string.
/// * `features` must point to `num_features` valid entries.
///
/// The returned pointer should be free by [`blazesym_syms_list_free()`].
///
#[no_mangle]
pub unsafe extern "C" fn blazesym_find_addresses_opt(
    symbolizer: *mut blazesym,
    sym_srcs: *const sym_src_cfg,
    sym_srcs_len: u32,
    names: *const *const c_char,
    name_cnt: usize,
    features: *const blazesym_faddr_feature,
    num_features: usize,
) -> *const *const blazesym_sym_info {
    if symbolizer.is_null() || (names.is_null() && name_cnt != 0) {
        #[cfg(debug_assertions)]
        eprintln!("blazesym_find_addresses_opt: null symbolizer or names");
        return ptr::null();
    }

    let sym_srcs_rs = match unsafe { symbolsrccfg_to_rust(sym_srcs, sym_srcs_len) } {
        Some(sym_srcs_rs) => sym_srcs_rs,
        None => {
            #[cfg(debug_assertions)]
            eprintln!("Fail to transform configurations of symbolizer from C to Rust");
            return ptr::null();
        }
    };

    let symbolizer = unsafe { &*(*symbolizer).symbolizer };

    // Borrow every name as `&str`. A panic must not unwind across the C
    // boundary, so a null or non-UTF-8 name fails the whole call instead of
    // unwrapping.
    let mut names_r = Vec::with_capacity(name_cnt);
    for i in 0..name_cnt {
        let name_c = unsafe { *names.add(i) };
        if name_c.is_null() {
            #[cfg(debug_assertions)]
            eprintln!("blazesym_find_addresses_opt: null symbol name");
            return ptr::null();
        }
        match unsafe { CStr::from_ptr(name_c) }.to_str() {
            Ok(name) => names_r.push(name),
            Err(_) => {
                #[cfg(debug_assertions)]
                eprintln!("A symbol name is not a valid UTF-8 string");
                return ptr::null();
            }
        }
    }

    let features = unsafe { convert_find_addr_features(features, num_features) };
    let syms = symbolizer.find_addresses_opt(&sym_srcs_rs, &names_r, features);

    unsafe { convert_syms_list_to_c(syms) }
}
/// Find the addresses of a list of symbol names.
///
/// A symbol may have multiple addresses.
///
/// This forwards to [`blazesym_find_addresses_opt()`] with no features.
///
/// # Safety
///
/// The returned data should be freed by [`blazesym_syms_list_free()`].
///
#[no_mangle]
pub unsafe extern "C" fn blazesym_find_addresses(
symbolizer: *mut blazesym,
sym_srcs: *const sym_src_cfg,
sym_srcs_len: u32,
names: *const *const c_char,
name_cnt: usize,
) -> *const *const blazesym_sym_info {
unsafe {
blazesym_find_addresses_opt(
symbolizer,
sym_srcs,
sym_srcs_len,
names,
name_cnt,
ptr::null(),
0,
)
}
}
/// Release an array produced by [`blazesym_find_addresses()`] or
/// [`blazesym_find_addresses_opt()`].
///
/// A null pointer is ignored (with a diagnostic in debug builds).
///
/// # Safety
///
/// `syms_list` must be null or a pointer obtained from one of the
/// `blazesym_find_addresses*` functions that has not been freed yet.
///
#[no_mangle]
pub unsafe extern "C" fn blazesym_syms_list_free(syms_list: *const *const blazesym_sym_info) {
    if syms_list.is_null() {
        #[cfg(debug_assertions)]
        eprintln!("blazesym_syms_list_free(null)");
        return;
    }

    let header_sz = mem::size_of::<u64>();
    // The `u64` stored directly in front of `syms_list` holds the payload
    // size; the allocation being released covers that header plus the payload.
    unsafe {
        let base = (syms_list as *mut u8).sub(header_sz);
        let total_sz = *(base as *mut u64) as usize + header_sz;
        dealloc(base, Layout::from_size_align(total_sz, 8).unwrap());
    }
}

1489
third_party/blazesym/src/dwarf.rs vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,64 @@
// Unit types (`DW_UT_*`), found in the unit header.
pub const DW_UT_compile: u8 = 0x1;
pub const DW_UT_type: u8 = 0x2;
// DIE tags (`DW_TAG_*`). Only the tags this crate refers to are listed.
pub const DW_TAG_array_type: u8 = 0x1;
pub const DW_TAG_enumeration_type: u8 = 0x4;
pub const DW_TAG_compile_unit: u8 = 0x11;
pub const DW_TAG_subprogram: u8 = 0x2e;
pub const DW_TAG_variable: u8 = 0x34;
pub const DW_TAG_namespace: u8 = 0x39;
// Children flag of an abbreviation declaration (`DW_CHILDREN_*`).
pub const DW_CHILDREN_no: u8 = 0x00;
pub const DW_CHILDREN_yes: u8 = 0x01;
// Attribute names (`DW_AT_*`).
// NOTE(review): `DW_AT_lo_pc`/`DW_AT_hi_pc` appear to correspond to what the
// DWARF standard calls `DW_AT_low_pc`/`DW_AT_high_pc` -- confirm.
pub const DW_AT_sibling: u8 = 0x01;
pub const DW_AT_location: u8 = 0x02;
pub const DW_AT_name: u8 = 0x03;
pub const DW_AT_lo_pc: u8 = 0x11;
pub const DW_AT_hi_pc: u8 = 0x12;
pub const DW_AT_entry_pc: u8 = 0x52;
pub const DW_AT_linkage_name: u8 = 0x6e;
// Attribute forms (`DW_FORM_*`), i.e. how an attribute value is encoded.
pub const DW_FORM_addr: u8 = 0x01;
pub const DW_FORM_block2: u8 = 0x03;
pub const DW_FORM_block4: u8 = 0x04;
pub const DW_FORM_data2: u8 = 0x05;
pub const DW_FORM_data4: u8 = 0x06;
pub const DW_FORM_data8: u8 = 0x07;
pub const DW_FORM_string: u8 = 0x08;
pub const DW_FORM_block: u8 = 0x09;
pub const DW_FORM_block1: u8 = 0x0a;
pub const DW_FORM_data1: u8 = 0x0b;
pub const DW_FORM_flag: u8 = 0x0c;
pub const DW_FORM_sdata: u8 = 0x0d;
pub const DW_FORM_strp: u8 = 0x0e;
pub const DW_FORM_udata: u8 = 0x0f;
pub const DW_FORM_ref_addr: u8 = 0x10;
pub const DW_FORM_ref1: u8 = 0x11;
pub const DW_FORM_ref2: u8 = 0x12;
pub const DW_FORM_ref4: u8 = 0x13;
pub const DW_FORM_ref8: u8 = 0x14;
pub const DW_FORM_ref_udata: u8 = 0x15;
pub const DW_FORM_indirect: u8 = 0x16;
pub const DW_FORM_sec_offset: u8 = 0x17;
pub const DW_FORM_exprloc: u8 = 0x18;
pub const DW_FORM_flag_present: u8 = 0x19;
pub const DW_FORM_strx: u8 = 0x1a;
pub const DW_FORM_addrx: u8 = 0x1b;
pub const DW_FORM_ref_sup4: u8 = 0x1c;
pub const DW_FORM_strp_sup: u8 = 0x1d;
pub const DW_FORM_data16: u8 = 0x1e;
pub const DW_FORM_line_strp: u8 = 0x1f;
pub const DW_FORM_ref_sig8: u8 = 0x20;
pub const DW_FORM_implicit_const: u8 = 0x21;
pub const DW_FORM_loclistx: u8 = 0x22;
pub const DW_FORM_rnglistx: u8 = 0x23;
pub const DW_FORM_ref_sup8: u8 = 0x24;
pub const DW_FORM_str1: u8 = 0x25;
pub const DW_FORM_str2: u8 = 0x26;
pub const DW_FORM_str3: u8 = 0x27;
pub const DW_FORM_str4: u8 = 0x28;
pub const DW_FORM_addrx1: u8 = 0x29;
pub const DW_FORM_addrx2: u8 = 0x2a;
pub const DW_FORM_addrx3: u8 = 0x2b;
pub const DW_FORM_addrx4: u8 = 0x2c;

View File

@@ -0,0 +1,869 @@
//! Parse the `.debug_info` section to get Debug Information Entries.
//!
//! It supports DWARFv4 now. (See <https://dwarfstd.org/doc/DWARF4.pdf>)
//! It parses DIEs from the `.debug_info` section and abbreviations
//! from the `.debug_abbrev` section.
//!
//! The `.debug_info` section is a list of (Compile-)Units. Every
//! Unit comprises DIEs to carry debug information of a source file.
//! A Unit starts with a header to describe the size of this unit in
//! the section, the offset of its abbreviation table in the
//! `.debug_abbrev` section, ..., and DWARF version. (version 4)
//!
//! A DIE starts with an index, encoded in LEB128, into the abbreviation
//! table of the Unit. The abbreviation given by the index describes
//! the content, format and layout of a DIE. The abbreviation index
//! starts from 1. 0 means a null entry. DIEs in an Unit are
//! organized as a tree, parent-children. Null entries are used to
//! signal the last sibling to end a level.
//!
//! A user starts a parser by creating an instance of [`UnitIter`].
//! It will walk through the data in the `.debug_info` and
//! `.debug_abbrev` section to return Units.
use std::io::{Error, ErrorKind};
use std::iter::Iterator;
use std::mem;
use crate::util::decode_leb128_128;
use crate::util::decode_udword;
use crate::util::decode_uhalf;
use crate::util::decode_uword;
use crate::util::ReadRaw as _;
use super::constants::*;
/// Read three bytes from `data` and assemble them into a little-endian
/// 24-bit value.
///
/// Returns `None` when `read_slice(3)` fails.
fn read_3bytes(data: &mut &[u8]) -> Option<u32> {
    let bytes = data.read_slice(3)?;
    match bytes {
        [lo, mid, hi] => Some(*lo as u32 | ((*mid as u32) << 8) | ((*hi as u32) << 16)),
        // `read_slice(3)` is expected to hand back exactly three bytes.
        _ => unreachable!(),
    }
}
/// Header of a unit whose type is not handled by this parser.
#[allow(unused)]
pub struct UnknownHeader {
// Value of the unit's length field (the 64-bit one when `bits64` is set).
init_length: usize,
// Set when the first 4 bytes of the unit are 0xffffffff (64-bit DWARF).
bits64: bool,
version: u16,
unit_type: u8,
// Number of header bytes that were parsed.
hdr_size: usize,
}
/// Header of a compile unit that carries a unit type field
/// (`DW_UT_compile`), as parsed for versions other than 4.
#[allow(unused)]
pub struct CUHeaderV5 {
// Value of the unit's length field (the 64-bit one when `bits64` is set).
init_length: usize,
// Set when the first 4 bytes of the unit are 0xffffffff (64-bit DWARF).
bits64: bool,
version: u16,
unit_type: u8,
address_size: u8,
// Offset of the unit's abbreviation table in `.debug_abbrev`.
debug_abbrev_offset: u64,
// Number of header bytes that were parsed.
hdr_size: usize,
}
/// Header of a DWARFv4 compile unit.
#[allow(unused)]
pub struct CUHeaderV4 {
// Value of the unit's length field (the 64-bit one when `bits64` is set).
init_length: usize,
// Set when the first 4 bytes of the unit are 0xffffffff (64-bit DWARF).
bits64: bool,
version: u16,
address_size: u8,
debug_abbrev_offset: u64, // The location of the abbreviation table.
// Number of header bytes that were parsed.
hdr_size: usize,
}
/// The Unit header.
///
/// With DWARFv4, a unit header describes a compile unit and is followed
/// by the DIEs of the unit in the `.debug_info` section. DWARFv5 is much
/// more complicated.
///
/// So far, BlazeSym supports only DWARFv4, which is commonly used.
pub enum UnitHeader {
/// A version 4 compile unit.
CompileV4(CUHeaderV4),
/// A compile unit whose header carries `DW_UT_compile` as its unit type.
CompileV5(CUHeaderV5),
/// Any other unit type.
Unknown(UnknownHeader),
}
impl UnitHeader {
    /// Total size of the unit in bytes: the recorded length plus the size
    /// of the length field itself (12 bytes in the 64-bit format, 4 otherwise).
    fn unit_size(&self) -> usize {
        let (init_length, bits64) = match self {
            UnitHeader::CompileV4(hdr) => (hdr.init_length, hdr.bits64),
            UnitHeader::CompileV5(hdr) => (hdr.init_length, hdr.bits64),
            UnitHeader::Unknown(hdr) => (hdr.init_length, hdr.bits64),
        };
        let length_field_sz = if bits64 { 12 } else { 4 };
        init_length + length_field_sz
    }

    /// Number of bytes occupied by the header itself.
    fn header_size(&self) -> usize {
        match self {
            UnitHeader::CompileV4(hdr) => hdr.hdr_size,
            UnitHeader::CompileV5(hdr) => hdr.hdr_size,
            UnitHeader::Unknown(hdr) => hdr.hdr_size,
        }
    }
}
/// One attribute specification of an abbreviation.
#[derive(Clone)]
pub struct AbbrevAttr {
// Attribute name (a `DW_AT_*` value truncated to `u8`).
name: u8,
// Attribute form (a `DW_FORM_*` value truncated to `u8`); 0 marks the
// terminating specification.
form: u8,
// Extra value stored in the abbreviation itself; 0 when the form has none.
opt: u128,
}
/// An abbreviation.
///
/// An abbreviation describes the format of a DIE. It comprises a list
/// of specifications that describe the names and the formats of
/// attributes. A DIE will be formatted in the way described by its
/// abbreviation.
pub struct Abbrev {
/// The index to the abbreviation table.
pub abbrev_code: u32,
/// The type of the abbreviation.
///
/// It can be a DW_TAG_compile (a compile unit),
/// DW_TAG_subprogram, DW_TAG_variable, ... etc.
pub tag: u8,
/// Whether DIEs using this abbreviation have children.
pub has_children: bool,
// Attribute specifications in declaration order, including the
// terminating one (whose form is 0) when it was parsed.
parsed_attrs: Vec<AbbrevAttr>,
}
impl Abbrev {
    /// All attribute specifications of this abbreviation, in declaration
    /// order.
    #[inline]
    pub fn all_attrs(&self) -> &[AbbrevAttr] {
        self.parsed_attrs.as_slice()
    }
}
/// Parse an abbreviation from a buffer.
///
/// Include all attributes, names and forms.
///
/// Returns the abbreviation together with the number of bytes it occupies
/// in `data`. An abbreviation code of 0 (the end of an abbreviation table)
/// yields an empty abbreviation. `None` is returned when the buffer ends
/// before the code, the tag, or the children flag could be read.
///
/// The code and the tag are truncated to `u32` and `u8` respectively.
#[inline]
fn parse_abbrev(data: &[u8]) -> Option<(Abbrev, usize)> {
    let (abbrev_code, bytes) = decode_leb128_128(data)?;
    if abbrev_code == 0 {
        return Some((
            Abbrev {
                abbrev_code: 0,
                tag: 0,
                has_children: false,
                parsed_attrs: vec![],
            },
            // Report the bytes actually consumed by the encoded zero.
            bytes as usize,
        ));
    }

    let mut pos = bytes as usize;
    let (tag, bytes) = decode_leb128_128(data.get(pos..)?)?;
    pos += bytes as usize;
    // A truncated buffer must yield `None`, not an out-of-bounds panic.
    let has_children = *data.get(pos)? == DW_CHILDREN_yes;
    pos += 1;

    let mut parsed_attrs = Vec::<AbbrevAttr>::new();
    while pos < data.len() {
        match parse_abbrev_attr(&data[pos..]) {
            Some((name, form, opt, bytes)) => {
                pos += bytes;
                // The terminating specification (form 0) is recorded as well;
                // consumers skip entries whose form is 0.
                parsed_attrs.push(AbbrevAttr { name, form, opt });
                if form == 0 {
                    break;
                }
            }
            None => break,
        }
    }

    Some((
        Abbrev {
            abbrev_code: abbrev_code as u32,
            tag: tag as u8,
            has_children,
            parsed_attrs,
        },
        pos,
    ))
}
/// Parse an attribute specification from a buffer.
///
/// Return the name, form, optional value and size of an abbreviation.
///
/// Only `DW_FORM_implicit_const` stores a third value in the abbreviation
/// itself. For `DW_FORM_indirect` the actual form is stored with the
/// attribute value in `.debug_info`, not in the abbreviation, so nothing
/// extra is read for it here.
///
/// NOTE(review): the DWARF standard defines the implicit constant as a
/// *signed* LEB128 number. It is decoded as unsigned here, which consumes the
/// same number of bytes but may not yield the intended value for negative
/// constants.
#[inline]
fn parse_abbrev_attr(data: &[u8]) -> Option<(u8, u8, u128, usize)> {
    let mut pos = 0; // Track the size of this abbreviation.

    let (name, bytes) = decode_leb128_128(&data[pos..])?;
    pos += bytes as usize;
    let (form, bytes) = decode_leb128_128(&data[pos..])?;
    pos += bytes as usize;

    let opt = if form as u8 == DW_FORM_implicit_const {
        let (c, bytes) = decode_leb128_128(&data[pos..])?;
        pos += bytes as usize;
        c
    } else {
        0
    };

    Some((name as u8, form as u8, opt, pos))
}
/// A decoded attribute value of a DIE.
#[derive(Clone, Debug)]
pub enum AttrValue<'a> {
/// A signed LEB128 value (`DW_FORM_sdata`).
Signed128(i128),
/// A value that fits in 64 bits: fixed-width data, addresses, offsets,
/// references and indices.
Unsigned(u64),
/// An unsigned LEB128 value (`DW_FORM_udata`, `DW_FORM_ref_udata`).
Unsigned128(u128),
/// Raw bytes borrowed from the input buffer (blocks, expressions, ...).
Bytes(&'a [u8]),
/// An inline string borrowed from the input buffer (`DW_FORM_string`).
String(&'a str),
}
fn extract_attr_value_impl<'data>(
data: &mut &'data [u8],
form: u8,
dwarf_sz: usize,
addr_sz: usize,
) -> Option<AttrValue<'data>> {
match form {
DW_FORM_addr => {
if addr_sz == 0x4 {
Some(AttrValue::Unsigned(data.read_u32()? as u64))
} else {
Some(AttrValue::Unsigned(data.read_u64()?))
}
}
DW_FORM_block2 => {
let value = data.read_u16()?;
Some(AttrValue::Bytes(data.read_slice(value.into())?))
}
DW_FORM_block4 => {
let value = data.read_u32()?;
Some(AttrValue::Bytes(data.read_slice(value as usize)?))
}
DW_FORM_data2 => Some(AttrValue::Unsigned(data.read_u16()?.into())),
DW_FORM_data4 => Some(AttrValue::Unsigned(data.read_u32()?.into())),
DW_FORM_data8 => Some(AttrValue::Unsigned(data.read_u64()?)),
DW_FORM_string => {
let string = data.read_cstr()?;
Some(AttrValue::String(string.to_str().ok()?))
}
DW_FORM_block => {
let (value, _bytes) = data.read_u128_leb128()?;
Some(AttrValue::Bytes(data.read_slice(value as usize)?))
}
DW_FORM_block1 => {
let value = data.read_u8()?;
Some(AttrValue::Bytes(data.read_slice(value.into())?))
}
DW_FORM_data1 => Some(AttrValue::Unsigned(data.read_u8()?.into())),
DW_FORM_flag => Some(AttrValue::Unsigned(data.read_u8()?.into())),
DW_FORM_sdata => {
let (value, _bytes) = data.read_i128_leb128()?;
Some(AttrValue::Signed128(value))
}
DW_FORM_strp => {
if dwarf_sz == 0x4 {
Some(AttrValue::Unsigned(data.read_u32()?.into()))
} else {
Some(AttrValue::Unsigned(data.read_u64()?))
}
}
DW_FORM_udata => {
let (value, _bytes) = data.read_u128_leb128()?;
Some(AttrValue::Unsigned128(value))
}
DW_FORM_ref_addr => {
if dwarf_sz == 0x4 {
Some(AttrValue::Unsigned(data.read_u32()?.into()))
} else {
Some(AttrValue::Unsigned(data.read_u64()?))
}
}
DW_FORM_ref1 => Some(AttrValue::Unsigned(data.read_u8()?.into())),
DW_FORM_ref2 => Some(AttrValue::Unsigned(data.read_u16()?.into())),
DW_FORM_ref4 => Some(AttrValue::Unsigned(data.read_u32()?.into())),
DW_FORM_ref8 => Some(AttrValue::Unsigned(data.read_u64()?)),
DW_FORM_ref_udata => {
let (value, _bytes) = data.read_u128_leb128()?;
Some(AttrValue::Unsigned128(value))
}
DW_FORM_indirect => {
let (f, _bytes) = data.read_u128_leb128()?;
extract_attr_value_impl(data, f as u8, dwarf_sz, addr_sz)
}
DW_FORM_sec_offset => {
if dwarf_sz == 0x4 {
Some(AttrValue::Unsigned(data.read_u32()?.into()))
} else {
Some(AttrValue::Unsigned(data.read_u64()?))
}
}
DW_FORM_exprloc => {
let (value, _bytes) = data.read_u128_leb128()?;
Some(AttrValue::Bytes(data.read_slice(value as usize)?))
}
DW_FORM_flag_present => Some(AttrValue::Unsigned(0)),
DW_FORM_strx => {
let (value, _bytes) = data.read_u128_leb128()?;
Some(AttrValue::Unsigned(value as u64))
}
DW_FORM_addrx => {
let (value, _bytes) = data.read_u128_leb128()?;
Some(AttrValue::Unsigned(value as u64))
}
DW_FORM_ref_sup4 => Some(AttrValue::Unsigned(data.read_u32()?.into())),
DW_FORM_strp_sup => {
if dwarf_sz == 0x4 {
Some(AttrValue::Unsigned(data.read_u32()?.into()))
} else {
Some(AttrValue::Unsigned(data.read_u64()?))
}
}
DW_FORM_data16 => Some(AttrValue::Bytes(data.read_slice(16)?)),
DW_FORM_line_strp => {
if dwarf_sz == 0x4 {
Some(AttrValue::Unsigned(data.read_u32()?.into()))
} else {
Some(AttrValue::Unsigned(data.read_u64()?))
}
}
DW_FORM_ref_sig8 => Some(AttrValue::Bytes(data.read_slice(8)?)),
DW_FORM_implicit_const => Some(AttrValue::Unsigned(0)),
DW_FORM_loclistx => {
let (value, _bytes) = data.read_u128_leb128()?;
Some(AttrValue::Unsigned(value as u64))
}
DW_FORM_rnglistx => {
let (value, _bytes) = data.read_u128_leb128()?;
Some(AttrValue::Unsigned(value as u64))
}
DW_FORM_ref_sup8 => Some(AttrValue::Unsigned(data.read_u64()?)),
DW_FORM_str1 => Some(AttrValue::Unsigned(data.read_u8()?.into())),
DW_FORM_str2 => Some(AttrValue::Unsigned(data.read_u16()?.into())),
DW_FORM_str3 => Some(AttrValue::Unsigned(read_3bytes(data)?.into())),
DW_FORM_str4 => Some(AttrValue::Unsigned(data.read_u32()?.into())),
DW_FORM_addrx1 => Some(AttrValue::Unsigned(data.read_u8()?.into())),
DW_FORM_addrx2 => Some(AttrValue::Unsigned(data.read_u16()?.into())),
DW_FORM_addrx3 => Some(AttrValue::Unsigned(read_3bytes(data)?.into())),
DW_FORM_addrx4 => Some(AttrValue::Unsigned(data.read_u32()?.into())),
_ => None,
}
}
/// Extract the value of an attribute from a data buffer.
///
/// This is the counterpart of [`parse_abbrev_attr()`]: the latter parses the
/// attribute specifications declared in the abbreviation table of the
/// `.debug_abbrev` section, and the form it reports is what is passed here
/// to decode the value of that attribute in a DIE.
///
/// # Arguments
///
/// * `data` - A buffer where the value is in.
/// * `form` - The format of the value. (DW_FORM_*)
/// * `dwarf_sz` - Describe the DWARF format. (4 for 32-bits and 8 for 64-bits)
/// * `addr_sz` - The size of an address of the target platform. (4 for 32-bits and 8 for 64-bits)
///
/// Return AttrValue and the number of bytes it takes.
fn extract_attr_value(
    mut data: &[u8],
    form: u8,
    dwarf_sz: usize,
    addr_sz: usize,
) -> Option<(AttrValue<'_>, usize)> {
    let cursor = &mut data;
    let start = (*cursor).as_ptr();
    let value = extract_attr_value_impl(cursor, form, dwarf_sz, addr_sz)?;
    let end = (*cursor).as_ptr();

    // TODO: Remove this workaround once callers no longer require an explicit
    // byte count being passed out.
    // SAFETY: Both pointers point into the same underlying byte array.
    let consumed = unsafe { end.offset_from(start) };
    Some((value, usize::try_from(consumed).unwrap()))
}
/// Parse the abbreviation table found at the start of `data`.
///
/// An abbreviation table is usually for a compile unit, but not always.
///
/// Abbreviations are read until one with code 0 is met; that terminator is
/// not part of the returned list. Returns the abbreviations together with
/// the number of bytes consumed (terminator included), or `None` when an
/// abbreviation fails to parse or the data ends before a terminator.
fn parse_cu_abbrevs(data: &[u8]) -> Option<(Vec<Abbrev>, usize)> {
    let mut abbrevs = Vec::<Abbrev>::with_capacity(data.len() / 50); // Heuristic!
    let mut consumed = 0;

    loop {
        if consumed >= data.len() {
            // Ran out of data without seeing the terminator.
            return None;
        }

        let (abbrev, len) = parse_abbrev(&data[consumed..])?;
        consumed += len;

        match abbrev.abbrev_code {
            0x0 => return Some((abbrevs, consumed)),
            _ => abbrevs.push(abbrev),
        }
    }
}
/// Parse a Unit Header from a buffer.
///
/// A Unit Header is the header of a compile unit, at least for v4.
///
/// A unit whose version is 4 is parsed as [`UnitHeader::CompileV4`]. For any
/// other version a unit type byte is read: `DW_UT_compile` yields
/// [`UnitHeader::CompileV5`], everything else [`UnitHeader::Unknown`].
///
/// Returns `None` when `data` is too short to hold the header.
///
/// # Arguments
///
/// * `data` - is the data from the `.debug_info` section.
fn parse_unit_header(data: &[u8]) -> Option<UnitHeader> {
    /// Read the abbreviation table offset at `*pos` (8 bytes in the 64-bit
    /// format, 4 otherwise) and advance `*pos` past it.
    fn read_abbrev_offset(data: &[u8], pos: &mut usize, bits64: bool) -> Option<u64> {
        let width = if bits64 { 8 } else { 4 };
        if (*pos + width) > data.len() {
            return None;
        }
        let offset = if bits64 {
            decode_udword(&data[*pos..])
        } else {
            decode_uword(&data[*pos..]) as u64
        };
        *pos += width;
        Some(offset)
    }

    if data.len() < 4 {
        return None;
    }

    let mut pos = 0;
    let mut init_length = decode_uword(data) as usize;
    pos += 4;

    // 0xffffffff escapes to the 64-bit format; the real length follows.
    let bits64 = init_length == 0xffffffff;
    if bits64 {
        if (pos + 8) > data.len() {
            return None;
        }
        init_length = decode_udword(&data[pos..]) as usize;
        pos += 8;
    }

    if (pos + 2) > data.len() {
        return None;
    }
    let version = decode_uhalf(&data[pos..]);
    pos += 2;

    if version == 0x4 {
        let debug_abbrev_offset = read_abbrev_offset(data, &mut pos, bits64)?;
        // A header truncated right before the address size must yield
        // `None`, not an out-of-bounds panic.
        let address_size = *data.get(pos)?;
        pos += 1;
        return Some(UnitHeader::CompileV4(CUHeaderV4 {
            init_length,
            bits64,
            version,
            debug_abbrev_offset,
            address_size,
            hdr_size: pos,
        }));
    }

    let unit_type = *data.get(pos)?;
    pos += 1;

    match unit_type {
        DW_UT_compile => {
            // Here the address size comes before the abbreviation offset.
            let address_size = *data.get(pos)?;
            pos += 1;
            let debug_abbrev_offset = read_abbrev_offset(data, &mut pos, bits64)?;
            Some(UnitHeader::CompileV5(CUHeaderV5 {
                init_length,
                bits64,
                version,
                unit_type,
                address_size,
                debug_abbrev_offset,
                hdr_size: pos,
            }))
        }
        _ => Some(UnitHeader::Unknown(UnknownHeader {
            init_length,
            bits64,
            version,
            unit_type,
            hdr_size: pos,
        })),
    }
}
/// Debug Information Entry.
///
/// A DIE starts with the code of its abbreviation followed by the
/// attribute values described by the abbreviation. The code of an
/// abbreviation is an index to the abbreviation table of the compile
/// unit.
///
/// Iterating a `DIE` yields its attributes one by one.
#[allow(clippy::upper_case_acronyms)]
pub struct DIE<'a> {
/// Tag of the abbreviation; 0 for a null entry.
pub tag: u8,
/// The abbreviation of this DIE; `None` for a null entry.
pub abbrev: Option<&'a Abbrev>,
// Attribute specifications of `abbrev` (empty for a null entry).
abbrev_attrs: &'a [AbbrevAttr],
// Index into `abbrev_attrs` of the next attribute to read.
abbrev_attrs_idx: usize,
// Unit data starting right after this DIE's abbreviation code.
data: &'a [u8],
// The iterator that produced this DIE; told how many bytes were consumed
// once all attributes have been read.
dieiter: &'a mut DIEIter<'a>,
// Number of bytes of `data` consumed so far.
reading_offset: usize,
// Set once the attributes are exhausted and `dieiter` has been notified.
done: bool,
}
impl<'a> DIE<'a> {
    /// Skip over all attribute values that have not been read yet and report
    /// the number of consumed bytes to the owning [`DIEIter`].
    ///
    /// Calling this on a DIE that is already done is a no-op.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidData` error when an attribute value cannot be
    /// parsed.
    #[inline]
    pub fn exhaust(&mut self) -> Result<(), Error> {
        if self.done {
            return Ok(());
        }

        let attrs = self.abbrev_attrs;
        while let Some(attr) = attrs.get(self.abbrev_attrs_idx) {
            self.abbrev_attrs_idx += 1;
            // A form of 0 is the terminating specification; it has no value.
            if attr.form == 0 {
                continue;
            }

            let remaining = &self.data[self.reading_offset..];
            let parsed = extract_attr_value(
                remaining,
                attr.form,
                self.dieiter.dwarf_sz,
                self.dieiter.addr_sz,
            );
            let bytes = match parsed {
                Some((_value, bytes)) => bytes,
                None => {
                    return Err(Error::new(
                        ErrorKind::InvalidData,
                        "failed to parse attribute values",
                    ))
                }
            };
            self.reading_offset += bytes;
        }

        self.dieiter.die_finish_reading(self.reading_offset);
        self.done = true;
        Ok(())
    }
}
/// Iterate over the attributes of a DIE.
///
/// Each item is `(name, form, opt, value)`. Once the terminating
/// specification (form 0) or the end of the specification list is reached,
/// the owning [`DIEIter`] is told how many bytes were consumed and the
/// iterator stays exhausted. A null entry (no abbreviation) yields nothing.
impl<'a> Iterator for DIE<'a> {
    // name, form, opt, value
    type Item = (u8, u8, u128, AttrValue<'a>);

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.done {
            return None;
        }
        // A null entry has no attributes.
        self.abbrev?;

        if self.abbrev_attrs_idx < self.abbrev_attrs.len() {
            let AbbrevAttr { name, form, opt } = self.abbrev_attrs[self.abbrev_attrs_idx];
            self.abbrev_attrs_idx += 1;

            if form == 0 {
                // Terminating specification: every attribute has been read.
                self.dieiter.die_finish_reading(self.reading_offset);
                self.done = true;
                return None;
            }

            // NOTE(review): when the value cannot be parsed, `None` is
            // returned without marking this DIE as done.
            let (value, bytes) = extract_attr_value(
                &self.data[self.reading_offset..],
                form,
                self.dieiter.dwarf_sz,
                self.dieiter.addr_sz,
            )?;
            self.reading_offset += bytes;
            Some((name, form, opt, value))
        } else {
            self.dieiter.die_finish_reading(self.reading_offset);
            self.done = true;
            None
        }
    }
}
/// The iterator of DIEs in a Unit.
pub struct DIEIter<'a> {
// Data of the unit, starting right after the unit header.
data: &'a [u8],
// 4 for the 32-bit DWARF format, 8 for the 64-bit one.
dwarf_sz: usize,
// Address size recorded in the unit header.
addr_sz: usize,
// Current read offset into `data`.
off: usize,
// Size of the unit header; subtracted from offsets given to
// `seek_to_sibling()` to turn them into offsets into `data`.
off_delta: usize,
// Current nesting depth in the DIE tree.
cur_depth: usize,
// Abbreviation table of the unit.
abbrevs: Vec<Abbrev>,
// Abbreviation of the DIE returned most recently (if it was not a null
// entry).
abbrev: Option<&'a Abbrev>,
// Whether the attribute values of the most recent DIE have been consumed.
die_reading_done: bool,
// Set once the null entry closing the outermost level has been seen.
done: bool,
}
impl<'a> DIEIter<'a> {
    /// Record that the current DIE has been read completely and that its
    /// attribute values occupied `size` bytes.
    pub fn die_finish_reading(&mut self, size: usize) {
        self.off += size;
        self.die_reading_done = true;
    }

    /// Jump to the DIE at offset `off`, leaving the current nesting level.
    ///
    /// `off` includes the unit header; the header size is subtracted to
    /// obtain the position in the DIE data.
    pub fn seek_to_sibling(&mut self, off: usize) {
        self.die_reading_done = true;
        self.cur_depth -= 1;
        self.off = off - self.off_delta;
    }

    /// Skip the attribute values of the current DIE.
    ///
    /// # Panics
    ///
    /// Panics when the current DIE has already been read completely, or when
    /// no abbreviation has been recorded for it.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidData` error when an attribute value cannot be
    /// parsed.
    #[inline]
    pub fn exhaust_die(&mut self) -> Result<(), Error> {
        assert!(
            !self.die_reading_done,
            "DIE should not have been exhausted!"
        );

        let abbrev = self.abbrev.unwrap();
        // Entries with form 0 (the terminating specification) carry no value.
        for attr in abbrev.all_attrs().iter().filter(|attr| attr.form != 0) {
            let remaining = &self.data[self.off..];
            let parsed = extract_attr_value(remaining, attr.form, self.dwarf_sz, self.addr_sz);
            let bytes = match parsed {
                Some((_value, bytes)) => bytes,
                None => {
                    return Err(Error::new(
                        ErrorKind::InvalidData,
                        "failed to parse attribute values",
                    ))
                }
            };
            self.off += bytes;
        }

        self.die_reading_done = true;
        Ok(())
    }
}
/// Walk the DIEs of a unit in storage order.
///
/// Null entries are yielded as well, as DIEs with `tag` 0 and no
/// abbreviation.
impl<'a> Iterator for DIEIter<'a> {
type Item = DIE<'a>;
fn next(&mut self) -> Option<Self::Item> {
// If the previous DIE's attribute values were not consumed by the caller,
// skip them now. A parse failure panics here because of the `unwrap()`.
if !self.die_reading_done {
self.exhaust_die().unwrap();
}
if self.done {
return None;
}
let (abbrev_idx, bytes) = decode_leb128_128(&self.data[self.off..])?;
self.off += bytes as usize;
if abbrev_idx == 0 {
// A null entry closes the current level of siblings. Closing the
// outermost level ends the iteration after this entry is returned.
// NOTE(review): `cur_depth` is unsigned; a null entry met at depth 0 would
// underflow -- confirm this cannot happen with well-formed input.
self.cur_depth -= 1;
if self.cur_depth == 0 {
self.done = true;
}
Some(DIE {
tag: 0,
abbrev: None,
abbrev_attrs: &[],
abbrev_attrs_idx: 0,
data: &self.data[self.off..],
// The transmute turns `&mut self` into `&'a mut DIEIter<'a>`.
// NOTE(review): this bypasses the borrow checker; it presumably relies on
// callers dropping the DIE before touching the iterator again -- confirm.
dieiter: unsafe { mem::transmute(self) },
reading_offset: 0,
done: false,
})
} else {
// The abbreviation is looked up by position (code - 1); an index past the
// end of `abbrevs` panics. The transmute extends the borrow of the
// abbreviation to `'a`.
// NOTE(review): assumes abbreviation codes are dense and start at 1 --
// confirm.
let abbrev = unsafe { mem::transmute(&self.abbrevs[abbrev_idx as usize - 1]) };
self.abbrev = Some(abbrev);
if abbrev.has_children {
self.cur_depth += 1;
}
// The attribute values of this DIE still have to be consumed.
self.die_reading_done = false;
Some(DIE {
tag: abbrev.tag,
abbrev: Some(abbrev),
abbrev_attrs: abbrev.all_attrs(),
abbrev_attrs_idx: 0,
data: &self.data[self.off..],
dieiter: unsafe { mem::transmute(self) },
reading_offset: 0,
done: false,
})
}
}
}
/// An iterator of Units in the `.debug_info` section.
///
/// An iterator is built from the content of the `.debug_info` section,
/// which is a list of compile units. A compile unit usually refers
/// to a source file. Each compile unit holds a forest of DIEs,
/// which represent functions, variables and other debug information.
pub struct UnitIter<'a> {
// Content of the `.debug_info` section.
info_data: &'a [u8],
// Content of the `.debug_abbrev` section.
abbrev_data: &'a [u8],
// Offset into `info_data` of the next unit to parse.
off: usize,
}
impl<'a> UnitIter<'a> {
/// Build an iterator from the content of `.debug_info` & `.debug_abbrev`.
///
/// # Arguments
///
/// * `info_data` is the content of the `.debug_info` section.
/// * `abbrev_data` is the content of the `.debug_abbrev` section.
pub fn new(info_data: &'a [u8], abbrev_data: &'a [u8]) -> UnitIter<'a> {
UnitIter {
info_data,
abbrev_data,
off: 0,
}
}
}
/// Yield every DWARFv4 compile unit together with an iterator over its DIEs.
///
/// Units of an unknown type are skipped. Iteration ends (`None`) when no
/// further header can be parsed, or when a length or offset recorded in a
/// header points outside of the section data.
///
/// # Panics
///
/// Panics (`todo!()`) when a [`UnitHeader::CompileV5`] unit is met, since
/// only DWARFv4 is supported so far.
impl<'a> Iterator for UnitIter<'a> {
    type Item = (UnitHeader, DIEIter<'a>);

    fn next(&mut self) -> Option<Self::Item> {
        // A loop rather than recursion, so that a long run of skipped units
        // cannot grow the stack.
        loop {
            let off = self.off;
            // `get` rather than indexing: a unit length running past the end
            // of the section must end the iteration instead of panicking.
            let uh = parse_unit_header(self.info_data.get(off..)?)?;
            let hdr_sz = uh.header_size();
            self.off += uh.unit_size();

            match uh {
                UnitHeader::CompileV4(ref cuh) => {
                    let dwarf_sz = if cuh.bits64 { 8 } else { 4 };
                    let addr_sz = cuh.address_size as usize;
                    let abbrev_data = self
                        .abbrev_data
                        .get(cuh.debug_abbrev_offset as usize..)?;
                    let (abbrevs, _) = parse_cu_abbrevs(abbrev_data)?;
                    let data = self.info_data.get(off + hdr_sz..)?;
                    return Some((
                        uh,
                        DIEIter {
                            data,
                            dwarf_sz,
                            addr_sz,
                            off: 0,
                            off_delta: hdr_sz,
                            cur_depth: 0,
                            abbrevs,
                            abbrev: None,
                            die_reading_done: true,
                            done: false,
                        },
                    ));
                }
                UnitHeader::CompileV5(ref _cuh) => {
                    todo!(); // BlazeSym supports only v4 so far.
                }
                UnitHeader::Unknown(_) => continue,
            }
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::elf::ElfParser;
use std::env;
use std::path::Path;
/// Parse five consecutive abbreviation declarations one at a time and
/// check the consumed size, code, tag and children flag of each.
#[test]
fn test_parse_abbrev() {
// Raw abbreviation declarations with codes 1 through 5.
let raw = [
0x01, 0x11, 0x01, 0x25, 0x0e, 0x13, 0x05, 0x03, 0x0e, 0x10, 0x17, 0x1b, 0x0e, 0xb4,
0x42, 0x19, 0x11, 0x01, 0x55, 0x17, 0x00, 0x00, 0x02, 0x39, 0x01, 0x03, 0x0e, 0x00,
0x00, 0x03, 0x04, 0x01, 0x49, 0x13, 0x6d, 0x19, 0x03, 0x0e, 0x0b, 0x0b, 0x88, 0x01,
0x0f, 0x00, 0x00, 0x04, 0x28, 0x00, 0x03, 0x0e, 0x1c, 0x0f, 0x00, 0x00, 0x05, 0x13,
0x01, 0x03, 0x0e, 0x0b, 0x0b, 0x88, 0x01, 0x0f, 0x00, 0x00,
];
let (abbrev, bytes) = parse_abbrev(&raw).unwrap();
assert_eq!(bytes, 22);
assert_eq!(abbrev.abbrev_code, 0x1);
assert_eq!(abbrev.tag, DW_TAG_compile_unit);
assert!(abbrev.has_children);
// `pos` tracks where the next declaration starts in `raw`.
let mut pos = bytes;
let (abbrev, bytes) = parse_abbrev(&raw[pos..]).unwrap();
assert_eq!(bytes, 7);
assert_eq!(abbrev.abbrev_code, 0x2);
assert_eq!(abbrev.tag, DW_TAG_namespace);
assert!(abbrev.has_children);
pos += bytes;
let (abbrev, bytes) = parse_abbrev(&raw[pos..]).unwrap();
assert_eq!(bytes, 16);
assert_eq!(abbrev.abbrev_code, 0x3);
assert_eq!(abbrev.tag, DW_TAG_enumeration_type);
assert!(abbrev.has_children);
pos += bytes;
let (abbrev, bytes) = parse_abbrev(&raw[pos..]).unwrap();
assert_eq!(bytes, 9);
assert_eq!(abbrev.abbrev_code, 0x4);
assert!(!abbrev.has_children);
pos += bytes;
let (abbrev, bytes) = parse_abbrev(&raw[pos..]).unwrap();
assert_eq!(bytes, 12);
assert_eq!(abbrev.abbrev_code, 0x5);
assert!(abbrev.has_children);
}
/// Parse a whole abbreviation table (same declarations as above plus one
/// trailing zero byte) and check that all input bytes are consumed.
#[test]
fn test_parse_cu_abbrevs() {
let raw = [
0x01, 0x11, 0x01, 0x25, 0x0e, 0x13, 0x05, 0x03, 0x0e, 0x10, 0x17, 0x1b, 0x0e, 0xb4,
0x42, 0x19, 0x11, 0x01, 0x55, 0x17, 0x00, 0x00, 0x02, 0x39, 0x01, 0x03, 0x0e, 0x00,
0x00, 0x03, 0x04, 0x01, 0x49, 0x13, 0x6d, 0x19, 0x03, 0x0e, 0x0b, 0x0b, 0x88, 0x01,
0x0f, 0x00, 0x00, 0x04, 0x28, 0x00, 0x03, 0x0e, 0x1c, 0x0f, 0x00, 0x00, 0x05, 0x13,
0x01, 0x03, 0x0e, 0x0b, 0x0b, 0x88, 0x01, 0x0f, 0x00, 0x00, 0x00,
];
let (abbrevs, bytes) = parse_cu_abbrevs(&raw).unwrap();
assert_eq!(abbrevs.len(), 0x5);
assert_eq!(bytes, raw.len());
}
/// Walk every unit, DIE and attribute of the `data/dwarf-example` fixture
/// and compare the totals with known values.
#[test]
fn test_unititer() {
let bin_name = Path::new(&env!("CARGO_MANIFEST_DIR"))
.join("data")
.join("dwarf-example");
let elfparser = ElfParser::open(&bin_name).unwrap();
let abbrev_idx = elfparser.find_section(".debug_abbrev").unwrap();
let abbrev = elfparser.read_section_raw(abbrev_idx).unwrap();
let info_idx = elfparser.find_section(".debug_info").unwrap();
let info = elfparser.read_section_raw(info_idx).unwrap();
let iter = UnitIter::new(&info, &abbrev);
let mut cnt = 0;
let mut die_cnt = 0;
let mut attr_cnt = 0;
let mut subprog_cnt = 0;
for (_uh, dieiter) in iter {
cnt += 1;
for die in dieiter {
die_cnt += 1;
if die.tag == DW_TAG_subprogram {
subprog_cnt += 1;
}
// Iterating a DIE yields its attributes.
for (_name, _form, _opt, _value) in die {
attr_cnt += 1;
}
}
}
// The expected totals are specific to the fixture binary.
assert_eq!(cnt, 9);
assert_eq!(die_cnt, 78752);
assert_eq!(subprog_cnt, 12451);
assert_eq!(attr_cnt, 275310);
}
}

312
third_party/blazesym/src/elf/cache.rs vendored Normal file
View File

@@ -0,0 +1,312 @@
use std::cell::RefCell;
use std::collections::HashMap;
use std::fs::File;
use std::io::Error;
use std::path::{Path, PathBuf};
use std::ptr;
use std::rc::Rc;
use nix::sys::stat::{fstat, FileStat};
use std::os::unix::io::AsRawFd;
use crate::dwarf::DwarfResolver;
use super::ElfParser;
type ElfCacheEntryKey = PathBuf;
const DFL_CACHE_MAX: usize = 1024;
/// The object used to answer queries about one ELF file.
#[derive(Clone)]
pub enum ElfBackend {
    /// ELF file with DWARF information.
    Dwarf(Rc<DwarfResolver>),
    /// ELF file without DWARF information.
    Elf(Rc<ElfParser>),
}

#[cfg(test)]
impl ElfBackend {
    /// Return a new handle to the DWARF resolver, or `None` for a plain
    /// ELF backend.
    pub fn to_dwarf(&self) -> Option<Rc<DwarfResolver>> {
        match self {
            Self::Dwarf(resolver) => Some(Rc::clone(resolver)),
            Self::Elf(_) => None,
        }
    }

    /// Tell whether this backend is the DWARF variant.
    pub fn is_dwarf(&self) -> bool {
        match self {
            Self::Dwarf(_) => true,
            Self::Elf(_) => false,
        }
    }
}
/// One cached ELF file: the backend built for it plus the file metadata
/// captured when it was loaded, used later to detect a changed file.
struct ElfCacheEntry {
    // LRU links
    prev: *mut ElfCacheEntry,
    next: *mut ElfCacheEntry,

    // Path the entry was created for; doubles as the cache key.
    file_name: PathBuf,

    // Snapshot of the `fstat()` result taken when the entry was created.
    dev: libc::dev_t,
    inode: libc::ino_t,
    size: libc::off_t,
    mtime_sec: libc::time_t,
    mtime_nsec: i64,

    backend: ElfBackend,
}

impl ElfCacheEntry {
    /// Build an entry for `file`.
    ///
    /// A DWARF backend is used when `DwarfResolver` can be constructed
    /// from the file; otherwise the entry falls back to the plain ELF
    /// parser.
    ///
    /// # Errors
    ///
    /// Fails when `fstat()` on the file or `ElfParser::open_file()` fails.
    pub fn new(
        file_name: &Path,
        file: File,
        line_number_info: bool,
        debug_info_symbols: bool,
    ) -> Result<ElfCacheEntry, Error> {
        let stat = fstat(file.as_raw_fd())?;
        let parser = Rc::new(ElfParser::open_file(file)?);

        let dwarf = DwarfResolver::from_parser_for_addresses(
            Rc::clone(&parser),
            &[],
            line_number_info,
            debug_info_symbols,
        );
        let backend = match dwarf {
            Ok(resolver) => ElfBackend::Dwarf(Rc::new(resolver)),
            Err(_) => ElfBackend::Elf(parser),
        };

        let entry = ElfCacheEntry {
            prev: ptr::null_mut(),
            next: ptr::null_mut(),
            file_name: file_name.to_path_buf(),
            dev: stat.st_dev,
            inode: stat.st_ino,
            size: stat.st_size,
            mtime_sec: stat.st_mtime,
            mtime_nsec: stat.st_mtime_nsec,
            backend,
        };
        Ok(entry)
    }

    /// The key under which this entry is stored in the cache.
    fn get_key(&self) -> &Path {
        self.file_name.as_path()
    }

    /// Check whether `stat` still describes the file this entry was built
    /// from (same device, inode, size and modification time).
    fn is_valid(&self, stat: &FileStat) -> bool {
        let same_file = stat.st_dev == self.dev && stat.st_ino == self.inode;
        let same_content = stat.st_size == self.size
            && stat.st_mtime == self.mtime_sec
            && stat.st_mtime_nsec == self.mtime_nsec;
        same_file && same_content
    }

    /// Return a clone of the backend handle.
    fn get_backend(&self) -> ElfBackend {
        self.backend.clone()
    }
}
/// Doubly-linked LRU list of cache entries.
///
/// `head` is the entry that is evicted first (see `pop_head()`); entries
/// are appended or moved to `tail` when created or touched.
struct ElfCacheLru {
    head: *mut ElfCacheEntry,
    tail: *mut ElfCacheEntry,
}

impl ElfCacheLru {
    /// Move `ent` to the tail of the list.
    ///
    /// # Safety
    ///
    /// All entries linked in the list, including `ent`, must be valid.
    unsafe fn touch(&mut self, ent: &ElfCacheEntry) {
        unsafe {
            self.remove(ent);
            self.push_back(ent);
        }
    }

    /// Unlink `ent` from the list. The links stored in `ent` itself are
    /// left untouched.
    ///
    /// # Safety
    ///
    /// All entries linked in the list, including `ent`, must be valid.
    unsafe fn remove(&mut self, ent: &ElfCacheEntry) {
        let node = ent as *const ElfCacheEntry as *mut ElfCacheEntry;
        let (before, after) = unsafe { ((*node).prev, (*node).next) };

        if before.is_null() {
            // `ent` was the first entry.
            self.head = after;
        } else {
            unsafe { (*before).next = after };
        }

        if after.is_null() {
            // `ent` was the last entry.
            self.tail = before;
        } else {
            unsafe { (*after).prev = before };
        }
    }

    /// Append `ent` at the tail of the list.
    ///
    /// # Safety
    ///
    /// All entries linked in the list, including `ent`, must be valid.
    unsafe fn push_back(&mut self, ent: &ElfCacheEntry) {
        let node = ent as *const ElfCacheEntry as *mut ElfCacheEntry;

        unsafe { (*node).next = ptr::null_mut() };
        if self.head.is_null() {
            // First entry of an empty list.
            unsafe { (*node).prev = ptr::null_mut() };
            self.head = node;
        } else {
            unsafe {
                (*self.tail).next = node;
                (*node).prev = self.tail;
            }
        }
        self.tail = node;
    }

    /// Unlink and return the head entry, or a null pointer when the list
    /// is empty.
    ///
    /// # Safety
    ///
    /// All entries linked in the list must be valid.
    unsafe fn pop_head(&mut self) -> *mut ElfCacheEntry {
        let oldest = self.head;
        if oldest.is_null() {
            return oldest;
        }
        unsafe { self.remove(&*oldest) };
        oldest
    }
}
/// The cache proper: entries keyed by path plus the LRU list that decides
/// which one is evicted once more than `max_elfs` entries are stored.
struct _ElfCache {
    elfs: HashMap<ElfCacheEntryKey, Box<ElfCacheEntry>>,
    lru: ElfCacheLru,
    max_elfs: usize,
    line_number_info: bool,
    debug_info_symbols: bool,
}

impl _ElfCache {
    /// Create an empty cache limited to `DFL_CACHE_MAX` entries.
    ///
    /// The two flags are forwarded to every `ElfCacheEntry::new()` call.
    fn new(line_number_info: bool, debug_info_symbols: bool) -> _ElfCache {
        let lru = ElfCacheLru {
            head: ptr::null_mut(),
            tail: ptr::null_mut(),
        };
        Self {
            elfs: HashMap::new(),
            lru,
            max_elfs: DFL_CACHE_MAX,
            line_number_info,
            debug_info_symbols,
        }
    }

    /// Look an entry up by path and mark it as most recently used.
    ///
    /// # Safety
    ///
    /// The returned reference is only valid before next time calling
    /// create_entry().
    ///
    unsafe fn find_entry(&mut self, file_name: &Path) -> Option<&ElfCacheEntry> {
        let boxed = self.elfs.get(file_name)?;
        let entry: &ElfCacheEntry = &**boxed;
        unsafe { self.lru.touch(entry) };
        Some(entry)
    }

    /// Load `file`, store it under `file_name` and return the new entry.
    ///
    /// # Safety
    ///
    /// The returned reference is only valid before next time calling
    /// create_entry().
    ///
    unsafe fn create_entry(
        &mut self,
        file_name: &Path,
        file: File,
    ) -> Result<&ElfCacheEntry, Error> {
        let entry = ElfCacheEntry::new(
            file_name,
            file,
            self.line_number_info,
            self.debug_info_symbols,
        )?;
        let key = entry.get_key().to_path_buf();
        self.elfs.insert(key.clone(), Box::new(entry));

        // Link the boxed entry (whose heap address is what the list stores).
        let stored: &ElfCacheEntry = &**self.elfs.get(&key).unwrap();
        unsafe { self.lru.push_back(stored) };
        unsafe { self.ensure_size() };

        // The new entry was appended last, so it is the tail of the list.
        Ok(unsafe { &*self.lru.tail }) // Get 'static lifetime
    }

    /// Evict the head of the LRU list when the cache holds more than
    /// `max_elfs` entries.
    ///
    /// # Safety
    ///
    /// This function may make some cache entries invalid. Callers
    /// should be careful about all references of cache entries they
    /// are holding.
    unsafe fn ensure_size(&mut self) {
        if self.elfs.len() <= self.max_elfs {
            return;
        }
        let victim = unsafe { self.lru.pop_head() };
        let victim_key = unsafe { (*victim).get_key() };
        self.elfs.remove(victim_key).unwrap();
    }

    /// Return the backend cached for `file_name`, (re)loading it from
    /// `file` when there is no entry or the entry no longer matches the
    /// metadata of `file`.
    fn find_or_create_backend(
        &mut self,
        file_name: &Path,
        file: File,
    ) -> Result<ElfBackend, Error> {
        if let Some(cached) = unsafe { self.find_entry(file_name) } {
            let stat = fstat(file.as_raw_fd())?;
            if cached.is_valid(&stat) {
                return Ok(cached.get_backend());
            }

            // Purge the entry and load it from the filesystem.
            // Going through a raw pointer decouples the entry from the
            // borrow of `self` held by `cached`.
            let stale = cached as *const ElfCacheEntry;
            unsafe { self.lru.remove(&*stale) };
            self.elfs.remove(file_name);
        }

        let fresh = unsafe { self.create_entry(file_name, file)? };
        Ok(fresh.get_backend())
    }

    /// Open `path` and return its (possibly cached) backend.
    pub fn find(&mut self, path: &Path) -> Result<ElfBackend, Error> {
        let file = File::open(path)?;
        self.find_or_create_backend(path, file)
    }
}
/// Cache of ELF backends usable through a shared reference.
///
/// The actual cache lives in a `RefCell`, so lookups only need `&self`.
pub struct ElfCache {
    cache: RefCell<_ElfCache>,
}

impl ElfCache {
    /// Create an empty cache; both flags are passed on to the inner cache.
    pub fn new(line_number_info: bool, debug_info_symbols: bool) -> ElfCache {
        let inner = _ElfCache::new(line_number_info, debug_info_symbols);
        Self {
            cache: RefCell::new(inner),
        }
    }

    /// Return the backend for the ELF file at `path`.
    ///
    /// # Panics
    ///
    /// Panics when the inner `RefCell` is already borrowed.
    pub fn find(&self, path: &Path) -> Result<ElfBackend, Error> {
        self.cache.borrow_mut().find(path)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::env;

    /// Looking the same file up twice must return backends that share one
    /// parser object.
    #[test]
    fn test_cache() {
        let bin_name = Path::new(&env!("CARGO_MANIFEST_DIR"))
            .join("data")
            .join("test-no-debug.bin");

        let cache = ElfCache::new(true, false);
        let first = cache.find(bin_name.as_path());
        let second = cache.find(bin_name.as_path());
        assert!(first.is_ok());
        assert!(second.is_ok());

        let first = first.unwrap();
        let second = second.unwrap();
        assert!(first.is_dwarf());
        assert!(second.is_dwarf());

        // Compare parser addresses only; the raw pointers are never
        // dereferenced.
        let first_parser = ptr::addr_of!(*first.to_dwarf().unwrap().get_parser());
        let second_parser = ptr::addr_of!(*second.to_dwarf().unwrap().get_parser());
        assert_eq!(first_parser, second_parser);
    }
}

8
third_party/blazesym/src/elf/mod.rs vendored Normal file
View File

@@ -0,0 +1,8 @@
mod cache;
mod parser;
mod resolver;
mod types;
pub use cache::ElfCache;
pub use parser::ElfParser;
pub use resolver::ElfResolver;

699
third_party/blazesym/src/elf/parser.rs vendored Normal file
View File

@@ -0,0 +1,699 @@
use std::cell::RefCell;
use std::ffi::CStr;
use std::fs::File;
use std::io::{Error, ErrorKind, Read, Seek, SeekFrom};
use std::mem;
#[cfg(test)]
use std::path::Path;
use regex::Regex;
use crate::util::{extract_string, search_address_opt_key};
use crate::FindAddrOpts;
use crate::SymbolInfo;
use crate::SymbolType;
use super::types::Elf64_Ehdr;
use super::types::Elf64_Phdr;
use super::types::Elf64_Shdr;
use super::types::Elf64_Sym;
use super::types::SHN_UNDEF;
#[cfg(test)]
use super::types::STT_FUNC;
/// Read exactly `size` bytes starting at absolute file offset `off`.
///
/// # Errors
///
/// Fails when seeking fails or when fewer than `size` bytes are available.
fn read_u8(file: &mut File, off: u64, size: usize) -> Result<Vec<u8>, Error> {
    file.seek(SeekFrom::Start(off))?;
    let mut bytes = vec![0u8; size];
    file.read_exact(&mut bytes)?;
    Ok(bytes)
}
/// Read an `Elf64_Ehdr` from the current position of `file`.
///
/// The function does not seek; callers are expected to have the file
/// positioned where the header starts.
fn read_elf_header(file: &mut File) -> Result<Elf64_Ehdr, Error> {
    const EHDR_SIZE: usize = mem::size_of::<Elf64_Ehdr>();

    let mut raw = [0u8; EHDR_SIZE];
    file.read_exact(&mut raw)?;

    // SAFETY: `raw` is valid for reads of `EHDR_SIZE` bytes and the
    //         `Elf64_Ehdr` object that we read is comprised only of
    //         members that are valid for any bit pattern. The read is
    //         unaligned because a byte array carries no alignment
    //         guarantee.
    let header = unsafe { raw.as_ptr().cast::<Elf64_Ehdr>().read_unaligned() };
    Ok(header)
}
/// Read all section headers described by `ehdr` (`e_shnum` headers
/// starting at file offset `e_shoff`).
///
/// # Errors
///
/// Fails when the section header table cannot be read completely.
fn read_elf_sections(file: &mut File, ehdr: &Elf64_Ehdr) -> Result<Vec<Elf64_Shdr>, Error> {
    const HDRSIZE: usize = mem::size_of::<Elf64_Shdr>();

    let num = ehdr.e_shnum as usize;
    let buf = read_u8(file, ehdr.e_shoff, num * HDRSIZE)?;

    // Copy the headers out of the byte buffer one by one. Re-interpreting
    // the `Vec<u8>` allocation as a `Vec<Elf64_Shdr>` would hand the
    // allocator a different layout on drop and does not guarantee the
    // alignment that `Elf64_Shdr` needs.
    let shdrs = buf
        .chunks_exact(HDRSIZE)
        .map(|chunk| {
            // SAFETY: `chunk` is exactly `HDRSIZE` readable bytes and
            //         `Elf64_Shdr` consists only of integers, which are
            //         valid for any bit pattern. `read_unaligned` copes
            //         with the missing alignment guarantee.
            unsafe { chunk.as_ptr().cast::<Elf64_Shdr>().read_unaligned() }
        })
        .collect();
    Ok(shdrs)
}
/// Read all program headers described by `ehdr` (`e_phnum` headers
/// starting at file offset `e_phoff`).
///
/// # Errors
///
/// Fails when the program header table cannot be read completely.
fn read_elf_program_headers(file: &mut File, ehdr: &Elf64_Ehdr) -> Result<Vec<Elf64_Phdr>, Error> {
    const HDRSIZE: usize = mem::size_of::<Elf64_Phdr>();

    let num = ehdr.e_phnum as usize;
    let buf = read_u8(file, ehdr.e_phoff, num * HDRSIZE)?;

    // Copy the headers out of the byte buffer one by one. Re-interpreting
    // the `Vec<u8>` allocation as a `Vec<Elf64_Phdr>` would hand the
    // allocator a different layout on drop and does not guarantee the
    // alignment that `Elf64_Phdr` needs.
    let phdrs = buf
        .chunks_exact(HDRSIZE)
        .map(|chunk| {
            // SAFETY: `chunk` is exactly `HDRSIZE` readable bytes and
            //         `Elf64_Phdr` consists only of integers, which are
            //         valid for any bit pattern. `read_unaligned` copes
            //         with the missing alignment guarantee.
            unsafe { chunk.as_ptr().cast::<Elf64_Phdr>().read_unaligned() }
        })
        .collect();
    Ok(phdrs)
}
/// Read the whole content of `section` (`sh_size` bytes at `sh_offset`).
fn read_elf_section_raw(file: &mut File, section: &Elf64_Shdr) -> Result<Vec<u8>, Error> {
    let start = section.sh_offset;
    let len = section.sh_size as usize;
    read_u8(file, start, len)
}
/// Move the position of `file` to the first byte of `section`.
fn read_elf_section_seek(file: &mut File, section: &Elf64_Shdr) -> Result<(), Error> {
    let target = SeekFrom::Start(section.sh_offset);
    file.seek(target).map(|_| ())
}
/// Look the name of `sect` up in `strtab` at offset `sh_name`.
///
/// Returns whatever `extract_string()` yields for that offset.
fn get_elf_section_name<'a>(sect: &Elf64_Shdr, strtab: &'a [u8]) -> Option<&'a str> {
    let name_off = sect.sh_name as usize;
    extract_string(strtab, name_off)
}
/// Lazily populated state of an `ElfParser`.
///
/// Every `Option` field starts as `None` and is filled by the matching
/// `ensure_*()` method of `ElfParser` the first time it is needed.
struct ElfParserBack {
// ELF file header.
ehdr: Option<Elf64_Ehdr>,
// Section headers, in file order.
shdrs: Option<Vec<Elf64_Shdr>>,
// Raw content of the section selected by `e_shstrndx`.
shstrtab: Option<Vec<u8>>,
// Program headers, in file order.
phdrs: Option<Vec<Elf64_Phdr>>,
symtab: Option<Vec<Elf64_Sym>>, // in address order
symtab_origin: Option<Vec<Elf64_Sym>>, // The copy in the same order as the file
// Raw content of `.strtab` (or `.dynstr` when `.strtab` is missing).
strtab: Option<Vec<u8>>,
str2symtab: Option<Vec<(usize, usize)>>, // strtab offset to symtab in the dictionary order
// Raw section contents cached by `read_section_raw_cache()`, indexed by
// section index.
sect_cache: Vec<Option<Vec<u8>>>,
}
/// A parser against ELF64 format.
///
/// Parts of the file are read on demand and kept in `backobj`; both
/// members sit in a `RefCell` so that the parser can be used through a
/// shared reference.
pub struct ElfParser {
// The ELF file being parsed.
file: RefCell<File>,
// Lazily loaded headers and tables.
backobj: RefCell<ElfParserBack>,
}
impl ElfParser {
/// Wrap an already opened file in a parser.
///
/// Nothing is read from `file` here; all state starts out empty.
pub fn open_file(file: File) -> Result<ElfParser, Error> {
    let backobj = ElfParserBack {
        ehdr: None,
        shdrs: None,
        shstrtab: None,
        phdrs: None,
        symtab: None,
        symtab_origin: None,
        strtab: None,
        str2symtab: None,
        sect_cache: Vec::new(),
    };
    Ok(ElfParser {
        file: RefCell::new(file),
        backobj: RefCell::new(backobj),
    })
}
/// Open the file at `filename` and wrap it in a parser.
#[cfg(test)]
pub fn open(filename: &Path) -> Result<ElfParser, Error> {
    let file = File::open(filename)?;
    Self::open_file(file)
}
/// Load and cache the ELF header unless that already happened.
///
/// # Errors
///
/// Fails when the header cannot be read or when the first four bytes of
/// `e_ident` are not `0x7f 'E' 'L' 'F'`.
fn ensure_ehdr(&self) -> Result<(), Error> {
    const ELF_MAGIC: [u8; 4] = [0x7f, 0x45, 0x4c, 0x46];

    let mut back = self.backobj.borrow_mut();
    if back.ehdr.is_none() {
        let header = read_elf_header(&mut self.file.borrow_mut())?;
        if header.e_ident[..4] != ELF_MAGIC {
            return Err(Error::new(ErrorKind::InvalidData, "e_ident is wrong"));
        }
        back.ehdr = Some(header);
    }
    Ok(())
}
/// Load and cache the section headers unless that already happened.
///
/// Also sizes `sect_cache` so that it has one (empty) slot per section.
fn ensure_shdrs(&self) -> Result<(), Error> {
    self.ensure_ehdr()?;

    let mut back = self.backobj.borrow_mut();
    if back.shdrs.is_none() {
        let headers =
            read_elf_sections(&mut self.file.borrow_mut(), back.ehdr.as_ref().unwrap())?;
        back.sect_cache.resize(headers.len(), None);
        back.shdrs = Some(headers);
    }
    Ok(())
}
/// Load and cache the program headers unless that already happened.
fn ensure_phdrs(&self) -> Result<(), Error> {
    self.ensure_ehdr()?;

    let mut back = self.backobj.borrow_mut();
    if back.phdrs.is_none() {
        let headers =
            read_elf_program_headers(&mut self.file.borrow_mut(), back.ehdr.as_ref().unwrap())?;
        back.phdrs = Some(headers);
    }
    Ok(())
}
fn ensure_shstrtab(&self) -> Result<(), Error> {
self.ensure_shdrs()?;
let mut me = self.backobj.borrow_mut();
if me.shstrtab.is_some() {
return Ok(());
}
let shstrndx = me.ehdr.as_ref().unwrap().e_shstrndx;
let shstrtab_sec = &me.shdrs.as_ref().unwrap()[shstrndx as usize];
let shstrtab = read_elf_section_raw(&mut self.file.borrow_mut(), shstrtab_sec)?;
me.shstrtab = Some(shstrtab);
Ok(())
}
/// Load and cache the symbol table unless that already happened.
///
/// `.symtab` is used when present, `.dynsym` otherwise. Two copies are
/// stored: `symtab`, sorted by `st_value`, and `symtab_origin`, in file
/// order.
///
/// # Errors
///
/// Fails when neither section exists, when the section cannot be read, or
/// with `InvalidData` when its size is not a multiple of the size of
/// `Elf64_Sym`.
fn ensure_symtab(&self) -> Result<(), Error> {
    const SYM_SIZE: usize = mem::size_of::<Elf64_Sym>();

    {
        // Release the borrow before the calls below borrow `backobj` again.
        let me = self.backobj.borrow();
        if me.symtab.is_some() {
            return Ok(());
        }
    }

    let sect_idx = match self.find_section(".symtab") {
        Ok(idx) => idx,
        Err(_) => self.find_section(".dynsym")?,
    };
    let symtab_raw = self.read_section_raw(sect_idx)?;

    if symtab_raw.len() % SYM_SIZE != 0 {
        return Err(Error::new(
            ErrorKind::InvalidData,
            "size of the .symtab section does not match",
        ));
    }

    // Copy the symbols out of the byte buffer one by one. Re-interpreting
    // the `Vec<u8>` allocation as a `Vec<Elf64_Sym>` would hand the
    // allocator a different layout on drop and does not guarantee the
    // alignment that `Elf64_Sym` needs.
    let origin: Vec<Elf64_Sym> = symtab_raw
        .chunks_exact(SYM_SIZE)
        .map(|chunk| {
            // SAFETY: `chunk` is exactly `SYM_SIZE` readable bytes and
            //         `Elf64_Sym` consists only of integers, which are
            //         valid for any bit pattern. `read_unaligned` copes
            //         with the missing alignment guarantee.
            unsafe { chunk.as_ptr().cast::<Elf64_Sym>().read_unaligned() }
        })
        .collect();

    let mut symtab = origin.clone();
    symtab.sort_by_key(|sym| sym.st_value);

    let mut me = self.backobj.borrow_mut();
    me.symtab = Some(symtab);
    me.symtab_origin = Some(origin);

    Ok(())
}
/// Load and cache the symbol string table unless that already happened.
///
/// `.strtab` is used when present, `.dynstr` otherwise.
fn ensure_strtab(&self) -> Result<(), Error> {
    // The temporary borrow ends with the condition, before the calls
    // below borrow `backobj` again.
    if self.backobj.borrow().strtab.is_some() {
        return Ok(());
    }

    let sect_idx = match self.find_section(".strtab") {
        Ok(idx) => idx,
        Err(_) => self.find_section(".dynstr")?,
    };
    let strtab = self.read_section_raw(sect_idx)?;

    self.backobj.borrow_mut().strtab = Some(strtab);
    Ok(())
}
fn ensure_str2symtab(&self) -> Result<(), Error> {
self.ensure_symtab()?;
self.ensure_strtab()?;
let mut me = self.backobj.borrow_mut();
if me.str2symtab.is_some() {
return Ok(());
}
// Build strtab offsets to symtab indices
let strtab = me.strtab.as_ref().unwrap();
let symtab = me.symtab.as_ref().unwrap();
let mut str2symtab = Vec::<(usize, usize)>::with_capacity(symtab.len());
for (sym_i, sym) in symtab.iter().enumerate() {
let name_off = sym.st_name;
str2symtab.push((name_off as usize, sym_i));
}
// Sort in the dictionary order
str2symtab
.sort_by_key(|&x| unsafe { CStr::from_ptr(&strtab[x.0] as *const u8 as *const i8) });
me.str2symtab = Some(str2symtab);
Ok(())
}
/// Return the `e_type` field of the ELF header.
pub fn get_elf_file_type(&self) -> Result<u16, Error> {
    self.ensure_ehdr()?;

    let back = self.backobj.borrow();
    let header = back.ehdr.as_ref().unwrap();
    Ok(header.e_type)
}
/// Check that `sect_idx` is smaller than the number of sections.
///
/// # Errors
///
/// Returns `InvalidInput` when the index is out of range.
fn check_section_index(&self, sect_idx: usize) -> Result<(), Error> {
    if sect_idx < self.get_num_sections()? {
        Ok(())
    } else {
        Err(Error::new(ErrorKind::InvalidInput, "the index is too big"))
    }
}
/// Move the position of the underlying file to the start of the section
/// with index `sect_idx`.
pub fn section_seek(&self, sect_idx: usize) -> Result<(), Error> {
    self.check_section_index(sect_idx)?;
    self.ensure_shdrs()?;

    let back = self.backobj.borrow();
    let section = &back.shdrs.as_ref().unwrap()[sect_idx];
    let mut file = self.file.borrow_mut();
    read_elf_section_seek(&mut file, section)
}
/// Read the raw data of the section of a given index.
///
/// The data is read from the file on every call and returned as a new
/// buffer.
pub fn read_section_raw(&self, sect_idx: usize) -> Result<Vec<u8>, Error> {
    self.check_section_index(sect_idx)?;
    self.ensure_shdrs()?;

    let back = self.backobj.borrow();
    let section = &back.shdrs.as_ref().unwrap()[sect_idx];
    let mut file = self.file.borrow_mut();
    read_elf_section_raw(&mut file, section)
}
/// Read the raw data of the section of a given index, keeping it cached.
///
/// The section is read from the file on the first request only; later
/// requests return the buffer stored in `sect_cache`.
pub fn read_section_raw_cache(&self, sect_idx: usize) -> Result<&[u8], Error> {
    self.check_section_index(sect_idx)?;
    self.ensure_shdrs()?;

    let mut back = self.backobj.borrow_mut();
    if back.sect_cache[sect_idx].is_none() {
        let section = &back.shdrs.as_ref().unwrap()[sect_idx];
        let data = read_elf_section_raw(&mut self.file.borrow_mut(), section)?;
        back.sect_cache[sect_idx] = Some(data);
    }

    let cached: &[u8] = back.sect_cache[sect_idx].as_ref().unwrap().as_slice();
    // The transmute detaches the slice from the `RefCell` guard so that it
    // can be returned with the lifetime of `&self`.
    // NOTE(review): this relies on a filled cache slot never being replaced
    // or dropped while the parser is alive - confirm no other code does so.
    Ok(unsafe { mem::transmute(cached) })
}
/// Get the name of the section of a given index.
///
/// # Errors
///
/// Returns `InvalidData` when no name can be extracted from the section
/// name string table.
pub fn get_section_name(&self, sect_idx: usize) -> Result<&str, Error> {
    self.check_section_index(sect_idx)?;
    self.ensure_shstrtab()?;

    let back = self.backobj.borrow();
    let sect = &back.shdrs.as_ref().unwrap()[sect_idx];
    // Going through the raw pointer lets the returned name borrow from the
    // string table for the lifetime of `&self` instead of the guard.
    let shstrtab = unsafe { (*self.backobj.as_ptr()).shstrtab.as_ref().unwrap() };

    get_elf_section_name(sect, shstrtab)
        .ok_or_else(|| Error::new(ErrorKind::InvalidData, "invalid section name"))
}
/// Return the `sh_size` of the section of a given index.
pub fn get_section_size(&self, sect_idx: usize) -> Result<usize, Error> {
    self.check_section_index(sect_idx)?;
    self.ensure_shdrs()?;

    let back = self.backobj.borrow();
    let size = back.shdrs.as_ref().unwrap()[sect_idx].sh_size;
    Ok(size as usize)
}
/// Return the number of sections as stated by `e_shnum` in the ELF header.
pub fn get_num_sections(&self) -> Result<usize, Error> {
    self.ensure_ehdr()?;

    let back = self.backobj.borrow();
    let header = back.ehdr.as_ref().unwrap();
    Ok(header.e_shnum as usize)
}
/// Find the section of a given name.
///
/// Sections are checked in index order and the index of the first one
/// whose name equals `name` is returned.
///
/// # Errors
///
/// Returns `NotFound` when no section has that name; errors from reading
/// section names are passed through.
pub fn find_section(&self, name: &str) -> Result<usize, Error> {
    for idx in 0..self.get_num_sections()? {
        let sect_name = self.get_section_name(idx)?;
        if sect_name == name {
            return Ok(idx);
        }
    }

    let msg = format!("unable to find ELF section: {name}");
    Err(Error::new(ErrorKind::NotFound, msg))
}
/// Find a symbol of type `st_type` for `address`.
///
/// The lookup is delegated to `search_address_opt_key()` over the
/// address-ordered symbol table; symbols of another type and undefined
/// symbols are excluded. Returns the name and the `st_value` of the
/// symbol found.
///
/// # Errors
///
/// Returns `NotFound` when the search yields nothing and `InvalidData`
/// when the name of the symbol cannot be extracted.
pub fn find_symbol(&self, address: u64, st_type: u8) -> Result<(&str, u64), Error> {
    self.ensure_symtab()?;
    self.ensure_strtab()?;

    let back = self.backobj.borrow();
    let idx = search_address_opt_key(
        back.symtab.as_ref().unwrap(),
        address,
        &|sym: &Elf64_Sym| {
            let wanted = sym.st_info & 0xf == st_type && sym.st_shndx != SHN_UNDEF;
            if wanted {
                Some(sym.st_value)
            } else {
                None
            }
        },
    )
    .ok_or_else(|| {
        Error::new(
            ErrorKind::NotFound,
            "Does not found a symbol for the given address",
        )
    })?;

    let sym = &back.symtab.as_ref().unwrap()[idx];
    // Going through the raw pointer lets the returned name borrow from the
    // string table for the lifetime of `&self` instead of the guard.
    let strtab = unsafe { (*self.backobj.as_ptr()).strtab.as_ref().unwrap().as_slice() };
    let sym_name = extract_string(strtab, sym.st_name as usize).ok_or_else(|| {
        Error::new(ErrorKind::InvalidData, "invalid symbol name string/offset")
    })?;

    Ok((sym_name, sym.st_value))
}
/// Find all defined symbols called exactly `name`.
///
/// Every returned `SymbolInfo` carries the `st_value` and `st_size` of the
/// symbol and is reported with `SymbolType::Function`. An empty vector is
/// returned when no symbol matches.
///
/// # Errors
///
/// Returns `Unsupported` when `opts.sym_type` is `SymbolType::Variable`;
/// errors from loading the symbol tables are passed through.
pub fn find_address(&self, name: &str, opts: &FindAddrOpts) -> Result<Vec<SymbolInfo>, Error> {
    /// Bytes of the name starting at `off`, up to but excluding the
    /// terminating NUL. An offset outside of `strtab` yields an empty
    /// name and a missing terminator ends the name at the end of the
    /// table, so the data is never read out of bounds.
    fn name_at(strtab: &[u8], off: usize) -> &[u8] {
        let tail = strtab.get(off..).unwrap_or(&[]);
        let len = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
        &tail[..len]
    }

    if let SymbolType::Variable = opts.sym_type {
        return Err(Error::new(ErrorKind::Unsupported, "Not implemented"));
    }

    self.ensure_str2symtab()?;

    let me = self.backobj.borrow();
    let str2symtab = me.str2symtab.as_ref().unwrap();
    let strtab = me.strtab.as_ref().unwrap();
    let symtab = me.symtab.as_ref().unwrap();
    let wanted = name.as_bytes();

    // `str2symtab` is sorted by name bytes, so the first entry that is not
    // smaller than `name` starts the run of equal names (if there is one).
    // Searching for the lower bound directly also covers a run that begins
    // at index 0 or 1, and it compares bytes without allocating or
    // requiring symbol names to be valid UTF-8.
    let first = str2symtab.partition_point(|&(name_off, _)| name_at(strtab, name_off) < wanted);

    let found = str2symtab[first..]
        .iter()
        .take_while(|&&(name_off, _)| name_at(strtab, name_off) == wanted)
        .map(|&(_, sym_i)| &symtab[sym_i])
        // Undefined symbols have no address in this file.
        .filter(|sym| sym.st_shndx != SHN_UNDEF)
        .map(|sym| SymbolInfo {
            name: name.to_string(),
            address: sym.st_value,
            size: sym.st_size,
            sym_type: SymbolType::Function,
            ..Default::default()
        })
        .collect();

    Ok(found)
}
/// Find all defined symbols whose name matches the regular expression
/// `pattern`.
///
/// Every returned `SymbolInfo` carries the `st_value` and `st_size` of the
/// symbol and is reported with `SymbolType::Function`. Symbol names that
/// are not valid UTF-8 are skipped.
///
/// # Errors
///
/// Returns `Unsupported` when `opts.sym_type` is `SymbolType::Variable`
/// and `InvalidInput` when `pattern` is not a valid regular expression;
/// errors from loading the symbol tables are passed through.
pub fn find_address_regex(
    &self,
    pattern: &str,
    opts: &FindAddrOpts,
) -> Result<Vec<SymbolInfo>, Error> {
    /// Bytes of the name starting at `off`, up to but excluding the
    /// terminating NUL. An offset outside of `strtab` yields an empty
    /// name and a missing terminator ends the name at the end of the
    /// table, so the data is never read out of bounds.
    fn name_at(strtab: &[u8], off: usize) -> &[u8] {
        let tail = strtab.get(off..).unwrap_or(&[]);
        let len = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
        &tail[..len]
    }

    if let SymbolType::Variable = opts.sym_type {
        return Err(Error::new(ErrorKind::Unsupported, "Not implemented"));
    }

    // The pattern is caller input: report a malformed one as an error
    // instead of panicking.
    let re = Regex::new(pattern)
        .map_err(|err| Error::new(ErrorKind::InvalidInput, err.to_string()))?;

    self.ensure_str2symtab()?;

    let me = self.backobj.borrow();
    let str2symtab = me.str2symtab.as_ref().unwrap();
    let strtab = me.strtab.as_ref().unwrap();
    let symtab = me.symtab.as_ref().unwrap();

    let mut syms = vec![];
    for &(str_off, sym_i) in str2symtab {
        let sname = match std::str::from_utf8(name_at(strtab, str_off)) {
            Ok(sname) => sname,
            // A name that is not UTF-8 cannot be matched as a `str`.
            Err(_) => continue,
        };
        if !re.is_match(sname) {
            continue;
        }

        let sym_ref = &symtab[sym_i];
        // Undefined symbols have no address in this file.
        if sym_ref.st_shndx != SHN_UNDEF {
            syms.push(SymbolInfo {
                name: sname.to_string(),
                address: sym_ref.st_value,
                size: sym_ref.st_size,
                sym_type: SymbolType::Function,
                ..Default::default()
            });
        }
    }
    Ok(syms)
}
/// Return the symbol at position `idx` of the address-ordered symbol
/// table.
///
/// # Panics
///
/// Panics when `idx` is out of range.
#[cfg(test)]
fn get_symbol(&self, idx: usize) -> Result<&Elf64_Sym, Error> {
    self.ensure_symtab()?;

    let back = self.backobj.as_ptr();
    // Borrow through the raw pointer so that the reference can be returned
    // with the lifetime of `&self`.
    let symtab = unsafe { (*back).symtab.as_ref().unwrap() };
    Ok(&symtab[idx])
}
/// Return the name of the symbol at position `idx` of the address-ordered
/// symbol table.
///
/// The string table must have been loaded before (`ensure_strtab()`);
/// this method unwraps it without loading it.
#[cfg(test)]
fn get_symbol_name(&self, idx: usize) -> Result<&str, Error> {
    let sym = self.get_symbol(idx)?;

    let back = self.backobj.as_ptr();
    let strtab = unsafe { (*back).strtab.as_ref().unwrap().as_slice() };

    extract_string(strtab, sym.st_name as usize)
        .ok_or_else(|| Error::new(ErrorKind::InvalidData, "invalid symb name string/offset"))
}
/// Return all program headers of the file, loading them if necessary.
pub fn get_all_program_headers(&self) -> Result<&[Elf64_Phdr], Error> {
    self.ensure_phdrs()?;

    let back = self.backobj.as_ptr();
    // Borrow through the raw pointer so that the slice can be returned
    // with the lifetime of `&self`.
    let phdrs = unsafe { (*back).phdrs.as_ref().unwrap() };
    Ok(phdrs.as_slice())
}
/// Pick a defined function symbol for tests.
///
/// Starting in the middle of the address-ordered symbol table, the first
/// symbol of type `STT_FUNC` that is not undefined is chosen. Returns its
/// name and address.
///
/// # Panics
///
/// Panics when the tables cannot be loaded or when no such symbol exists
/// from the middle of the table onwards.
#[cfg(test)]
fn pick_symtab_addr(&self) -> (&str, u64) {
    self.ensure_symtab().unwrap();
    self.ensure_strtab().unwrap();

    let (idx, addr) = {
        // The borrow ends with this block, before the name lookup below.
        let back = self.backobj.borrow();
        let symtab = back.symtab.as_ref().unwrap();
        let mut idx = symtab.len() / 2;
        loop {
            let sym = &symtab[idx];
            if sym.st_info & 0xf == STT_FUNC && sym.st_shndx != SHN_UNDEF {
                break (idx, sym.st_value);
            }
            idx += 1;
        }
    };

    (self.get_symbol_name(idx).unwrap(), addr)
}
/// Fill `buf` with data read from the file at its current position.
///
/// The caller can use section_seek() to move the current position
/// of the backed file. However, this function doesn't promise to
/// not cross the boundary of the section. The caller should take
/// care about it.
///
/// # Errors
///
/// Fails when `buf` cannot be filled completely.
pub unsafe fn read_raw(&self, buf: &mut [u8]) -> Result<(), Error> {
    let mut file = self.file.borrow_mut();
    file.read_exact(buf)
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::env;
/// Read the ELF header, the section headers and the section name string
/// table of the fixture with the free helper functions.
#[test]
fn test_elf_header_sections() {
let bin_name = Path::new(&env!("CARGO_MANIFEST_DIR"))
.join("data")
.join("test-no-debug.bin");
let mut bin_file = File::open(bin_name).unwrap();
let ehdr = read_elf_header(&mut bin_file);
assert!(ehdr.is_ok());
let ehdr = ehdr.unwrap();
// Expected `e_ident` of the fixture, starting with 0x7f 'E' 'L' 'F'.
assert_eq!(
ehdr.e_ident,
[
0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00
]
);
assert_eq!(ehdr.e_version, 0x1);
assert_eq!(ehdr.e_shentsize as usize, mem::size_of::<Elf64_Shdr>());
let shdrs = read_elf_sections(&mut bin_file, &ehdr);
assert!(shdrs.is_ok());
let shdrs = shdrs.unwrap();
let shstrndx = ehdr.e_shstrndx as usize;
let shstrtab_sec = &shdrs[shstrndx];
let shstrtab = read_elf_section_raw(&mut bin_file, shstrtab_sec);
assert!(shstrtab.is_ok());
let shstrtab = shstrtab.unwrap();
// The string table section must be able to name itself.
let sec_name = get_elf_section_name(shstrtab_sec, &shstrtab);
assert!(sec_name.is_some());
assert_eq!(sec_name.unwrap(), ".shstrtab");
}
/// The parser must be able to find a section by name.
#[test]
fn test_elf64_parser() {
let bin_name = Path::new(&env!("CARGO_MANIFEST_DIR"))
.join("data")
.join("test-no-debug.bin");
let parser = ElfParser::open(bin_name.as_ref()).unwrap();
assert!(parser.find_section(".shstrtab").is_ok());
}
/// Looking the address of a picked function symbol up must return the
/// same name and address.
#[test]
fn test_elf64_symtab() {
let bin_name = Path::new(&env!("CARGO_MANIFEST_DIR"))
.join("data")
.join("test-no-debug.bin");
let parser = ElfParser::open(bin_name.as_ref()).unwrap();
assert!(parser.find_section(".shstrtab").is_ok());
let (sym_name, addr) = parser.pick_symtab_addr();
let sym_r = parser.find_symbol(addr, STT_FUNC);
assert!(sym_r.is_ok());
let (sym_name_ret, addr_ret) = sym_r.unwrap();
assert_eq!(addr_ret, addr);
assert_eq!(sym_name_ret, sym_name);
}
/// Looking the name of a picked function symbol up must return exactly
/// one result carrying its address.
#[test]
fn test_elf64_find_address() {
let bin_name = Path::new(&env!("CARGO_MANIFEST_DIR"))
.join("data")
.join("test-no-debug.bin");
let parser = ElfParser::open(bin_name.as_ref()).unwrap();
assert!(parser.find_section(".shstrtab").is_ok());
let (sym_name, addr) = parser.pick_symtab_addr();
println!("{sym_name}");
let opts = FindAddrOpts {
offset_in_file: false,
obj_file_name: false,
sym_type: SymbolType::Unknown,
};
let addr_r = parser.find_address(sym_name, &opts).unwrap();
assert_eq!(addr_r.len(), 1);
assert!(addr_r.iter().any(|x| x.address == addr));
}
}

186
third_party/blazesym/src/elf/resolver.rs vendored Normal file
View File

@@ -0,0 +1,186 @@
use std::io::ErrorKind;
use std::path::Path;
use std::path::PathBuf;
use crate::AddressLineInfo;
use crate::CacheHolder;
use crate::Error;
use crate::FindAddrOpts;
use crate::SymResolver;
use crate::SymbolInfo;
use super::cache::ElfBackend;
use super::types::ET_DYN;
use super::types::ET_EXEC;
use super::types::PF_X;
use super::types::PT_LOAD;
use super::types::STT_FUNC;
use super::ElfParser;
/// The symbol resolver for a single ELF file.
///
/// An ELF file may be loaded into an address space with a relocation.
/// The callers should provide the path of an ELF file and where its
/// executable segment(s) is loaded.
///
/// For some ELF files, they are located at a specific address
/// determined during compile-time. For these cases, just pass `0` as
/// its loaded address.
pub struct ElfResolver {
// Cached backend (DWARF resolver or plain ELF parser) of the file.
backend: ElfBackend,
// Address the executable segment(s) is loaded at. For `ET_EXEC` files
// this is the lowest virtual address of the executable load segments.
loaded_address: u64,
// Lowest virtual address of the executable load segments.
loaded_to_virt: u64,
// Difference between that lowest virtual address and the file offset of
// the same segment.
foff_to_virt: u64,
// Distance from the lowest virtual address to the highest end address of
// the executable load segments.
size: u64,
// Path of the ELF file.
file_name: PathBuf,
}
impl ElfResolver {
pub(crate) fn new(
file_name: &Path,
loaded_address: u64,
cache_holder: &CacheHolder,
) -> Result<ElfResolver, Error> {
let backend = cache_holder.get_elf_cache().find(file_name)?;
let parser = match &backend {
ElfBackend::Dwarf(dwarf) => dwarf.get_parser(),
ElfBackend::Elf(parser) => parser,
};
let e_type = parser.get_elf_file_type()?;
let phdrs = parser.get_all_program_headers()?;
// Find the size of the block where the ELF file is/was
// mapped.
let mut max_addr = 0;
let mut low_addr = 0xffffffffffffffff;
let mut low_off = 0xffffffffffffffff;
if e_type == ET_DYN || e_type == ET_EXEC {
for phdr in phdrs {
if phdr.p_type != PT_LOAD {
continue;
}
if (phdr.p_flags & PF_X) != PF_X {
continue;
}
let end_at = phdr.p_vaddr + phdr.p_memsz;
if max_addr < end_at {
max_addr = end_at;
}
if phdr.p_vaddr < low_addr {
low_addr = phdr.p_vaddr;
low_off = phdr.p_offset;
}
}
} else {
return Err(Error::new(ErrorKind::InvalidData, "unknown e_type"));
}
let loaded_address = if e_type == ET_EXEC {
low_addr
} else {
loaded_address
};
let loaded_to_virt = low_addr;
let foff_to_virt = low_addr - low_off;
let size = max_addr - low_addr;
Ok(ElfResolver {
backend,
loaded_address,
loaded_to_virt,
foff_to_virt,
size,
file_name: file_name.to_path_buf(),
})
}
fn get_parser(&self) -> Option<&ElfParser> {
match &self.backend {
ElfBackend::Dwarf(dwarf) => Some(dwarf.get_parser()),
ElfBackend::Elf(parser) => Some(parser),
}
}
}
impl SymResolver for ElfResolver {
    /// Return the start (inclusive) and end (exclusive) of the address
    /// range this resolver is loaded at.
    fn get_address_range(&self) -> (u64, u64) {
        let start = self.loaded_address;
        (start, start + self.size)
    }

    /// Find the function symbol for the loaded address `addr`.
    ///
    /// Returns at most one `(name, loaded start address)` pair; the vector
    /// is empty when no symbol is found.
    fn find_symbols(&self, addr: u64) -> Vec<(&str, u64)> {
        // Translate the loaded address into a virtual address of the file.
        let off = addr - self.loaded_address + self.loaded_to_virt;
        let parser = match self.get_parser() {
            Some(parser) => parser,
            None => return vec![],
        };

        parser
            .find_symbol(off, STT_FUNC)
            .map(|(name, start_addr)| {
                vec![(name, start_addr - self.loaded_to_virt + self.loaded_address)]
            })
            .unwrap_or_default()
    }

    /// Find the symbols called `name`, with addresses translated to loaded
    /// addresses. Returns `None` when the backend reports an error.
    fn find_address(&self, name: &str, opts: &FindAddrOpts) -> Option<Vec<SymbolInfo>> {
        let lookup = match &self.backend {
            ElfBackend::Dwarf(dwarf) => dwarf.find_address(name, opts),
            ElfBackend::Elf(parser) => parser.find_address(name, opts),
        };
        let mut syms = lookup.ok()?;
        for sym in syms.iter_mut() {
            sym.address = sym.address - self.loaded_to_virt + self.loaded_address;
        }
        Some(syms)
    }

    /// Find the symbols whose name matches `pattern`, with addresses
    /// translated to loaded addresses. Returns `None` when the backend
    /// reports an error.
    fn find_address_regex(&self, pattern: &str, opts: &FindAddrOpts) -> Option<Vec<SymbolInfo>> {
        let lookup = match &self.backend {
            ElfBackend::Dwarf(dwarf) => dwarf.find_address_regex(pattern, opts),
            ElfBackend::Elf(parser) => parser.find_address_regex(pattern, opts),
        };
        let mut syms = lookup.ok()?;
        for sym in syms.iter_mut() {
            sym.address = sym.address - self.loaded_to_virt + self.loaded_address;
        }
        Some(syms)
    }

    /// Find the source file and line for the loaded address `addr`.
    ///
    /// Only the DWARF backend has line information; `None` is returned for
    /// the plain ELF backend or when no line is found. The path is the
    /// directory and the file name joined with a single `/`.
    fn find_line_info(&self, addr: u64) -> Option<AddressLineInfo> {
        let off = addr - self.loaded_address + self.loaded_to_virt;
        let dwarf = match &self.backend {
            ElfBackend::Dwarf(dwarf) => dwarf,
            ElfBackend::Elf(_) => return None,
        };

        let (directory, file, line_no) = dwarf.find_line_as_ref(off)?;
        let mut path = String::from(directory);
        // `ends_with` instead of slicing off the last byte: slicing panics
        // when the directory ends in a multi-byte UTF-8 character.
        if !path.is_empty() && !path.ends_with('/') {
            path.push('/');
        }
        path.push_str(file);

        Some(AddressLineInfo {
            path,
            line_no,
            column: 0,
        })
    }

    /// Translate the loaded address `addr` into an offset in the ELF file.
    fn addr_file_off(&self, addr: u64) -> Option<u64> {
        Some(addr - self.loaded_address + self.loaded_to_virt - self.foff_to_virt)
    }

    /// Return the path of the ELF file.
    fn get_obj_file_name(&self) -> &Path {
        &self.file_name
    }

    /// Return a short description: the backend kind followed by the path.
    fn repr(&self) -> String {
        let kind = match self.backend {
            ElfBackend::Dwarf(_) => "DWARF",
            ElfBackend::Elf(_) => "ELF",
        };
        format!("{} {}", kind, self.file_name.display())
    }
}

41
third_party/blazesym/src/elf/types.rs vendored Normal file
View File

@@ -0,0 +1,41 @@
pub use libc::Elf64_Addr;
pub use libc::Elf64_Half;
pub use libc::Elf64_Off;
pub use libc::Elf64_Phdr;
pub use libc::Elf64_Shdr;
pub use libc::Elf64_Sxword;
pub use libc::Elf64_Sym;
pub use libc::Elf64_Word;
pub use libc::Elf64_Xword;
pub use libc::Elf64_Ehdr;
pub use libc::ET_CORE;
pub use libc::ET_DYN;
pub use libc::ET_EXEC;
pub use libc::ET_HIPROC;
pub use libc::ET_LOPROC;
pub use libc::ET_NONE;
pub use libc::ET_REL;
pub use libc::PF_R;
pub use libc::PF_W;
pub use libc::PF_X;
pub use libc::PT_DYNAMIC;
pub use libc::PT_GNU_EH_FRAME;
pub use libc::PT_GNU_STACK;
pub use libc::PT_HIOS;
pub use libc::PT_HIPROC;
pub use libc::PT_INTERP;
pub use libc::PT_LOAD;
pub use libc::PT_LOOS;
pub use libc::PT_LOPROC;
pub use libc::PT_NOTE;
pub use libc::PT_NULL;
pub use libc::PT_PHDR;
pub use libc::PT_SHLIB;
pub use libc::PT_TLS;
/// Section index (`st_shndx`) of an undefined symbol.
pub const SHN_UNDEF: u16 = 0;
/// Symbol type (low four bits of `st_info`) of a function.
pub const STT_FUNC: u8 = 2;

147
third_party/blazesym/src/gsym/linetab.rs vendored Normal file
View File

@@ -0,0 +1,147 @@
//! Opcode runner of GSYM line table.
use crate::util::decode_leb128;
use crate::util::decode_leb128_s;
/// End of the line table; [`run_op`] reports it as [`RunResult::End`].
const END_SEQUENCE: u8 = 0x00;
/// Set [`LineTableRow.file_idx`], don't push a row.
const SET_FILE: u8 = 0x01;
/// Increment [`LineTableRow.address`], and push a row.
const ADVANCE_PC: u8 = 0x02;
/// Advance [`LineTableRow.file_line`] by a signed delta, don't push a row.
const ADVANCE_LINE: u8 = 0x03;
/// The first special opcode. Every opcode at or above this value is a
/// special opcode, and all special opcodes push a row.
const FIRST_SPECIAL: u8 = 0x04;
/// The outcome of running a single line table operator with [`run_op`].
#[derive(Debug)]
pub enum RunResult {
/// The operator ran successfully without emitting a row. The payload is
/// the size of the instruction in bytes.
Ok(usize),
/// The operator ran successfully and emitted a new row. The payload is
/// the size of the instruction in bytes.
NewRow(usize),
/// The end of the program (the operator stream).
End,
/// The operator at the given position could not be run.
Err,
}
/// The header that precedes the operators of a GSYM line table.
#[derive(Debug)]
pub struct LineTableHeader {
    /// `min_delta` and `max_delta` together define the range and encoding
    /// of the line delta carried by special operators. The line delta is
    /// the number of lines by which a row differs from the previous row.
    pub min_delta: i64,
    /// Upper bound (inclusive) of the line delta; see `min_delta`.
    pub max_delta: i64,
    /// The line number the line table state starts from.
    pub first_line: u32,
}

/// One row of a line table. It doubles as the register set of the line
/// table virtual machine.
#[derive(Clone, Debug)]
pub struct LineTableRow {
    /// The address this row applies to.
    pub address: u64,
    /// The index of the source file of this row.
    pub file_idx: u32,
    /// The line number within the source file.
    pub file_line: u32,
}

impl LineTableRow {
    /// Build the initial state of a line table virtual machine.
    ///
    /// The returned `LineTableRow` can be passed to [`run_op`] as `ctx`.
    ///
    /// # Arguments
    ///
    /// * `header` - is a [`LineTableHeader`] returned by [`parse_line_table_header()`].
    /// * `symaddr` - the address of the symbol that `header` belongs to.
    pub fn line_table_row_from(header: &LineTableHeader, symaddr: u64) -> LineTableRow {
        let file_line = header.first_line;
        LineTableRow {
            file_line,
            // The state starts out at file index 1.
            file_idx: 1,
            address: symaddr,
        }
    }
}
/// Run a GSYM line table operator/instruction in the buffer.
///
/// # Arguments
///
/// * `ctx` - a line table row holding the current state of the virtual
///           machine. [`line_table_row_from()`] can create a `LineTableRow`
///           to keep the state of a virtual machine.
/// * `header` - is a `LineTableHeader`.
/// * `ops` - is the buffer of the operators following the `LineTableHeader` in
///           a GSYM file.
/// * `pc` - is the program counter of the virtual machine.
///
/// Returns a [`RunResult`]. `Ok` and `NewRow` carry the size of this
/// instruction. The caller should advance `pc` by the value returned.
///
/// `RunResult::Err` is returned, and `ctx` is left unmodified, when `pc`
/// is outside `ops`, when an operand cannot be decoded, when the header
/// describes an empty or inverted delta range, or when the resulting address
/// or line number would fall outside its valid range.
pub fn run_op(
    ctx: &mut LineTableRow,
    header: &LineTableHeader,
    ops: &[u8],
    pc: usize,
) -> RunResult {
    // A program counter at or past the end has no opcode to decode.
    let op = match ops.get(pc) {
        Some(op) => *op,
        None => return RunResult::Err,
    };
    let mut off = pc + 1;
    match op {
        END_SEQUENCE => RunResult::End,
        SET_FILE => {
            if let Some((f, bytes)) = decode_leb128(&ops[off..]) {
                off += bytes as usize;
                ctx.file_idx = f as u32;
                RunResult::Ok(off - pc)
            } else {
                RunResult::Err
            }
        }
        ADVANCE_PC => {
            if let Some((adv, bytes)) = decode_leb128(&ops[off..]) {
                off += bytes as usize;
                // An address that wraps around indicates a corrupt table.
                match ctx.address.checked_add(adv) {
                    Some(address) => {
                        ctx.address = address;
                        RunResult::NewRow(off - pc)
                    }
                    None => RunResult::Err,
                }
            } else {
                RunResult::Err
            }
        }
        ADVANCE_LINE => {
            if let Some((adv, bytes)) = decode_leb128_s(&ops[off..]) {
                off += bytes as usize;
                // Keep the truncating behavior, but never trap on overflow.
                ctx.file_line = (ctx.file_line as i64).wrapping_add(adv) as u32;
                RunResult::Ok(off - pc)
            } else {
                RunResult::Err
            }
        }
        // Special operators.
        //
        // All operators that have a value greater than or equal to
        // FIRST_SPECIAL are considered special operators. These operators
        // change both the line number and address of the virtual machine and
        // emit a new row.
        _ => {
            let adjusted = (op - FIRST_SPECIAL) as i64;
            // The range of line number delta is from min_delta to max_delta,
            // including max_delta. It must contain at least one value.
            let range = match header
                .max_delta
                .checked_sub(header.min_delta)
                .and_then(|span| span.checked_add(1))
            {
                Some(range) if range > 0 => range,
                _ => return RunResult::Err,
            };
            // `adjusted % range` is below `range`, so the sum stays at or
            // below `max_delta` and cannot overflow.
            let line_delta = header.min_delta + (adjusted % range);
            // Non-negative because both operands are non-negative.
            let addr_delta = (adjusted / range) as u64;
            // Compute both new values first so `ctx` stays untouched on error.
            let file_line = match (ctx.file_line as i64)
                .checked_add(line_delta)
                .map(u32::try_from)
            {
                Some(Ok(line)) if line >= 1 => line,
                _ => return RunResult::Err,
            };
            let address = match ctx.address.checked_add(addr_delta) {
                Some(address) => address,
                None => return RunResult::Err,
            };
            ctx.file_line = file_line;
            ctx.address = address;
            RunResult::NewRow(off - pc)
        }
    }
}

6
third_party/blazesym/src/gsym/mod.rs vendored Normal file
View File

@@ -0,0 +1,6 @@
mod linetab;
mod parser;
mod resolver;
mod types;
pub use resolver::GsymResolver;

469
third_party/blazesym/src/gsym/parser.rs vendored Normal file
View File

@@ -0,0 +1,469 @@
//! Parser of GSYM format.
//!
//! The layout of a standalone GSYM contains following sections in the order.
//!
//! * Header
//! * Address Table
//! * Address Data Offset Table
//! * File Table
//! * String Table
//! * Address Data
//!
//! The standalone GSYM starts with a Header, which describes the
//! size of an entry in the address table, the number of entries in
//! the address table, and the location and the size of the string
//! table.
//!
//! Since the Address Table is immediately after the Header, the
//! Header describes only the size of an entry and number of entries
//! in the table but not where it is. The Address Table comprises
//! addresses of symbols in ascending order, so we can find the
//! symbol an address belongs to by doing a binary search for the
//! closest address that is smaller than or equal to it.
//!
//! The Address Data Offset Table has the same number of entries as
//! the Address Table. Every entry in one table has a
//! corresponding entry at the same index in the other table. The
//! entries in the Address Data Offset Table are always 32 bits
//! (4 bytes). Each one is the file offset of the respective Address
//! Data (actually an AddressInfo).
//!
//! An AddressInfo comprises the size and name of a symbol. The name
//! is an offset in the string table. You will find a null terminated
//! C string at the given offset. The size is the number of bytes of
//! the respective object; e.g., a function or variable.
//!
//! See <https://reviews.llvm.org/D53379>
use std::ffi::CStr;
use std::io::{Error, ErrorKind};
use crate::util::decode_leb128;
use crate::util::decode_leb128_s;
use crate::util::decode_udword;
use crate::util::decode_uhalf;
use crate::util::decode_uword;
use super::linetab::LineTableHeader;
use super::types::AddressData;
use super::types::AddressInfo;
use super::types::FileInfo;
use super::types::Header;
use super::types::InfoTypeEndOfList;
use super::types::InfoTypeInlineInfo;
use super::types::InfoTypeLineTableInfo;
use super::types::ADDR_DATA_OFFSET_SIZE;
use super::types::FILE_INFO_SIZE;
use super::types::GSYM_MAGIC;
use super::types::GSYM_VERSION;
/// Hold the major parts of a standalone GSYM file.
///
/// GsymContext provides functions to access major entities in GSYM.
/// GsymContext can find respective AddressInfo for an address. But,
/// it doesn't parse AddressData to get line numbers.
///
/// The developers should use [`parse_address_data()`],
/// [`parse_line_table_header()`], and [`linetab::run_op()`] to get
/// line number information from [`AddressInfo`].
pub struct GsymContext<'a> {
// The decoded file header.
header: Header,
// The raw bytes of the Address Table.
addr_tab: &'a [u8],
// The raw bytes of the Address Data Offset Table.
addr_data_off_tab: &'a [u8],
// The raw bytes of the File Table entries (without the leading count).
file_tab: &'a [u8],
// The raw bytes of the String Table.
str_tab: &'a [u8],
// The whole file; address data offsets are relative to its start.
raw_data: &'a [u8],
}
impl<'a> GsymContext<'a> {
    /// Parse the Header of a standalone GSYM file.
    ///
    /// # Arguments
    ///
    /// * `data` - is the content of a standalone GSYM.
    ///
    /// Returns a GsymContext, which includes the Header and other important tables.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidData` error when the magic or version does not
    /// match, when the address offset size is not 1, 2, 4 or 8, or when
    /// `data` is too short to hold the header or any of the tables the
    /// header describes.
    pub fn parse_header(data: &[u8]) -> Result<GsymContext, Error> {
        // magic(4) + version(2) + addr_off_size(1) + uuid_size(1) +
        // base_address(8) + num_addrs(4) + strtab_offset(4) +
        // strtab_size(4) + uuid(20)
        const HEADER_SIZE: usize = 48;

        fn invalid(msg: &'static str) -> Error {
            Error::new(ErrorKind::InvalidData, msg)
        }

        // Reject truncated input up front so that none of the fixed-offset
        // reads below can run past the end of the buffer.
        if data.len() < HEADER_SIZE {
            return Err(invalid(
                "the size of the file is smaller than expectation (header)",
            ));
        }

        let mut off = 0;
        // Parse Header
        let magic = decode_uword(data);
        if magic != GSYM_MAGIC {
            return Err(invalid("invalid magic number"));
        }
        off += 4;
        let version = decode_uhalf(&data[off..]);
        if version != GSYM_VERSION {
            return Err(invalid("unknown version number"));
        }
        off += 2;
        let addr_off_size = data[off];
        off += 1;
        // Address offsets are assembled into a `u64`, so only these widths
        // are meaningful.
        if !matches!(addr_off_size, 1 | 2 | 4 | 8) {
            return Err(invalid("invalid address offset size"));
        }
        let uuid_size = data[off];
        off += 1;
        let base_address = decode_udword(&data[off..]);
        off += 8;
        let num_addrs = decode_uword(&data[off..]);
        off += 4;
        let strtab_offset = decode_uword(&data[off..]);
        off += 4;
        let strtab_size = decode_uword(&data[off..]);
        off += 4;
        let mut uuid = [0u8; 20];
        uuid.copy_from_slice(&data[off..(off + 20)]);
        off += 20;

        // Get the slices of the Address Table, Address Data Offset Table,
        // and String table.
        let end_off = off + num_addrs as usize * addr_off_size as usize;
        if end_off > data.len() {
            return Err(invalid(
                "the size of the file is smaller than expectation (address table)",
            ));
        }
        let addr_tab = &data[off..end_off];

        // The Address Data Offset Table is aligned to 4 bytes.
        off = (end_off + 0x3) & !0x3;
        let end_off = off + num_addrs as usize * ADDR_DATA_OFFSET_SIZE;
        if end_off > data.len() {
            return Err(invalid(
                "the size of the file is smaller than expectation (address data offset table)",
            ));
        }
        let addr_data_off_tab = &data[off..end_off];
        off = end_off;

        // The File Table starts with its 32-bit entry count.
        if off + 4 > data.len() {
            return Err(invalid(
                "the size of the file is smaller than expectation (file table)",
            ));
        }
        let file_num = decode_uword(&data[off..]);
        off += 4;
        let end_off = off + file_num as usize * FILE_INFO_SIZE;
        if end_off > data.len() {
            return Err(invalid(
                "the size of the file is smaller than expectation (file table)",
            ));
        }
        let file_tab = &data[off..end_off];

        let end_off = strtab_offset as usize + strtab_size as usize;
        if end_off > data.len() {
            return Err(invalid(
                "the size of the file is smaller than expectation (string table)",
            ));
        }
        let str_tab = &data[strtab_offset as usize..end_off];

        Ok(GsymContext {
            header: Header {
                magic,
                version,
                addr_off_size,
                uuid_size,
                base_address,
                num_addrs,
                strtab_offset,
                strtab_size,
                uuid,
            },
            addr_tab,
            addr_data_off_tab,
            file_tab,
            str_tab,
            raw_data: data,
        })
    }

    /// The number of entries in the Address Table.
    pub fn num_addresses(&self) -> usize {
        self.header.num_addrs as usize
    }

    /// Get the address of an entry in the Address Table.
    ///
    /// Returns `None` when `idx` is out of range, when the entry does not
    /// lie inside the Address Table, or when adding the base address would
    /// overflow.
    pub fn addr_at(&self, idx: usize) -> Option<u64> {
        if idx >= self.header.num_addrs as usize {
            return None;
        }

        let width = self.header.addr_off_size as usize;
        // More than 8 bytes cannot be represented in a `u64`.
        if width > 8 {
            return None;
        }
        let off = idx * width;
        let bytes = self.addr_tab.get(off..off + width)?;
        // Entries are assembled least significant byte first.
        let offset = bytes
            .iter()
            .rev()
            .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte));
        self.header.base_address.checked_add(offset)
    }

    /// Get the AddressInfo of an address given by an index.
    ///
    /// Returns `None` when `idx` is out of range or when the recorded
    /// offset does not leave room for the 8-byte AddressInfo header inside
    /// the file.
    pub fn addr_info(&self, idx: usize) -> Option<AddressInfo> {
        if idx >= self.header.num_addrs as usize {
            return None;
        }

        let off = idx * ADDR_DATA_OFFSET_SIZE;
        let entry = self
            .addr_data_off_tab
            .get(off..off + ADDR_DATA_OFFSET_SIZE)?;
        let ad_off = decode_uword(entry) as usize;
        // The record consists of size (4 bytes), name (4 bytes) and the
        // address data that follows.
        let record = self.raw_data.get(ad_off..)?;
        if record.len() < 8 {
            return None;
        }
        let size = decode_uword(record);
        let name = decode_uword(&record[4..]);
        Some(AddressInfo {
            size,
            name,
            data: &record[8..],
        })
    }

    /// Get the string at the given offset from the String Table.
    ///
    /// Returns `None` when `off` is outside the String Table or when the
    /// string is not valid UTF-8. When no terminating null byte follows
    /// `off`, an empty string is returned.
    pub fn get_str(&self, off: usize) -> Option<&str> {
        let tail = self.str_tab.get(off..)?;
        if tail.is_empty() {
            return None;
        }

        match tail.iter().position(|&byte| byte == 0) {
            // The slice ends at the first null byte, so it is exactly one
            // C string and no unsafe pointer walk is needed.
            Some(nul) => CStr::from_bytes_with_nul(&tail[..=nul])
                .ok()?
                .to_str()
                .ok(),
            None => Some(""),
        }
    }

    /// Get the entry at index `idx` of the File Table.
    ///
    /// Returns `None` when `idx` is outside the File Table.
    pub fn file_info(&self, idx: usize) -> Option<FileInfo> {
        if idx >= self.file_tab.len() / FILE_INFO_SIZE {
            return None;
        }

        let off = idx * FILE_INFO_SIZE;
        let entry = self.file_tab.get(off..off + FILE_INFO_SIZE)?;
        Some(FileInfo {
            directory: decode_uword(&entry[..4]),
            filename: decode_uword(&entry[4..]),
        })
    }
}
/// Binary-search the address table for the entry most likely to contain
/// `addr`.
///
/// The result is the index of an entry whose address equals `addr`, or
/// otherwise the index of the last entry whose address is below `addr`.
/// `None` is returned when the table is empty, when `addr` is below the
/// first entry, or when an entry cannot be read.
///
/// The callers should check the respective `AddressInfo` to make sure
/// it is what they request for.
pub fn find_address(ctx: &GsymContext, addr: u64) -> Option<usize> {
    let count = ctx.num_addresses();
    if count == 0 || addr < ctx.addr_at(0)? {
        return None;
    }

    // Invariant: the entry at `lo` is at or below `addr`, and `lo < hi`.
    let (mut lo, mut hi) = (0, count);
    while hi - lo > 1 {
        let mid = (lo + hi) / 2;
        let probe = ctx.addr_at(mid)?;
        if probe == addr {
            return Some(mid);
        }
        if addr < probe {
            hi = mid;
        } else {
            lo = mid;
        }
    }
    Some(lo)
}
/// Parse AddressData.
///
/// AddressDatas are items following AddressInfo.
/// [`GsymContext::addr_info()`] returns the raw data of AddressDatas as a
/// slice at [`AddressInfo::data`].
///
/// # Arguments
///
/// * `data` - is the slice from AddressInfo::data.
///
/// Returns a vector of [`AddressData`]. Parsing stops after the entry of
/// type `InfoTypeEndOfList`, or at the first entry whose header or payload
/// does not fit in `data`; entries parsed up to that point are returned.
pub fn parse_address_data(data: &[u8]) -> Vec<AddressData> {
    // Every entry starts with a 32-bit type and a 32-bit payload length.
    const ENTRY_HEADER_SIZE: usize = 8;

    let mut data_objs = vec![];
    let mut off = 0;
    // `off` never exceeds `data.len()`, so the subtraction cannot underflow.
    while data.len() - off >= ENTRY_HEADER_SIZE {
        let typ = decode_uword(&data[off..]);
        let length = decode_uword(&data[off + 4..]);
        off += ENTRY_HEADER_SIZE;

        // Stop instead of slicing out of bounds on a truncated payload.
        let end = match off.checked_add(length as usize) {
            Some(end) if end <= data.len() => end,
            _ => break,
        };
        data_objs.push(AddressData {
            typ,
            length,
            data: &data[off..end],
        });
        off = end;

        #[allow(non_upper_case_globals)]
        match typ {
            InfoTypeEndOfList => {
                break;
            }
            InfoTypeLineTableInfo | InfoTypeInlineInfo => {}
            _ => {
                #[cfg(debug_assertions)]
                eprintln!("unknown info type");
            }
        }
    }
    data_objs
}
/// Decode the header of a line table.
///
/// An `AddressData` of `InfoTypeLineTableInfo` type is a table of line numbers
/// for a symbol. `AddressData` is the payload of `AddressInfo`. One
/// `AddressInfo` may have several `AddressData` entries in its payload. Each
/// `AddressData` entry stores one type of data related to the symbol the
/// `AddressInfo` represents.
///
/// # Arguments
///
/// * `data` - is what [`AddressData::data`] is.
///
/// Returns the `LineTableHeader` together with the number of bytes the
/// header occupies at the start of `data`, or `None` when one of the three
/// header fields cannot be decoded.
pub fn parse_line_table_header(data: &[u8]) -> Option<(LineTableHeader, usize)> {
    // The header is three consecutive LEB128 values: two signed deltas
    // followed by the unsigned first line.
    let (min_delta, min_len) = decode_leb128_s(data)?;
    let consumed = min_len as usize;
    let (max_delta, max_len) = decode_leb128_s(&data[consumed..])?;
    let consumed = consumed + max_len as usize;
    let (first_line, first_len) = decode_leb128(&data[consumed..])?;
    let consumed = consumed + first_len as usize;

    Some((
        LineTableHeader {
            min_delta,
            max_delta,
            first_line: first_line as u32,
        },
        consumed,
    ))
}
#[cfg(test)]
mod tests {
use super::*;
use std::env;
use std::fs::File;
use std::io::{Read, Write};
use std::path::Path;
/// Parse the `data/test.gsym` fixture and check that the two addresses
/// looked up resolve to the symbol names `main` and `factorial`.
#[test]
fn test_parse_context() {
let test_gsym = Path::new(&env!("CARGO_MANIFEST_DIR"))
.join("data")
.join("test.gsym");
let mut gsym_fo = File::open(test_gsym).unwrap();
let mut data = vec![];
gsym_fo.read_to_end(&mut data).unwrap();
let ctx = GsymContext::parse_header(&data).unwrap();
// Look up the first address and resolve its name via the string table.
let idx = find_address(&ctx, 0x0000000002000000).unwrap();
let addrinfo = ctx.addr_info(idx).unwrap();
assert_eq!(ctx.get_str(addrinfo.name as usize).unwrap(), "main");
// Same for the second address.
let idx = find_address(&ctx, 0x0000000002000100).unwrap();
let addrinfo = ctx.addr_info(idx).unwrap();
assert_eq!(ctx.get_str(addrinfo.name as usize).unwrap(), "factorial");
}
/// Check `find_address()` against `slice::binary_search()` on synthetic
/// address tables.
///
/// The context is parsed from the `data/test.gsym` fixture, then its
/// header fields and address table are overridden with generated 4-byte
/// entries before every lookup.
#[test]
fn test_find_address() {
let test_gsym = Path::new(&env!("CARGO_MANIFEST_DIR"))
.join("data")
.join("test.gsym");
let mut gsym_fo = File::open(test_gsym).unwrap();
let mut data = vec![];
const TEST_SIZE: usize = 6;
gsym_fo.read_to_end(&mut data).unwrap();
let mut addr_tab = Vec::<u8>::new();
addr_tab.resize(TEST_SIZE * 4, 0);
let mut values: Vec<u32> = (0_u32..(TEST_SIZE as u32)).collect();
// Serialize `values` as native-endian 32-bit entries into `addr_tab`.
let copy_to_addr_tab = |values: &[u32], addr_tab: &mut Vec<u8>| {
addr_tab.clear();
for v in values {
let r = addr_tab.write(&v.to_ne_bytes());
assert!(r.is_ok());
}
};
// Generate all possible sequences that values are in strictly
// ascending order and `< TEST_SIZE * 2`.
let gen_values = |values: &mut [u32]| {
let mut carry_out = TEST_SIZE as u32 * 2;
for i in (0..values.len()).rev() {
values[i] += 1;
if values[i] >= carry_out {
carry_out -= 1;
continue;
}
// Make all values at right side minimal and strictly
// ascending.
for j in (i + 1)..values.len() {
values[j] = values[j - 1] + 1;
}
break;
}
};
while values[0] <= TEST_SIZE as u32 {
copy_to_addr_tab(&values, &mut addr_tab);
for addr in 0..(TEST_SIZE * 2) {
let addr_tab = addr_tab.clone();
let mut ctx = GsymContext::parse_header(&data).unwrap();
// Replace the parsed table with the generated one.
ctx.header.num_addrs = TEST_SIZE as u32;
ctx.header.addr_off_size = 4;
ctx.header.base_address = 0;
ctx.addr_tab = addr_tab.as_slice();
// A miss is mapped to index 0, like the reference result below.
let idx = find_address(&ctx, addr as u64).unwrap_or(0);
let addr_u32 = addr as u32;
let idx1 = match values.binary_search(&addr_u32) {
Ok(idx) => idx,
Err(idx) => {
// When the searching value is falling in
// between two values, it will return the
// index of the later one. But we want the
// earlier one.
if idx > 0 {
idx - 1
} else {
0
}
}
};
assert_eq!(idx, idx1);
}
gen_values(&mut values);
}
}
}

View File

@@ -0,0 +1,224 @@
use std::fs::File;
use std::io::{Error, Read};
use std::mem;
use std::path::{Path, PathBuf};
use crate::{AddressLineInfo, FindAddrOpts, SymResolver, SymbolInfo};
use super::linetab::run_op;
use super::linetab::LineTableRow;
use super::linetab::RunResult;
use super::parser::find_address;
use super::parser::parse_address_data;
use super::parser::parse_line_table_header;
use super::parser::GsymContext;
use super::types::InfoTypeLineTableInfo;
/// The symbol resolver for the GSYM format.
pub struct GsymResolver {
// Path of the GSYM file the resolver was loaded from.
file_name: PathBuf,
// Parsed view of `_data`. The `'static` lifetime is produced by a
// transmute in `new()`; the slices actually borrow from `_data`.
ctx: GsymContext<'static>,
// The file content backing `ctx`; only kept to keep that memory alive.
_data: Vec<u8>,
// Subtracted from incoming addresses and added to the addresses returned.
loaded_address: u64,
}
impl GsymResolver {
pub fn new(file_name: PathBuf, loaded_address: u64) -> Result<GsymResolver, Error> {
let mut fo = File::open(&file_name)?;
let mut data = vec![];
fo.read_to_end(&mut data)?;
let ctx = GsymContext::parse_header(&data)?;
Ok(GsymResolver {
file_name,
// SAFETY: the lifetime of ctx depends on data, which is
// owned by the object. So, it is safe to strip the
// lifetime of ctx.
ctx: unsafe { mem::transmute(ctx) },
_data: data,
loaded_address,
})
}
}
impl SymResolver for GsymResolver {
fn get_address_range(&self) -> (u64, u64) {
let sz = self.ctx.num_addresses();
if sz == 0 {
return (0, 0);
}
// TODO: Must not unwrap.
let start = self.ctx.addr_at(0).unwrap() + self.loaded_address;
// TODO: Must not unwrap.
let end = self.ctx.addr_at(sz - 1).unwrap()
+ self.ctx.addr_info(sz - 1).unwrap().size as u64
+ self.loaded_address;
(start, end)
}
fn find_symbols(&self, addr: u64) -> Vec<(&str, u64)> {
let addr = addr - self.loaded_address;
let idx = if let Some(idx) = find_address(&self.ctx, addr) {
idx
} else {
return vec![];
};
let found = if let Some(addr) = self.ctx.addr_at(idx) {
addr
} else {
return vec![];
};
if addr < found {
return vec![];
}
let info = if let Some(info) = self.ctx.addr_info(idx) {
info
} else {
return Vec::new();
};
let name = if let Some(name) = self.ctx.get_str(info.name as usize) {
name
} else {
return Vec::new();
};
vec![(name, found + self.loaded_address)]
}
fn find_address(&self, _name: &str, _opts: &FindAddrOpts) -> Option<Vec<SymbolInfo>> {
// It is inefficient to find the address of a symbol with
// GSYM. We may support it in the future if needed.
None
}
fn find_address_regex(&self, _pattern: &str, _opts: &FindAddrOpts) -> Option<Vec<SymbolInfo>> {
None
}
/// Finds the source code location for a given address.
///
/// This function takes in an address and returns the file path,
/// line number and column of the line in the source code that
/// the address corresponds to. If it doesn't find any match it
/// returns `None`.
///
/// # Arguments
///
/// * `addr` - The address to find the source code location for.
///
/// # Returns
///
/// The `AddressLineInfo` corresponding to the address or `None`.
fn find_line_info(&self, addr: u64) -> Option<AddressLineInfo> {
let addr = addr.checked_sub(self.loaded_address)?;
let idx = find_address(&self.ctx, addr)?;
let symaddr = self.ctx.addr_at(idx)?;
if addr < symaddr {
return None;
}
let addrinfo = self.ctx.addr_info(idx)?;
if addr >= (symaddr + addrinfo.size as u64) {
return None;
}
let addrdatas = parse_address_data(addrinfo.data);
for adr_ent in addrdatas {
if adr_ent.typ != InfoTypeLineTableInfo {
continue;
}
// Continue to execute all GSYM line table operations
// until the end of the buffer is reached or a row
// containing addr is located.
let (lntab_hdr, hdr_bytes) = parse_line_table_header(adr_ent.data)?;
let ops = &adr_ent.data[hdr_bytes..];
let mut lntab_row = LineTableRow::line_table_row_from(&lntab_hdr, symaddr);
let mut last_lntab_row = lntab_row.clone();
let mut row_cnt = 0;
let mut pc = 0;
while pc < ops.len() {
match run_op(&mut lntab_row, &lntab_hdr, ops, pc) {
RunResult::Ok(bytes) => {
pc += bytes;
}
RunResult::NewRow(bytes) => {
pc += bytes;
row_cnt += 1;
if addr < lntab_row.address {
if row_cnt == 1 {
// The address is lower than the first row.
return None;
}
// Rollback to the last row.
lntab_row = last_lntab_row;
break;
}
last_lntab_row = lntab_row.clone();
}
RunResult::End | RunResult::Err => {
break;
}
}
}
if row_cnt == 0 {
continue;
}
let finfo = self.ctx.file_info(lntab_row.file_idx as usize)?;
let dirname = self.ctx.get_str(finfo.directory as usize)?;
let filename = self.ctx.get_str(finfo.filename as usize)?;
let path = Path::new(dirname).join(filename).to_str()?.to_string();
return Some(AddressLineInfo {
path,
line_no: lntab_row.file_line as usize,
column: 0,
});
}
None
}
fn addr_file_off(&self, _addr: u64) -> Option<u64> {
// Unavailable
None
}
fn get_obj_file_name(&self) -> &Path {
&self.file_name
}
fn repr(&self) -> String {
format!("GSYM {:?}", self.file_name)
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::env;
/// Make sure that we can find file line information for a function, if available.
///
/// Uses the `data/test.gsym` fixture with a loaded address of 0.
#[test]
fn test_find_line_info() {
let test_gsym = Path::new(&env!("CARGO_MANIFEST_DIR"))
.join("data")
.join("test.gsym");
let resolver = GsymResolver::new(test_gsym, 0).unwrap();
// `main` resides at address 0x2000000, and it's located at line 19.
let info = resolver.find_line_info(0x2000000).unwrap();
assert_eq!(info.line_no, 19);
assert!(info.path.ends_with("test-gsym.c"));
// `factorial` resides at address 0x2000100, and it's located at line 7.
let info = resolver.find_line_info(0x2000100).unwrap();
assert_eq!(info.line_no, 7);
assert!(info.path.ends_with("test-gsym.c"));
}
}

46
third_party/blazesym/src/gsym/types.rs vendored Normal file
View File

@@ -0,0 +1,46 @@
/// The magic number at the start of a GSYM file; the bytes spell "GSYM" in ASCII.
pub const GSYM_MAGIC: u32 = 0x4753594d;
/// The only GSYM format version accepted by the parser.
pub const GSYM_VERSION: u16 = 1;
/// The size of address data offsets in GSYM.
pub const ADDR_DATA_OFFSET_SIZE: usize = 4;
/// The size of a GSYM `FileInfo` object.
pub const FILE_INFO_SIZE: usize = 8;
/// GSYM File Header
///
/// The fields are listed in the order they are stored in the file.
pub struct Header {
/// The magic number; expected to be `GSYM_MAGIC`.
pub magic: u32,
/// The format version; expected to be `GSYM_VERSION`.
pub version: u16,
/// The size in bytes of an entry in the address table.
pub addr_off_size: u8,
/// The `uuid_size` byte of the header; presumably the number of
/// meaningful bytes in `uuid` (not checked by the parser).
pub uuid_size: u8,
/// The base that is added to every address table entry.
pub base_address: u64,
/// The number of entries in the address table.
pub num_addrs: u32,
/// The file offset of the string table.
pub strtab_offset: u32,
/// The size in bytes of the string table.
pub strtab_size: u32,
/// The 20 bytes of UUID storage in the header.
pub uuid: [u8; 20],
}
/// An entry of the File Table. Both fields are offsets into the string table.
pub struct FileInfo {
/// String table offset of the directory name.
pub directory: u32,
/// String table offset of the file name.
pub filename: u32,
}
/// The size, name and attached data of the symbol at one address table entry.
pub struct AddressInfo<'a> {
/// The number of bytes the symbol occupies.
pub size: u32,
/// String table offset of the symbol name.
pub name: u32,
/// The raw data comprises a list of [`AddressData`].
pub data: &'a [u8],
}
/// One typed record in the payload of an [`AddressInfo`].
pub struct AddressData<'a> {
/// The data type. Its value should be one of InfoType*.
pub typ: u32,
/// The length in bytes of `data`.
pub length: u32,
/// The payload of this record.
pub data: &'a [u8],
}
/// Marks the last [`AddressData`] record of an [`AddressInfo`].
#[allow(non_upper_case_globals)]
pub const InfoTypeEndOfList: u32 = 0;
/// The record holds a line table.
#[allow(non_upper_case_globals)]
pub const InfoTypeLineTableInfo: u32 = 1;
/// The record holds inline information (not interpreted by the resolver).
#[allow(non_upper_case_globals)]
pub const InfoTypeInlineInfo: u32 = 2;

415
third_party/blazesym/src/ksym.rs vendored Normal file
View File

@@ -0,0 +1,415 @@
use super::{FindAddrOpts, SymbolInfo, SymbolType};
use std::cell::RefCell;
use std::collections::HashMap;
use std::default::Default;
use std::fs::File;
use std::io::{BufRead, BufReader, Error};
use std::path::{Path, PathBuf};
use std::rc::Rc;
use std::u64;
use crate::SymResolver;
use regex::Regex;
/// The default location of the kernel symbol table.
const KALLSYMS: &str = "/proc/kallsyms";
/// The initial capacity of the symbol vector of a new resolver.
const DFL_KSYM_CAP: usize = 200000;

/// A kernel symbol: its address and its name.
pub struct Ksym {
    pub addr: u64,
    pub name: String,
}

/// The symbol resolver for /proc/kallsyms.
///
/// The users should provide the path of kallsyms, so you can provide
/// a copy from other devices.
pub struct KSymResolver {
    // All loaded symbols, sorted by address.
    syms: Vec<Ksym>,
    // Lazily built name index; its keys borrow from the names in `syms`.
    sym_to_addr: RefCell<HashMap<&'static str, u64>>,
    // The file the symbols were last loaded from.
    file_name: PathBuf,
}

impl KSymResolver {
    /// Create an empty resolver; call [`KSymResolver::load`] or
    /// [`KSymResolver::load_file_name`] to fill it.
    pub fn new() -> KSymResolver {
        Default::default()
    }

    /// Load symbols from a file in the kallsyms format.
    ///
    /// Every line is expected to consist of an address in hexadecimal, a
    /// symbol type and a symbol name, separated by whitespace. Symbols with
    /// address 0 and lines with an unparsable address are skipped. Parsing
    /// stops at the first line with fewer than three fields. The symbols
    /// are added to the ones already loaded.
    ///
    /// # Errors
    ///
    /// Fails when the file cannot be opened or read. In that case the
    /// symbols of the resolver are left as they were before the call.
    pub fn load_file_name(&mut self, filename: PathBuf) -> Result<(), std::io::Error> {
        let f = File::open(&filename)?;
        let mut reader = BufReader::new(f);
        let mut line = String::new();

        // The name index is built lazily from `syms`; drop it so that it is
        // rebuilt and covers the symbols loaded here.
        self.sym_to_addr.get_mut().clear();

        let loaded_before = self.syms.len();
        loop {
            line.clear();
            match reader.read_line(&mut line) {
                Ok(0) => break,
                Ok(_) => {}
                Err(err) => {
                    // Do not leave a partial, unsorted load behind.
                    self.syms.truncate(loaded_before);
                    return Err(err);
                }
            }

            let mut tokens = line.split_whitespace();
            let (addr, func) = match (tokens.next(), tokens.next(), tokens.next()) {
                (Some(addr), Some(_symbol), Some(func)) => (addr, func),
                _ => break,
            };

            if let Ok(addr) = u64::from_str_radix(addr, 16) {
                // 0 is not a valid symbol address.
                if addr != 0 {
                    self.syms.push(Ksym {
                        addr,
                        name: String::from(func),
                    });
                }
            }
        }

        // A stable sort keeps the file order of symbols sharing an address.
        self.syms.sort_by(|a, b| a.addr.cmp(&b.addr));
        self.file_name = filename;
        Ok(())
    }

    /// Load symbols from `/proc/kallsyms`.
    pub fn load(&mut self) -> Result<(), std::io::Error> {
        self.load_file_name(PathBuf::from(KALLSYMS))
    }

    /// Build the name index if it has not been built yet.
    fn ensure_sym_to_addr(&self) {
        if !self.sym_to_addr.borrow().is_empty() {
            return;
        }

        let mut sym_to_addr = self.sym_to_addr.borrow_mut();
        for Ksym { name, addr } in self.syms.iter() {
            // Performance & lifetime hacking
            //
            // SAFETY: the key points into the heap buffer of a `String`
            // owned by `self.syms`. It is only valid as long as that string
            // is neither dropped nor modified while the index holds the key.
            let name_static: &'static str = unsafe { &*(name.as_str() as *const str) };
            sym_to_addr.insert(name_static, *addr);
        }
    }

    /// Find the symbols an address belongs to.
    ///
    /// Yields every symbol that shares the greatest address not above
    /// `addr`, in reverse table order. Yields nothing when `addr` is below
    /// all symbols.
    pub fn find_addresses_ksym(&self, addr: u64) -> impl Iterator<Item = &Ksym> {
        // Find the index of the first symbol located above `addr`.
        let mut l = 0;
        let mut r = self.syms.len();
        while l < r {
            let m = (l + r) / 2;
            let sym = &self.syms[m];

            if addr < sym.addr {
                r = m;
            } else {
                l = m + 1;
            }
        }
        debug_assert!(
            (l == 0 || l >= self.syms.len())
                || (self.syms[l - 1].addr <= addr && addr < self.syms[l].addr)
        );

        // When `l` is 0 the slice is empty and the closure is never run, so
        // `l - 1` is not evaluated.
        self.syms[0..l]
            .iter()
            .rev()
            .take_while(move |sym| sym.addr == self.syms[l - 1].addr)
    }

    /// Linear-scan reference implementation of
    /// [`KSymResolver::find_addresses_ksym`], used by the tests.
    #[cfg(test)]
    pub fn find_addresses_ksym_simple(&self, addr: u64) -> impl Iterator<Item = &Ksym> {
        let mut i = 0;
        while i < self.syms.len() && addr >= self.syms[i].addr {
            i += 1;
        }
        self.syms[..i]
            .iter()
            .rev()
            .take_while(move |x| x.addr == self.syms[i - 1].addr)
    }
}

impl Default for KSymResolver {
    fn default() -> Self {
        KSymResolver {
            syms: Vec::with_capacity(DFL_KSYM_CAP),
            sym_to_addr: RefCell::new(HashMap::new()),
            file_name: PathBuf::from(""),
        }
    }
}
impl SymResolver for KSymResolver {
    /// Return the fixed address range this resolver is used for.
    fn get_address_range(&self) -> (u64, u64) {
        (0xffffffff80000000, 0xffffffffffffffff)
    }

    /// Find the names and addresses of the symbols `addr` belongs to.
    fn find_symbols(&self, addr: u64) -> Vec<(&str, u64)> {
        self.find_addresses_ksym(addr)
            .map(|sym| (sym.name.as_str(), sym.addr))
            .collect()
    }

    /// Find the address of the symbol with the given name.
    ///
    /// Returns `None` when variables are requested or when no symbol has
    /// that name. Matches are always reported as functions of size 0.
    fn find_address(&self, name: &str, opts: &FindAddrOpts) -> Option<Vec<SymbolInfo>> {
        if let SymbolType::Variable = opts.sym_type {
            return None;
        }
        self.ensure_sym_to_addr();

        let address = *self.sym_to_addr.borrow().get(name)?;
        Some(vec![SymbolInfo {
            name: name.to_string(),
            address,
            size: 0,
            sym_type: SymbolType::Function,
            ..Default::default()
        }])
    }

    /// Find the symbols whose names match a regular expression.
    ///
    /// Returns `None` when variables are requested or when `pattern` is not
    /// a valid regular expression; otherwise the (possibly empty) list of
    /// matches, reported as functions of size 0.
    fn find_address_regex(&self, pattern: &str, opts: &FindAddrOpts) -> Option<Vec<SymbolInfo>> {
        if let SymbolType::Variable = opts.sym_type {
            return None;
        }
        // The pattern comes from the caller; an invalid one is reported as
        // "nothing found" rather than a panic.
        let re = Regex::new(pattern).ok()?;
        self.ensure_sym_to_addr();

        let mut syms = vec![];
        for (name, addr) in self.sym_to_addr.borrow().iter() {
            if re.is_match(name) {
                syms.push(SymbolInfo {
                    name: name.to_string(),
                    address: *addr,
                    size: 0,
                    sym_type: SymbolType::Function,
                    ..Default::default()
                });
            }
        }
        Some(syms)
    }

    /// Line information is not available from kallsyms.
    fn find_line_info(&self, _addr: u64) -> Option<super::AddressLineInfo> {
        None
    }

    /// File offsets are not available from kallsyms.
    fn addr_file_off(&self, _addr: u64) -> Option<u64> {
        None
    }

    /// Return the path the symbols were last loaded from.
    fn get_obj_file_name(&self) -> &Path {
        &self.file_name
    }

    fn repr(&self) -> String {
        String::from("KSymResolver")
    }
}
/// Cache of KSymResolver.
///
/// It returns the same isntance if path is the same.
pub struct KSymCache {
resolvers: RefCell<HashMap<PathBuf, Rc<KSymResolver>>>,
}
impl KSymCache {
pub fn new() -> KSymCache {
KSymCache {
resolvers: RefCell::new(HashMap::new()),
}
}
/// Find an instance of KSymResolver from the cache or create a new one.
pub fn get_resolver(&self, path: &Path) -> Result<Rc<KSymResolver>, Error> {
let mut resolvers = self.resolvers.borrow_mut();
if let Some(resolver) = resolvers.get(path) {
return Ok(resolver.clone());
}
let mut resolver = Rc::new(KSymResolver::new());
Rc::get_mut(&mut resolver)
.unwrap()
.load_file_name(path.to_path_buf())?;
resolvers.insert(path.to_path_buf(), resolver.clone());
Ok(resolver)
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::cmp::Ordering;
// This test case is skipped by default because /proc/kallsyms may
// not be available in some environments.
//
// It loads the live kernel symbol table and checks lookups by address
// (exact and off by one) as well as a lookup by name.
#[test]
#[ignore = "system-dependent; may fail"]
fn ksym_resolver_load_find() {
let mut resolver = KSymResolver::new();
assert!(resolver.load().is_ok());
assert!(
resolver.syms.len() > 10000,
"kallsyms seems to be unavailable or with all 0 addresses. (Check /proc/kallsyms)"
);
// Find the address of the symbol placed at the middle
let sym = &resolver.syms[resolver.syms.len() / 2];
let addr = sym.addr;
let name = sym.name.clone();
let found = resolver.find_symbols(addr);
assert!(!found.is_empty());
assert!(found.iter().any(|x| x.0 == name));
let addr = addr + 1;
let found = resolver.find_symbols(addr);
assert!(!found.is_empty());
assert!(found.iter().any(|x| x.0 == name));
// 0 is an invalid address. We remove all symbols with 0 as
// their address from the list.
let found = resolver.find_symbols(0);
assert!(found.is_empty());
// Find the address of the last symbol
let sym = &resolver.syms.last().unwrap();
let addr = sym.addr;
let name = sym.name.clone();
let found = resolver.find_symbols(addr);
assert!(!found.is_empty());
assert!(found.iter().any(|x| x.0 == name));
let found = resolver.find_symbols(addr + 1);
assert!(!found.is_empty());
assert!(found.iter().any(|x| x.0 == name));
// Find the symbol placed at the one third
let sym = &resolver.syms[resolver.syms.len() / 3];
let addr = sym.addr;
let name = sym.name.clone();
let opts = FindAddrOpts {
offset_in_file: false,
obj_file_name: false,
sym_type: SymbolType::Function,
};
let found = resolver.find_address(&name, &opts);
assert!(found.is_some());
assert!(found.unwrap().iter().any(|x| x.address == addr));
}
// Request a resolver for the same path twice and check that both requests
// succeed.
// NOTE(review): this reads /proc/kallsyms but, unlike the test above, is
// not marked `#[ignore]` -- confirm it is meant to run everywhere.
#[test]
fn ksym_cache() {
let cache = KSymCache::new();
let resolver = cache.get_resolver(Path::new(KALLSYMS));
let resolver1 = cache.get_resolver(Path::new(KALLSYMS));
assert!(resolver.is_ok());
assert!(resolver1.is_ok());
}
// Check `find_addresses_ksym()` on a small hand-written table that
// contains two symbols sharing one address.
#[test]
fn find_addresses_ksym() {
let mut resolver = KSymResolver::new();
resolver.syms = vec![
Ksym {
addr: 0x123,
name: "1".to_string(),
},
Ksym {
addr: 0x123,
name: "1.5".to_string(),
},
Ksym {
addr: 0x1234,
name: "2".to_string(),
},
Ksym {
addr: 0x12345,
name: "3".to_string(),
},
];
// The address is less than the smallest address of all symbols.
assert!(resolver.find_addresses_ksym(1).next().is_none());
// The address matches symbols exactly (the first address).
let syms = resolver.find_addresses_ksym(0x123).collect::<Vec<_>>();
assert_eq!(syms.len(), 2);
assert_eq!(syms[0].addr, 0x123);
assert_eq!(syms[0].name, "1.5");
assert_eq!(syms[1].addr, 0x123);
assert_eq!(syms[1].name, "1");
// The address is in between two symbols (the first address).
let syms = resolver.find_addresses_ksym(0x124).collect::<Vec<_>>();
assert_eq!(syms.len(), 2);
assert_eq!(syms[0].addr, 0x123);
assert_eq!(syms[0].name, "1.5");
assert_eq!(syms[1].addr, 0x123);
assert_eq!(syms[1].name, "1");
// The address matches a symbol exactly.
let syms = resolver.find_addresses_ksym(0x1234).collect::<Vec<_>>();
assert_eq!(syms.len(), 1);
assert_eq!(syms[0].addr, 0x1234);
assert_eq!(syms[0].name, "2");
// The address is in between two symbols.
let syms = resolver.find_addresses_ksym(0x1235).collect::<Vec<_>>();
assert_eq!(syms.len(), 1);
assert_eq!(syms[0].addr, 0x1234);
assert_eq!(syms[0].name, "2");
// The address matches a symbol exactly (the biggest address).
let syms = resolver.find_addresses_ksym(0x12345).collect::<Vec<_>>();
assert_eq!(syms.len(), 1);
assert_eq!(syms[0].addr, 0x12345);
assert_eq!(syms[0].name, "3");
// The address is bigger than the biggest address of all symbols.
let syms = resolver.find_addresses_ksym(0x1234568).collect::<Vec<_>>();
assert_eq!(syms.len(), 1);
assert_eq!(syms[0].addr, 0x12345);
assert_eq!(syms[0].name, "3");
}
// Compare `find_addresses_ksym()` with the linear reference
// implementation `find_addresses_ksym_simple()` over generated tables of
// non-decreasing addresses. The tables are enumerated like a counter:
// the rightmost address is incremented and overflows carry to the left.
#[test]
fn find_addresses_ksym_exhaust() {
let syms_sz = 10;
let mut resolver = KSymResolver::new();
resolver.syms = (0..syms_sz)
.map(|x| Ksym {
addr: 1,
name: x.to_string(),
})
.collect();
// A full-adder has a carry-out signal, right?
// Yes! Here it is.
let raised_carry_out = |addr| addr > syms_sz as u64;
while !raised_carry_out(resolver.syms[0].addr) {
// Test find_addresses_ksym() against every address in the
// range [0..syms_sz+1].
for i in 0..=(syms_sz + 1) {
let result: Vec<_> = resolver.find_addresses_ksym(i as u64).collect();
let result_s: Vec<_> = resolver.find_addresses_ksym_simple(i as u64).collect();
assert_eq!(result.len(), result_s.len());
assert_eq!(
result
.iter()
.map(|x| x.name.as_str())
.cmp(result_s.iter().map(|x| x.name.as_str())),
Ordering::Equal
);
}
let mut i = syms_sz - 1;
// Increase the address of the last symbol.
resolver.syms[i].addr += 1;
while i > 0 && raised_carry_out(resolver.syms[i].addr) {
// Carry the raised carry-out over to the left.
i -= 1;
resolver.syms[i].addr += 1;
}
// Every symbol on the right side has a raised carry-out.
// Reset their addresses.
let low_addr = resolver.syms[i].addr;
while i < (syms_sz - 1) {
i += 1;
resolver.syms[i].addr = low_addr;
}
}
}
}

1058
third_party/blazesym/src/lib.rs vendored Normal file

File diff suppressed because it is too large Load Diff

444
third_party/blazesym/src/util.rs vendored Normal file
View File

@@ -0,0 +1,444 @@
use std::ffi::CStr;
use std::fs;
use std::io::{BufRead, BufReader, Error, ErrorKind};
use std::mem::size_of;
use std::path::PathBuf;
use regex::Regex;
/// Binary search `data` for the entry covering `address`.
///
/// `data` is expected to be sorted by the key that `keyfn` extracts from an
/// entry. The function returns the index of an entry whose key equals
/// `address` or, if there is no such entry, the index of the last entry with
/// a key less than `address`. `None` is returned when `data` is empty or when
/// `address` is less than the key of the first entry.
pub fn search_address_key<T, V: Ord>(
    data: &[T],
    address: V,
    keyfn: &dyn Fn(&T) -> V,
) -> Option<usize> {
    // Nothing can match in an empty slice or below the very first key.
    let first = data.first()?;
    if address < keyfn(first) {
        return None;
    }

    // Invariant: key(data[lo]) <= address, and every entry at or beyond `hi`
    // has a key greater than `address`.
    let (mut lo, mut hi) = (0, data.len());
    while hi - lo > 1 {
        let mid = (lo + hi) / 2;
        let key = keyfn(&data[mid]);
        match address.cmp(&key) {
            std::cmp::Ordering::Equal => return Some(mid),
            std::cmp::Ordering::Less => hi = mid,
            std::cmp::Ordering::Greater => lo = mid,
        }
    }
    Some(lo)
}
/// Binary search `data` for the entry covering `address`, ignoring entries
/// for which `keyfn` yields no key.
///
/// The entries that do have a key are expected to be sorted by it. The
/// function returns the index of a keyed entry whose key equals `address` or,
/// failing that, the index of the last keyed entry with a key less than
/// `address`. `None` is returned when no entry has a key or when `address` is
/// less than the first key.
pub fn search_address_opt_key<T, V: Ord>(
    data: &[T],
    address: V,
    keyfn: &dyn Fn(&T) -> Option<V>,
) -> Option<usize> {
    // The search starts at the first entry that actually has a key.
    let (mut lo, first_key) = data
        .iter()
        .enumerate()
        .find_map(|(idx, entry)| keyfn(entry).map(|key| (idx, key)))?;
    if address < first_key {
        return None;
    }

    let mut hi = data.len();
    while hi - lo > 1 {
        let mid = (lo + hi) / 2;
        // Probe the middle entry or, if it has no key, the closest keyed
        // entry to its right that is still inside the window.
        let probe = (mid..hi).find_map(|idx| keyfn(&data[idx]).map(|key| (idx, key)));
        match probe {
            // No entry in `mid..hi` has a key; only the left half is left.
            None => hi = mid,
            Some((idx, key)) => {
                if key == address {
                    return Some(idx);
                }
                if address < key {
                    hi = idx;
                } else {
                    lo = idx;
                }
            }
        }
    }
    Some(lo)
}
/// Extract the NUL terminated string starting at offset `off` of `raw`.
///
/// Returns `None` if `off` is not inside of `raw`, if there is no NUL byte at
/// or after `off`, or if the bytes in front of the NUL byte are not valid
/// UTF-8. The returned string does not include the terminating NUL byte.
pub fn extract_string(raw: &[u8], off: usize) -> Option<&str> {
    let tail = raw.get(off..)?;
    // Index of the terminating NUL byte, relative to `off`.
    let nul = tail.iter().position(|&byte| byte == 0)?;
    let cstr = CStr::from_bytes_with_nul(&tail[..=nul]).ok()?;
    cstr.to_str().ok()
}
/// A single file backed mapping, as parsed by [`parse_maps`] from one line of
/// a process' `maps` file.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct LinuxMapsEntry {
    /// The start of the mapped address range (the first hexadecimal field of
    /// the line).
    pub loaded_address: u64,
    /// The end of the mapped address range (the second hexadecimal field of
    /// the line).
    pub end_address: u64,
    /// The permission field of the line, folded into bits: one bit per
    /// character, first character in the most significant position, with a
    /// bit being set if the character is not `-`. For example, `r-xp` is
    /// stored as `0b1011`.
    pub mode: u8,
    /// The offset field of the line.
    pub offset: u64,
    /// The path of the mapped file. If the line marks the file as
    /// ` (deleted)`, this is the matching entry in the process' `map_files`
    /// directory instead.
    pub path: PathBuf,
}
/// Parse the file backed memory mappings of a process from its `maps` file.
///
/// `pid` identifies the process to inspect. A `pid` of 0 refers to the
/// calling process, i.e., `/proc/self/maps` is read.
///
/// Only lines that match the expected layout (address range, permissions,
/// offset, device, inode, and an absolute path) result in an entry; all other
/// lines are skipped. If the path of a mapping ends in ` (deleted)`, the
/// reported path is the corresponding link in the process' `map_files`
/// directory, through which the mapped contents are still reachable.
///
/// # Errors
/// Returns the underlying I/O error if the `maps` file cannot be opened or
/// read, and an error of kind [`ErrorKind::InvalidData`] if the line pattern
/// cannot be compiled or if an address or offset does not fit into a `u64`.
pub fn parse_maps(pid: u32) -> Result<Vec<LinuxMapsEntry>, Error> {
    const DELETED_SUFFIX: &str = " (deleted)";

    // A `pid` of 0 means "the calling process". Use the same directory for
    // the `maps` file and for `map_files` links, so that deleted files of the
    // calling process do not end up pointing into `/proc/0`.
    let proc_dir = if pid == 0 {
        String::from("/proc/self")
    } else {
        format!("/proc/{pid}")
    };

    let file = fs::File::open(format!("{proc_dir}/maps"))?;
    let mut reader = BufReader::new(file);

    // NOTE(review): the permission class does not contain `s`, so lines of
    // shared mappings (e.g. `rw-s`) do not match and are skipped -- confirm
    // that this is intended.
    let re_ptn = Regex::new(
        r"^([0-9a-f]+)-([0-9a-f]+) ([rwxp\\-]+) ([0-9a-f]+) [0-9a-f]+:[0-9a-f]+ [0-9]+ *((/[^/]+)+)$",
    )
    .map_err(|err| {
        Error::new(
            ErrorKind::InvalidData,
            format!("Failed to build regex: {err}"),
        )
    })?;

    // The pattern only admits hexadecimal digits, so the sole way for this
    // conversion to fail is a value that does not fit into 64 bits.
    let parse_hex = |digits: &str| {
        u64::from_str_radix(digits, 16).map_err(|err| Error::new(ErrorKind::InvalidData, err))
    };

    let mut entries = Vec::new();
    let mut line = String::new();
    while reader.read_line(&mut line)? > 0 {
        if let Some(caps) = re_ptn.captures(&line) {
            // None of the groups 1 to 5 is optional in the pattern, so all
            // of them are present whenever a line matches.
            let loaded_address = parse_hex(caps.get(1).unwrap().as_str())?;
            let end_address = parse_hex(caps.get(2).unwrap().as_str())?;
            // One bit per permission character, first character in the most
            // significant position; `-` clears the bit.
            let mode = caps
                .get(3)
                .unwrap()
                .as_str()
                .chars()
                .fold(0u8, |bits, c| (bits << 1) | u8::from(c != '-'));
            let offset = parse_hex(caps.get(4).unwrap().as_str())?;

            // `read_line` keeps the line terminator and the pattern makes it
            // part of the path. The final line of the input may lack one.
            let raw_path = caps.get(5).unwrap().as_str();
            let path = raw_path.strip_suffix('\n').unwrap_or(raw_path);
            let path = if path.ends_with(DELETED_SUFFIX) {
                PathBuf::from(format!(
                    "{proc_dir}/map_files/{loaded_address:x}-{end_address:x}"
                ))
            } else {
                PathBuf::from(path)
            };

            entries.push(LinuxMapsEntry {
                loaded_address,
                end_address,
                mode,
                offset,
                path,
            });
        }
        line.clear();
    }
    Ok(entries)
}
/// Decode an unsigned LEB128 encoded value from the start of `data`.
///
/// Returns the value as a `u128` along with the number of bytes that make up
/// its encoding, or `None` if no complete value could be read from `data`.
#[inline]
pub fn decode_leb128_128(mut data: &[u8]) -> Option<(u128, u8)> {
data.read_u128_leb128()
}
/// Decode an unsigned LEB128 encoded value from the start of `data`.
///
/// Returns the value along with the number of bytes that make up its
/// encoding, or `None` if no complete value could be read from `data`. The
/// value is decoded as a `u128` and then truncated to its lower 64 bits.
#[inline]
pub fn decode_leb128(mut data: &[u8]) -> Option<(u64, u8)> {
data.read_u128_leb128().map(|(v, s)| (v as u64, s))
}
/// Decode a signed LEB128 encoded value from the start of `data`.
///
/// Returns the value along with the number of bytes that make up its
/// encoding, or `None` if no complete value could be read from `data`. The
/// value is decoded as an `i128` and then truncated to its lower 64 bits.
#[inline]
pub fn decode_leb128_s(mut data: &[u8]) -> Option<(i64, u8)> {
data.read_i128_leb128().map(|(v, s)| (v as i64, s))
}
/// Read a `u16` from the first two bytes of `data`, without any byte order
/// conversion (i.e., the bytes are interpreted in host byte order).
///
/// # Panics
/// Panics if `data` contains fewer than two bytes.
#[inline]
pub fn decode_uhalf(mut data: &[u8]) -> u16 {
// TODO: Need to handle errors more gracefully.
data.read_u16().unwrap()
}
/// Read a `u32` from the first four bytes of `data`, without any byte order
/// conversion (i.e., the bytes are interpreted in host byte order).
///
/// # Panics
/// Panics if `data` contains fewer than four bytes.
#[inline]
pub fn decode_uword(mut data: &[u8]) -> u32 {
// TODO: Need to handle errors more gracefully.
data.read_u32().unwrap()
}
/// Read a `u64` from the first eight bytes of `data`, without any byte order
/// conversion (i.e., the bytes are interpreted in host byte order).
///
/// # Panics
/// Panics if `data` contains fewer than eight bytes.
#[inline]
pub fn decode_udword(mut data: &[u8]) -> u64 {
// TODO: Need to handle errors more gracefully.
data.read_u64().unwrap()
}
mod sealed {
    /// Marker for "plain old data" types, i.e., types that can be created
    /// from arbitrary bytes.
    ///
    /// # Safety
    /// Implementors must be valid for every possible bit pattern.
    pub unsafe trait Pod {}

    // Implement `Pod` for every type in the given list.
    macro_rules! impl_pod {
        ($($ty:ty),* $(,)?) => {
            $(
                // SAFETY: Primitive integers are valid for any bit pattern.
                unsafe impl Pod for $ty {}
            )*
        };
    }

    impl_pod!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);
}
/// A trait providing utility functions for reading data from a byte buffer.
///
/// All `read_*` methods consume the bytes they read, so that consecutive
/// calls walk through the buffer front to back.
pub trait ReadRaw<'data> {
    /// Ensure that `len` bytes are available for consumption.
    fn ensure(&self, len: usize) -> Option<()>;

    /// Consume and return `len` bytes.
    fn read_slice(&mut self, len: usize) -> Option<&'data [u8]>;

    /// Read a NUL terminated string.
    fn read_cstr(&mut self) -> Option<&'data CStr>;

    /// Read anything implementing `Pod`.
    ///
    /// The value is copied out of the buffer as is, without any byte order
    /// conversion. `None` is returned if fewer than `size_of::<T>()` bytes
    /// are available.
    #[inline]
    fn read_pod<T>(&mut self) -> Option<T>
    where
        T: sealed::Pod,
    {
        let data = self.read_slice(size_of::<T>())?;
        // SAFETY: `T` is `Pod` and hence valid for any bit pattern. The pointer
        //         is guaranteed to be valid and to point to memory of at least
        //         `sizeof(T)` bytes.
        let value = unsafe { data.as_ptr().cast::<T>().read_unaligned() };
        Some(value)
    }

    /// Read a `u8` value.
    #[inline]
    fn read_u8(&mut self) -> Option<u8> {
        self.read_pod::<u8>()
    }

    /// Read an `i16` value.
    #[inline]
    fn read_i16(&mut self) -> Option<i16> {
        self.read_pod::<i16>()
    }

    /// Read a `u16` value.
    #[inline]
    fn read_u16(&mut self) -> Option<u16> {
        self.read_pod::<u16>()
    }

    /// Read an `i32` value.
    #[inline]
    fn read_i32(&mut self) -> Option<i32> {
        self.read_pod::<i32>()
    }

    /// Read a `u32` value.
    #[inline]
    fn read_u32(&mut self) -> Option<u32> {
        self.read_pod::<u32>()
    }

    /// Read a `u64` value.
    #[inline]
    fn read_u64(&mut self) -> Option<u64> {
        self.read_pod::<u64>()
    }

    /// Read a `u128` encoded as unsigned variable length little endian base 128
    /// value.
    ///
    /// The function returns the value read along with the number of bytes
    /// consumed. `None` is returned if the buffer ends before the last byte
    /// of the encoding, or if the encoding is longer than the 19 bytes that a
    /// 128 bit value can occupy at most. Payload bits of the 19th byte that
    /// lie beyond bit 127 are discarded.
    fn read_u128_leb128(&mut self) -> Option<(u128, u8)> {
        // ceil(128 / 7): the largest number of 7 bit groups a `u128` needs.
        const MAX_BYTES: u8 = 19;

        let mut value = 0u128;
        for count in 1..=MAX_BYTES {
            let byte = *self.read_slice(1)?.first()?;
            // The shift is at most 18 * 7 = 126 and hence always in range.
            value |= u128::from(byte & 0b0111_1111) << (u32::from(count - 1) * 7);
            if (byte & 0b1000_0000) == 0 {
                return Some((value, count));
            }
        }
        // The continuation bit is still set after `MAX_BYTES` bytes; such a
        // value cannot be represented.
        None
    }

    /// Read an `i128` encoded as signed variable length little endian base 128
    /// value.
    ///
    /// The function returns the value read along with the number of bytes
    /// consumed. It fails under the same conditions as
    /// [`read_u128_leb128`][Self::read_u128_leb128].
    fn read_i128_leb128(&mut self) -> Option<(i128, u8)> {
        let (value, count) = self.read_u128_leb128()?;
        // Number of bits above the encoded payload. A full width encoding
        // (19 bytes, 133 payload bits) leaves none of them, hence the
        // saturating subtraction.
        let sign_bits = 128u32.saturating_sub(u32::from(count) * 7);
        // Move the payload's sign bit to the top and let the arithmetic right
        // shift replicate it on the way back.
        let value = ((value as i128) << sign_bits) >> sign_bits;
        Some((value, count))
    }
}
/// Reading from a byte slice advances the slice past the consumed bytes.
impl<'data> ReadRaw<'data> for &'data [u8] {
    #[inline]
    fn ensure(&self, len: usize) -> Option<()> {
        if len <= self.len() {
            Some(())
        } else {
            None
        }
    }

    #[inline]
    fn read_slice(&mut self, len: usize) -> Option<&'data [u8]> {
        let bytes: &'data [u8] = *self;
        if bytes.len() < len {
            return None;
        }
        let (head, rest) = bytes.split_at(len);
        // Only the unread remainder stays visible to subsequent reads.
        *self = rest;
        Some(head)
    }

    #[inline]
    fn read_cstr(&mut self) -> Option<&'data CStr> {
        // Nothing is consumed unless a terminating NUL byte is present.
        let nul = self.iter().position(|&byte| byte == b'\0')?;
        let raw = self.read_slice(nul + 1)?;
        CStr::from_bytes_with_nul(raw).ok()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Make sure that `[u8]::ensure` works as expected.
    #[test]
    fn u8_slice_len_ensurance() {
        let slice = [0u8; 0].as_slice();
        assert_eq!(slice.ensure(0), Some(()));
        assert_eq!(slice.ensure(1), None);

        let slice = [1u8].as_slice();
        assert_eq!(slice.ensure(0), Some(()));
        assert_eq!(slice.ensure(1), Some(()));
        assert_eq!(slice.ensure(2), None);
    }

    /// Check that we can read various integers from a slice.
    #[test]
    fn pod_reading() {
        macro_rules! test {
            ($type:ty) => {{
                let max = <$type>::MAX.to_ne_bytes();
                let one = (1 as $type).to_ne_bytes();

                // Two values of the tested type, followed by bytes that must
                // be left alone.
                let mut data = Vec::new();
                let () = data.extend_from_slice(&max);
                let () = data.extend_from_slice(&one);
                let () = data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);

                let mut raw = data.as_slice();
                let uword = raw.read_pod::<$type>().unwrap();
                assert_eq!(uword, <$type>::MAX);

                let uword = raw.read_pod::<$type>().unwrap();
                assert_eq!(uword, 1);
            }};
        }

        test!(i8);
        test!(u8);
        test!(i16);
        test!(u16);
        test!(i32);
        test!(u32);
        test!(i64);
        test!(u64);
        test!(i128);
        test!(u128);
    }

    /// Test reading of signed and unsigned 16 and 32 bit values against known
    /// results.
    #[test]
    fn word_reading() {
        // Each value is encoded at its own width. Reading a 16 bit value from
        // the native bytes of a 32 bit one would yield the lower half on
        // little endian targets only.
        let half = 0x857fu16.to_ne_bytes();
        assert_eq!(half.as_slice().read_u16().unwrap(), 0x857f);
        assert_eq!(half.as_slice().read_i16().unwrap(), -31361);

        let word = 0xf936857fu32.to_ne_bytes();
        assert_eq!(word.as_slice().read_u32().unwrap(), 0xf936857f);
        assert_eq!(word.as_slice().read_i32().unwrap(), -113867393);
    }

    /// Make sure that we can read leb128 encoded values.
    #[test]
    fn leb128_reading() {
        let data = [0xf4, 0xf3, 0x75];
        let (v, s) = data.as_slice().read_u128_leb128().unwrap();
        assert_eq!(v, 0x1d79f4);
        assert_eq!(s, 3);

        let (v, s) = data.as_slice().read_i128_leb128().unwrap();
        assert_eq!(v, -165388);
        assert_eq!(s, 3);
    }

    /// Check that we can read a NUL terminated string from a slice.
    #[test]
    fn cstr_reading() {
        let mut slice = b"abc\x001234".as_slice();

        let cstr = slice.read_cstr().unwrap();
        assert_eq!(cstr, CStr::from_bytes_with_nul(b"abc\0").unwrap());

        // No terminating NUL byte.
        let mut slice = b"abc".as_slice();
        assert_eq!(slice.read_cstr(), None);
    }
}