mirror of
https://github.com/eunomia-bpf/bpf-developer-tutorial.git
synced 2026-02-07 04:14:16 +08:00
470 lines
15 KiB
Rust
470 lines
15 KiB
Rust
//! Parser of GSYM format.
|
|
//!
|
|
//! The layout of a standalone GSYM contains following sections in the order.
|
|
//!
|
|
//! * Header
|
|
//! * Address Table
|
|
//! * Address Data Offset Table
|
|
//! * File Table
|
|
//! * String Table
|
|
//! * Address Data
|
|
//!
|
|
//! The standalone GSYM starts with a Header, which describes the
|
|
//! size of an entry in the address table, the number of entries in
|
|
//! the address table, and the location and the size of the string
|
|
//! table.
|
|
//!
|
|
//! Since the Address Table is immediately after the Header, the
|
|
//! Header describes only the size of an entry and number of entries
|
|
//! in the table but not where it is. The Address Table comprises
|
|
//! addresses of symbols in the ascending order, so we can find the
|
|
//! symbol an address belongs to by doing a binary search to find
|
|
//! the closest address that is smaller or equal.
|
|
//!
|
|
//! The Address Data Offset Table has the same number of entries as
|
|
//! the Address Table. Every entry in one table has a
|
|
//! corresponding entry at the same offset in the other table. The
|
|
//! entries in the Address Data Offset Table are always 32bits
|
|
//! (4bytes.) It is the file offset to the respective Address
|
|
//! Data. (AddressInfo actually)
|
|
//!
|
|
//! An AddressInfo comprises the size and name of a symbol. The name
|
|
//! is an offset in the string table. You will find a null terminated
|
|
//! C string at the given offset. The size is the number of bytes of
|
|
//! the respective object; ex, a function or variable.
|
|
//!
|
|
//! See <https://reviews.llvm.org/D53379>
|
|
|
|
use std::ffi::CStr;
|
|
use std::io::{Error, ErrorKind};
|
|
|
|
use crate::util::decode_leb128;
|
|
use crate::util::decode_leb128_s;
|
|
use crate::util::decode_udword;
|
|
use crate::util::decode_uhalf;
|
|
use crate::util::decode_uword;
|
|
|
|
use super::linetab::LineTableHeader;
|
|
use super::types::AddressData;
|
|
use super::types::AddressInfo;
|
|
use super::types::FileInfo;
|
|
use super::types::Header;
|
|
use super::types::InfoTypeEndOfList;
|
|
use super::types::InfoTypeInlineInfo;
|
|
use super::types::InfoTypeLineTableInfo;
|
|
use super::types::ADDR_DATA_OFFSET_SIZE;
|
|
use super::types::FILE_INFO_SIZE;
|
|
use super::types::GSYM_MAGIC;
|
|
use super::types::GSYM_VERSION;
|
|
|
|
/// Hold the major parts of a standalone GSYM file.
|
|
///
|
|
/// GsymContext provides functions to access major entities in GSYM.
|
|
/// GsymContext can find respective AddressInfo for an address. But,
|
|
/// it doesn't parse AddressData to get line numbers.
|
|
///
|
|
/// The developers should use [`parse_address_data()`],
|
|
/// [`parse_line_table_header()`], and [`linetab::run_op()`] to get
|
|
/// line number information from [`AddressInfo`].
|
|
pub struct GsymContext<'a> {
|
|
header: Header,
|
|
addr_tab: &'a [u8],
|
|
addr_data_off_tab: &'a [u8],
|
|
file_tab: &'a [u8],
|
|
str_tab: &'a [u8],
|
|
raw_data: &'a [u8],
|
|
}
|
|
|
|
impl<'a> GsymContext<'a> {
|
|
/// Parse the Header of a standalone GSYM file.
|
|
///
|
|
/// # Arguments
|
|
///
|
|
/// * `data` - is the content of a standalone GSYM.
|
|
///
|
|
/// Returns a GsymContext, which includes the Header and other important tables.
|
|
pub fn parse_header(data: &[u8]) -> Result<GsymContext, Error> {
|
|
let mut off = 0;
|
|
// Parse Header
|
|
let magic = decode_uword(data);
|
|
if magic != GSYM_MAGIC {
|
|
return Err(Error::new(ErrorKind::InvalidData, "invalid magic number"));
|
|
}
|
|
off += 4;
|
|
let version = decode_uhalf(&data[off..]);
|
|
if version != GSYM_VERSION {
|
|
return Err(Error::new(ErrorKind::InvalidData, "unknown version number"));
|
|
}
|
|
off += 2;
|
|
let addr_off_size = data[off];
|
|
off += 1;
|
|
let uuid_size = data[off];
|
|
off += 1;
|
|
let base_address = decode_udword(&data[off..]);
|
|
off += 8;
|
|
let num_addrs = decode_uword(&data[off..]);
|
|
off += 4;
|
|
let strtab_offset = decode_uword(&data[off..]);
|
|
off += 4;
|
|
let strtab_size = decode_uword(&data[off..]);
|
|
off += 4;
|
|
let uuid: [u8; 20] = (&data[off..(off + 20)])
|
|
.try_into()
|
|
.expect("input data is too short");
|
|
off += 20;
|
|
|
|
// Get the slices of the Address Table, Address Data Offset Table,
|
|
// and String table.
|
|
let end_off = off + num_addrs as usize * addr_off_size as usize;
|
|
if end_off > data.len() {
|
|
return Err(Error::new(
|
|
ErrorKind::InvalidData,
|
|
"the size of the file is smaller than expectation (address table)",
|
|
));
|
|
}
|
|
let addr_tab = &data[off..end_off];
|
|
off = (end_off + 0x3) & !0x3;
|
|
let end_off = off + num_addrs as usize * ADDR_DATA_OFFSET_SIZE;
|
|
if end_off > data.len() {
|
|
return Err(Error::new(
|
|
ErrorKind::InvalidData,
|
|
"the size of the file is smaller than expectation (address data offset table)",
|
|
));
|
|
}
|
|
let addr_data_off_tab = &data[off..end_off];
|
|
off += num_addrs as usize * ADDR_DATA_OFFSET_SIZE;
|
|
let file_num = decode_uword(&data[off..]);
|
|
off += 4;
|
|
let end_off = off + file_num as usize * FILE_INFO_SIZE;
|
|
if end_off > data.len() {
|
|
return Err(Error::new(
|
|
ErrorKind::InvalidData,
|
|
"the size of the file is smaller than expectation (file table)",
|
|
));
|
|
}
|
|
let file_tab = &data[off..end_off];
|
|
let end_off = strtab_offset as usize + strtab_size as usize;
|
|
if end_off > data.len() {
|
|
return Err(Error::new(
|
|
ErrorKind::InvalidData,
|
|
"the size of the file is smaller than expectation (string table)",
|
|
));
|
|
}
|
|
let str_tab = &data[strtab_offset as usize..end_off];
|
|
|
|
Ok(GsymContext {
|
|
header: Header {
|
|
magic,
|
|
version,
|
|
addr_off_size,
|
|
uuid_size,
|
|
base_address,
|
|
num_addrs,
|
|
strtab_offset,
|
|
strtab_size,
|
|
uuid,
|
|
},
|
|
addr_tab,
|
|
addr_data_off_tab,
|
|
file_tab,
|
|
str_tab,
|
|
raw_data: data,
|
|
})
|
|
}
|
|
|
|
pub fn num_addresses(&self) -> usize {
|
|
self.header.num_addrs as usize
|
|
}
|
|
|
|
/// Get the address of an entry in the Address Table.
|
|
pub fn addr_at(&self, idx: usize) -> Option<u64> {
|
|
if idx >= self.header.num_addrs as usize {
|
|
return None;
|
|
}
|
|
|
|
let off = idx * self.header.addr_off_size as usize;
|
|
let mut addr = 0u64;
|
|
let mut shift = 0;
|
|
for d in &self.addr_tab[off..(off + self.header.addr_off_size as usize)] {
|
|
addr |= (*d as u64) << shift;
|
|
shift += 8;
|
|
}
|
|
addr += self.header.base_address;
|
|
Some(addr)
|
|
}
|
|
|
|
/// Get the AddressInfo of an address given by an index.
|
|
pub fn addr_info(&self, idx: usize) -> Option<AddressInfo> {
|
|
if idx >= self.header.num_addrs as usize {
|
|
return None;
|
|
}
|
|
|
|
let off = idx * ADDR_DATA_OFFSET_SIZE;
|
|
let ad_off = decode_uword(&self.addr_data_off_tab[off..]) as usize;
|
|
let size = decode_uword(&self.raw_data[ad_off..]);
|
|
let name = decode_uword(&self.raw_data[ad_off + 4..]);
|
|
let info = AddressInfo {
|
|
size,
|
|
name,
|
|
data: &self.raw_data[ad_off + 8..],
|
|
};
|
|
|
|
Some(info)
|
|
}
|
|
|
|
/// Get the string at the given offset from the String Table.
|
|
pub fn get_str(&self, off: usize) -> Option<&str> {
|
|
if off >= self.str_tab.len() {
|
|
return None;
|
|
}
|
|
|
|
// Ensure there is a null byte.
|
|
let mut null_off = self.str_tab.len() - 1;
|
|
while null_off > off && self.str_tab[null_off] != 0 {
|
|
null_off -= 1;
|
|
}
|
|
if null_off == off {
|
|
return Some("");
|
|
}
|
|
|
|
// SAFETY: the lifetime of `CStr` can live as long as `self`.
|
|
// The returned reference can also live as long as `self`.
|
|
unsafe {
|
|
CStr::from_ptr(self.str_tab[off..].as_ptr() as *const i8)
|
|
.to_str()
|
|
.ok()
|
|
}
|
|
}
|
|
|
|
pub fn file_info(&self, idx: usize) -> Option<FileInfo> {
|
|
if idx >= self.file_tab.len() / FILE_INFO_SIZE {
|
|
return None;
|
|
}
|
|
let mut off = idx * FILE_INFO_SIZE;
|
|
let directory = decode_uword(&self.file_tab[off..(off + 4)]);
|
|
off += 4;
|
|
let filename = decode_uword(&self.file_tab[off..(off + 4)]);
|
|
let info = FileInfo {
|
|
directory,
|
|
filename,
|
|
};
|
|
Some(info)
|
|
}
|
|
}
|
|
|
|
/// Find the index of an entry in the address table most likely
|
|
/// containing the given address.
|
|
///
|
|
/// The callers should check the respective `AddressInfo` to make sure
|
|
/// it is what they request for.
|
|
pub fn find_address(ctx: &GsymContext, addr: u64) -> Option<usize> {
|
|
let mut left = 0;
|
|
let mut right = ctx.num_addresses();
|
|
|
|
if right == 0 {
|
|
return None;
|
|
}
|
|
if addr < ctx.addr_at(0)? {
|
|
return None;
|
|
}
|
|
|
|
while (left + 1) < right {
|
|
let v = (left + right) / 2;
|
|
let cur_addr = ctx.addr_at(v)?;
|
|
|
|
if addr == cur_addr {
|
|
return Some(v);
|
|
}
|
|
if addr < cur_addr {
|
|
right = v;
|
|
} else {
|
|
left = v;
|
|
}
|
|
}
|
|
Some(left)
|
|
}
|
|
|
|
/// Parse AddressData.
|
|
///
|
|
/// AddressDatas are items following AndressInfo.
|
|
/// [`GsymContext::addr_info()`] returns the raw data of AddressDatas as a
|
|
/// slice at [`AddressInfo::data`].
|
|
///
|
|
/// # Arguments
|
|
///
|
|
/// * `data` - is the slice from AddressInfo::data.
|
|
///
|
|
/// Returns a vector of [`AddressData`].
|
|
pub fn parse_address_data(data: &[u8]) -> Vec<AddressData> {
|
|
let mut data_objs = vec![];
|
|
|
|
let mut off = 0;
|
|
while off < data.len() {
|
|
let typ = decode_uword(&data[off..]);
|
|
off += 4;
|
|
let length = decode_uword(&data[off..]);
|
|
off += 4;
|
|
let d = &data[off..(off + length as usize)];
|
|
data_objs.push(AddressData {
|
|
typ,
|
|
length,
|
|
data: d,
|
|
});
|
|
off += length as usize;
|
|
|
|
#[allow(non_upper_case_globals)]
|
|
match typ {
|
|
InfoTypeEndOfList => {
|
|
break;
|
|
}
|
|
InfoTypeLineTableInfo | InfoTypeInlineInfo => {}
|
|
_ => {
|
|
#[cfg(debug_assertions)]
|
|
eprintln!("unknown info type");
|
|
}
|
|
}
|
|
}
|
|
|
|
data_objs
|
|
}
|
|
|
|
/// Parse AddressData of InfoTypeLineTableInfo.
|
|
///
|
|
/// An `AddressData` of `InfoTypeLineTableInfo` type is a table of line numbers
|
|
/// for a symbol. `AddressData` is the payload of `AddressInfo`. One
|
|
/// `AddressInfo` may have several `AddressData` entries in its payload. Each
|
|
/// `AddressData` entry stores a type of data relates to the symbol the
|
|
/// `AddressInfo` presents.
|
|
///
|
|
/// # Arguments
|
|
///
|
|
/// * `data` - is what [`AddressData::data`] is.
|
|
///
|
|
/// Returns the `LineTableHeader` and the size of the header of a
|
|
/// `AddressData` entry of `InfoTypeLineTableInfo` type in the payload
|
|
/// of an `Addressinfo`.
|
|
pub fn parse_line_table_header(data: &[u8]) -> Option<(LineTableHeader, usize)> {
|
|
let mut off = 0;
|
|
let (min_delta, bytes) = decode_leb128_s(&data[off..])?;
|
|
off += bytes as usize;
|
|
let (max_delta, bytes) = decode_leb128_s(&data[off..])?;
|
|
off += bytes as usize;
|
|
let (first_line, bytes) = decode_leb128(&data[off..])?;
|
|
off += bytes as usize;
|
|
|
|
let header = LineTableHeader {
|
|
min_delta,
|
|
max_delta,
|
|
first_line: first_line as u32,
|
|
};
|
|
Some((header, off))
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    use std::fs;
    use std::path::Path;

    /// Read the `data/test.gsym` fixture of the crate into memory.
    fn load_test_gsym() -> Vec<u8> {
        let test_gsym = Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("data")
            .join("test.gsym");
        fs::read(test_gsym).unwrap()
    }

    /// Symbols of the fixture can be looked up by address and resolved
    /// to their names.
    #[test]
    fn test_parse_context() {
        let data = load_test_gsym();
        let ctx = GsymContext::parse_header(&data).unwrap();

        let idx = find_address(&ctx, 0x0000000002000000).unwrap();
        let addrinfo = ctx.addr_info(idx).unwrap();
        assert_eq!(ctx.get_str(addrinfo.name as usize).unwrap(), "main");

        let idx = find_address(&ctx, 0x0000000002000100).unwrap();
        let addrinfo = ctx.addr_info(idx).unwrap();
        assert_eq!(ctx.get_str(addrinfo.name as usize).unwrap(), "factorial");
    }

    /// `find_address()` agrees with `binary_search()` of the standard
    /// library for every strictly ascending address table of
    /// `TEST_SIZE` entries with values `< TEST_SIZE * 2`.
    #[test]
    fn test_find_address() {
        const TEST_SIZE: usize = 6;

        let data = load_test_gsym();

        let mut addr_tab = Vec::<u8>::with_capacity(TEST_SIZE * 4);
        let mut values: Vec<u32> = (0_u32..(TEST_SIZE as u32)).collect();

        // Serialize the values the way the Address Table stores them.
        // `GsymContext::addr_at()` decodes little-endian entries, so the
        // encoding must not depend on the endianness of the host.
        let copy_to_addr_tab = |values: &[u32], addr_tab: &mut Vec<u8>| {
            addr_tab.clear();
            for v in values {
                addr_tab.extend_from_slice(&v.to_le_bytes());
            }
        };
        // Generate all possible sequences that values are in strictly
        // ascending order and `< TEST_SIZE * 2`.
        let gen_values = |values: &mut [u32]| {
            let mut carry_out = TEST_SIZE as u32 * 2;
            for i in (0..values.len()).rev() {
                values[i] += 1;
                if values[i] >= carry_out {
                    carry_out -= 1;
                    continue;
                }
                // Make all values at right side minimal and strictly
                // ascending.
                for j in (i + 1)..values.len() {
                    values[j] = values[j - 1] + 1;
                }
                break;
            }
        };

        while values[0] <= TEST_SIZE as u32 {
            copy_to_addr_tab(&values, &mut addr_tab);

            for addr in 0..(TEST_SIZE * 2) {
                // Reuse the header of the fixture, but point the context
                // at the synthetic address table.
                let mut ctx = GsymContext::parse_header(&data).unwrap();
                ctx.header.num_addrs = TEST_SIZE as u32;
                ctx.header.addr_off_size = 4;
                ctx.header.base_address = 0;
                ctx.addr_tab = addr_tab.as_slice();

                let idx = find_address(&ctx, addr as u64).unwrap_or(0);
                let expected = match values.binary_search(&(addr as u32)) {
                    Ok(idx) => idx,
                    // When the searching value is falling in between
                    // two values, it will return the index of the later
                    // one.  But we want the earlier one.
                    Err(idx) => idx.saturating_sub(1),
                };
                assert_eq!(idx, expected);
            }

            gen_values(&mut values);
        }
    }
}
|