-
Notifications
You must be signed in to change notification settings - Fork 9
DWARF5 Support #87
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DWARF5 Support #87
Changes from 4 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,27 @@ | |
#include "orc/settings.hpp" | ||
#include "orc/tracy.hpp" | ||
|
||
// | ||
// A note on SPECREF: | ||
// | ||
// SPECREF (specification references) are bookmarks to the specification where more context | ||
// can be found about what the code is doing. The reference should always take the following | ||
// format: | ||
// | ||
// `SPECREF:` version `page` abspage `(`relpage`)` [`line` lineno] [`--` comment] | ||
// | ||
// Where: | ||
// - version: The version of the DWARF spec (DWARF4, DWARF5, etc.) | ||
// - abspage: The absolute page number of the official PDF; the first page is 1. | ||
// - relpage: The page number as shown on the page itself (e.g., "Page 123") | ||
// - lineno: DWARF5 and beyond have line numbers, which go here. | ||
// - comment: An optional comment preceded by `--` that adds brief context. | ||
// If the comment is multiline the SPECREF should end with `--` | ||
// and the comment follow immediately thereafter. | ||
// | ||
// As specifications evolve this reference format should, too. | ||
// | ||
|
||
/**************************************************************************************************/ | ||
|
||
#define ORC_PRIVATE_FEATURE_PROFILE_DIE_DETAILS() (ORC_PRIVATE_FEATURE_TRACY() && 0) | ||
|
@@ -30,7 +51,7 @@ namespace { | |
|
||
/**************************************************************************************************/ | ||
|
||
std::uint32_t form_length(dw::form f, freader& s) { | ||
std::uint32_t form_length(dw::form f, freader& s, std::uint16_t version) { | ||
static constexpr std::uint32_t length_size_k{4}; // REVISIT: (fosterbrereton) 8 on 64bit DWARF | ||
|
||
auto leb_block = [&] { | ||
|
@@ -102,7 +123,22 @@ std::uint32_t form_length(dw::form f, freader& s) { | |
case dw::form::strx: | ||
return uleb128(s); // length of LEB _not_ included | ||
case dw::form::addrx: | ||
return uleb128(s); // length of LEB _not_ included | ||
if (version == 4) { | ||
return uleb128(s); // length of LEB _not_ included | ||
} else if (version == 5) { | ||
// SPECREF: DWARF5 page 45 (27) line 13 -- | ||
// addrx has changed to be a single ULEB; | ||
// therefore we need to return the length | ||
// of the ULEB instead of its value. | ||
return temp_seek(s, [&]{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are you actually capturing anything? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes - the ULEB is variable-length binary data. In this case we're reading it just to see how big it is on disk, and returning that length. |
||
const std::size_t beginning = s.tellg(); | ||
(void)uleb128(s); // do the uleb read to find out how much was read. | ||
const std::size_t end = s.tellg(); | ||
return static_cast<std::uint32_t>(end - beginning); | ||
}); | ||
} else { | ||
assert(!"unhandled DWARF version"); | ||
} | ||
case dw::form::ref_sup4: | ||
return 4; | ||
case dw::form::strp_sup: | ||
|
@@ -238,7 +274,8 @@ std::size_t die_hash(const die& d, const attribute_sequence& attributes) { | |
struct cu_header { | ||
std::uint64_t _length{0}; // 4 bytes (or 12 if extended length is used.) | ||
bool _is_64_bit{false}; | ||
std::uint16_t _version{0}; | ||
std::uint16_t _version{0}; // DWARF spec version (DWARF4, DWARF5, etc.) | ||
std::uint8_t _unit_type{0}; // SPECREF: DWARF5 page 218 (200) line 15 | ||
std::uint64_t _debug_abbrev_offset{0}; // 4 (!_is_64_bit) or 8 (_is_64_bit) bytes | ||
std::uint32_t _address_size{0}; | ||
|
||
|
@@ -252,11 +289,24 @@ void cu_header::read(freader& s, bool needs_byteswap) { | |
// REVISIT: (fbrereto) handle extended length / DWARF64 | ||
// For DWARF64 `_length` will be 0xffffffff. | ||
// See section 7.5.1.1 on how to handle this. | ||
_is_64_bit = true; | ||
throw std::runtime_error("unsupported length / DWARF64"); | ||
} | ||
|
||
_version = read_pod<std::uint16_t>(s, needs_byteswap); | ||
|
||
if (_version == 4) { | ||
// Do nothing. We started this project with DWARF4 | ||
// so the baseline implementation should match that. | ||
} else if (_version == 5) { | ||
// SPECREF: DWARF5 page 218 (200) line 15 -- | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. loving the SPECREF |
||
// just read the value here, but do not interpret | ||
// it until it is necessary to do so. | ||
_unit_type = read_pod<std::uint8_t>(s, needs_byteswap); | ||
} else { | ||
throw std::runtime_error("unknown DWARF version: " + std::to_string(_version)); | ||
} | ||
|
||
// note the read_pod types differ. | ||
if (_is_64_bit) { | ||
_debug_abbrev_offset = read_pod<std::uint64_t>(s, needs_byteswap); | ||
|
@@ -276,6 +326,8 @@ struct line_header { | |
// account for it differently. | ||
std::uint64_t _length{0}; // 4 (DWARF) or 8 (DWARF64) bytes | ||
std::uint16_t _version{0}; | ||
std::int8_t _address_size{0}; // new for DWARF5 | ||
std::int8_t _segment_selector_size{0}; // new for DWARF5 | ||
std::uint32_t _header_length{0}; // 4 (DWARF) or 8 (DWARF64) bytes | ||
std::uint32_t _min_instruction_length{0}; | ||
std::uint32_t _max_ops_per_instruction{0}; // DWARF4 or greater | ||
|
@@ -297,8 +349,17 @@ void line_header::read(freader& s, bool needs_byteswap) { | |
throw std::runtime_error("unsupported length"); | ||
} | ||
_version = read_pod<std::uint16_t>(s, needs_byteswap); | ||
if (_version > 4) { | ||
// REVISIT: (fbrereto) handle DWARF5 and later. | ||
if (_version == 4) { | ||
/* do nothing */ | ||
} else if (_version == 5) { | ||
// SPECREF: DWARF5 page 26 (8) line 11 -- changes from DWARF4 to DWARF5 | ||
|
||
// SPECREF: DWARF5 page 172 (154) line 10 | ||
_address_size = read_pod<std::int8_t>(s, needs_byteswap); | ||
|
||
// SPECREF: DWARF5 page 172 (154) line 16 | ||
_segment_selector_size = read_pod<std::int8_t>(s, needs_byteswap); | ||
} else { | ||
throw std::runtime_error("unhandled DWARF version (" + std::to_string(_version) + ")"); | ||
} | ||
_header_length = read_pod<std::uint32_t>(s, needs_byteswap); | ||
|
@@ -451,12 +512,14 @@ struct dwarf::implementation { | |
std::uint32_t read8(); | ||
std::uint32_t read_uleb(); | ||
std::int32_t read_sleb(); | ||
std::uint64_t read_initial_length(); | ||
|
||
void read_abbreviations(); | ||
void read_lines(std::size_t header_offset); | ||
const abbrev& find_abbreviation(std::uint32_t code) const; | ||
|
||
pool_string read_debug_str(std::size_t offset); | ||
pool_string read_debug_str_offs(std::size_t offset); | ||
|
||
void path_identifier_push(); | ||
void path_identifier_set(pool_string name); | ||
|
@@ -488,6 +551,7 @@ struct dwarf::implementation { | |
std::vector<pool_string> _decl_files; | ||
std::unordered_map<std::size_t, pool_string> _type_cache; | ||
std::unordered_map<std::size_t, pool_string> _debug_str_cache; | ||
std::unordered_map<std::size_t, pool_string> _debug_str_offs_cache; | ||
pool_string _last_typedef_name; // for unnamed structs - see https://github.com/adobe/orc/issues/84 | ||
cu_header _cu_header; | ||
std::size_t _cu_header_offset{0}; // offset of the compilation unit header. Relative to __debug_info. | ||
|
@@ -498,6 +562,7 @@ struct dwarf::implementation { | |
section _debug_info; | ||
section _debug_line; | ||
section _debug_str; | ||
section _debug_str_offsets; | ||
bool _ready{false}; | ||
}; | ||
|
||
|
@@ -520,6 +585,28 @@ std::uint32_t dwarf::implementation::read_uleb() { return uleb128(_s); } | |
|
||
std::int32_t dwarf::implementation::read_sleb() { return sleb128(_s); } | ||
|
||
std::uint64_t dwarf::implementation::read_initial_length() { | ||
// SPECREF: DWARF5 page 202 (184) line 25 -- initial length definition | ||
// SPECREF: DWARF5 page 214 (196) line 15 -- 32- v. 64-bit length representation | ||
std::uint64_t result = read32(); | ||
|
||
if (result < 0xfffffff0) { | ||
return result; | ||
} else if (result == 0xffffffff) { | ||
// We still need to communicate that this is a 64-bit field so subsequent | ||
// reads can call read64 instead of read32. Hence the assertion here. | ||
assert(!"Gotta tell the caller that this is a 64 bit structure"); | ||
result = read64(); | ||
} else { | ||
// "the values 0xfffffff0 through 0xffffffff are reserved by DWARF | ||
// to indicate some form of extension relative to DWARF Version 2; | ||
// such values must not be interpreted as a length field." | ||
assert(!"unsupported DWARF2 extension"); | ||
} | ||
|
||
return result; | ||
} | ||
|
||
/**************************************************************************************************/ | ||
|
||
void dwarf::implementation::register_section(const std::string& name, | ||
|
@@ -537,6 +624,11 @@ void dwarf::implementation::register_section(const std::string& name, | |
_debug_abbrev = section{offset, size}; | ||
} else if (name == "__debug_line") { | ||
_debug_line = section{offset, size}; | ||
} else if (name == "__debug_str_offs__DWARF") { | ||
_debug_str_offsets = section{offset, size}; | ||
} else { | ||
// save for debugging. | ||
// std::cout << "skipped " << name << '\n'; | ||
} | ||
} | ||
|
||
|
@@ -644,6 +736,56 @@ pool_string dwarf::implementation::read_debug_str(std::size_t offset) { | |
[&] { return empool(_s.read_c_string_view()); }); | ||
} | ||
|
||
/**************************************************************************************************/ | ||
// SPECREF: DWARF5 page 26 (8) line 28 -- v4 -> v5 changes
//
// Resolves `entry`, a zero-based index into the string offsets table (the operand of
// `DW_FORM_strx` and its fixed-width variants), to its pooled string. Results are memoized in
// `_debug_str_offs_cache`.
//
// Limitations: only 32-bit DWARF (4-byte table entries) is handled, and the table is assumed to
// begin at the start of the `_debug_str_offsets` section.
//
// Throws `std::runtime_error` if the table header is malformed or `entry` is out of range.
pool_string dwarf::implementation::read_debug_str_offs(std::size_t entry) {
    if (const auto found = _debug_str_offs_cache.find(entry);
        found != _debug_str_offs_cache.end()) {
        return found->second;
    }

    // SPECREF: DWARF5 page 259 (241) line 6 --
    //     `DW_AT_str_offsets_base` points to the first entry in this table, but that attribute
    //     is not tracked here. So the position of the first entry is derived from the table
    //     header on every uncached lookup. If that becomes too expensive, revisit hunting down
    //     `DW_AT_str_offsets_base` and caching it.

    // The table is a header followed by a series of offsets. Each offset locates a string in
    // the `.debug_str` section (DWARF5 section 7.26), so two jumps are needed: one to fetch the
    // offset, one to fetch the string.
    const std::size_t string_offset = temp_seek(_s, _debug_str_offsets._offset, [&] {
        constexpr std::size_t header_tail_size_k{4}; // version (2 bytes) + padding (2 bytes)
        constexpr std::size_t entry_size_k{4}; // REVISIT: 8 on 64-bit DWARF

        // SPECREF: DWARF5 page 258 (240) line 9 -- string offsets table details
        const std::uint64_t length = read_initial_length();
        const std::uint16_t version = read16();
        const std::uint16_t padding = read16();

        if (version != 5) {
            throw std::runtime_error("unhandled string offsets table version (" +
                                     std::to_string(version) + ")");
        }

        if (padding != 0) {
            throw std::runtime_error("malformed string offsets table header");
        }

        // `length` does not include itself, but does include the version and padding fields.
        // Guard the subtraction so a truncated header cannot wrap around.
        if (length < header_tail_size_k) {
            throw std::runtime_error("malformed string offsets table length");
        }

        const std::uint64_t entry_count = (length - header_tail_size_k) / entry_size_k;

        if (entry >= entry_count) {
            throw std::runtime_error("string offsets table index out of range (" +
                                     std::to_string(entry) + ")");
        }

        // At this point tellg() is at the 0th entry. Hop forward to the requested entry and
        // read its 4-byte value.
        return temp_seek(_s, entry_size_k * entry, std::ios::cur, [&] { return read32(); });
    });

    // The value is relative to the start of the string section, not to the offsets table.
    // Computing it from the end of the table is only right when the string section happens to
    // be laid out immediately after the table.
    return _debug_str_offs_cache[entry] =
               temp_seek(_s, _debug_str._offset + string_offset,
                         [&] { return empool(_s.read_c_string_view()); });
}
|
||
/**************************************************************************************************/ | ||
|
||
void dwarf::implementation::path_identifier_push() { _path.push_back(pool_string()); } | ||
|
@@ -1236,7 +1378,7 @@ attribute_value dwarf::implementation::process_form(const attribute& attr, | |
to_string(attr._name) + ")"); | ||
} | ||
result.passover(); | ||
auto size = form_length(attr._form, _s); | ||
auto size = form_length(attr._form, _s, _cu_header._version); | ||
_s.seekg(size, std::ios::cur); | ||
}; | ||
|
||
|
@@ -1346,6 +1488,26 @@ attribute_value dwarf::implementation::process_form(const attribute& attr, | |
case dw::form::block: { | ||
maybe_handle_block(block_type::uleb); | ||
} break; | ||
case dw::form::strx: { | ||
// First seen in Xcode 16.1 w/ DWARF5. | ||
// SPECREF: DWARF5 page 236 (218) line 31 | ||
result.string(read_debug_str_offs(read_uleb())); | ||
} break; | ||
case dw::form::strx1: { | ||
// First seen in Xcode 16.1 w/ DWARF5. | ||
// SPECREF: DWARF5 page 236 (218) line 31 | ||
result.string(read_debug_str_offs(read8())); | ||
} break; | ||
case dw::form::strx2: { | ||
// First seen in Xcode 16.1 w/ DWARF5. | ||
// SPECREF: DWARF5 page 236 (218) line 31 | ||
result.string(read_debug_str_offs(read16())); | ||
} break; | ||
case dw::form::strx4: { | ||
// First seen in Xcode 16.1 w/ DWARF5. | ||
// SPECREF: DWARF5 page 236 (218) line 31 | ||
result.string(read_debug_str_offs(read32())); | ||
} break; | ||
default: { | ||
handle_passover(); | ||
} break; | ||
|
@@ -1434,6 +1596,18 @@ die_pair dwarf::implementation::abbreviation_to_die(std::size_t die_address, pro | |
die._cu_header_offset = _cu_header_offset; | ||
die._arch = _details._arch; | ||
|
||
// | ||
// This is the offset a tool like dwarfdump will emit for a given object. | ||
// By setting the conditional to that offset, you can break on specific | ||
// dies to debug them. | ||
// | ||
// Save this for debugging. | ||
// | ||
// if (die._offset == 0xc) { | ||
// int x{42}; | ||
// (void)x; // <-- breakpoint here. | ||
// } | ||
|
||
std::size_t abbrev_code = read_uleb(); | ||
|
||
if (abbrev_code == 0) return std::make_tuple(std::move(die), std::move(attributes)); | ||
|
@@ -1672,6 +1846,12 @@ void dwarf::implementation::process_all_dies() { | |
if (die._tag == dw::tag::none) { | ||
path_identifier_pop(); | ||
|
||
// | ||
// If this fires, you've got an imbalanced push/pop, meaning you have | ||
// a NONE tag that didn't have a prior die with `die._has_children == true`. | ||
// | ||
assert(!_path.empty()); | ||
|
||
if (_path.size() == 1) { | ||
break; // end of the compilation unit | ||
} | ||
|
@@ -1814,8 +1994,19 @@ die_pair dwarf::implementation::fetch_one_die(std::size_t die_offset, | |
_cu_header_offset = cu_header_offset; | ||
|
||
if (cu_die_offset != die_offset) { | ||
// This loads some state into the dwarf::implementation that makes the `abbreviation_to_die` | ||
// call more meaningful for the original die we are trying to fetch. | ||
// If you're in here, we are reading some compilation unit information prior | ||
// to reading the actual die that was requested. This loads some state into | ||
// the `dwarf::implementation` that makes the `abbreviation_to_die` call | ||
// more meaningful for the original die we are trying to fetch. | ||
|
||
// Read the compilation unit header. We need this to know what version of | ||
// DWARF we are processing, which can affect how dies are processed (e.g., | ||
// `form_length`'s `dw::form::addrx` passover.) | ||
temp_seek(_s, _debug_info._offset + _cu_header_offset, [&]{ | ||
_cu_header.read(_s, _details._needs_byteswap); | ||
}); | ||
|
||
// Now grab the compilation unit die itself to fill in additional state details. | ||
die_pair cu_pair = fetch_one_die(cu_die_offset, cu_header_offset, cu_die_offset); | ||
post_process_compilation_unit_die(std::get<0>(cu_pair), std::get<1>(cu_pair)); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is very cool. Might be worth a link to the specref ref. (Although that is super meta.) But this one: https://github.com/tobie/specref?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I pulled that term out of thin air. It's an actual thing?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I poked around it for a bit; looks limited to Web specs, and contains no information for DWARF :(
That said, I could certainly put a link to the "official" PDFs here in this file.