Hotfixes (#69)

bheath-adobe · web-flow · commit 7c06f50415e1 · 2024-01-11T12:52:32.000-06:00
* Hotfixes:
- Fix for reading correct ref_addr in dwarf v2.
- Workaround for only reading partial decl_files list.

* Added tests and fix issue where a a test compilation fails but is not recognized because the failure output goes to stderr, which wasn't being read before.

* Fix typo.

* Put declaration back into die_hash.
diff --git a/include/orc/dwarf_structs.hpp b/include/orc/dwarf_structs.hpp
@@ -81,7 +81,7 @@ struct attribute_value {
         return _string.hash();
     }
 
-    void reference(std::uint32_t offset) {
+    void reference(std::uint64_t offset) {
         _type |= type::reference;
         _uint = offset;
     }
diff --git a/src/dwarf.cpp b/src/dwarf.cpp
@@ -198,22 +198,33 @@ bool has_flag_attribute(const attribute_sequence& attributes, dw::at name) {
 
 std::size_t die_hash(const die& d, const attribute_sequence& attributes) {
     ZoneScoped;
-
-    // Tag used to be a part of the fatal hash computation. Unfortunately it causes `class` and
-    // `struct` definitions to be considered different, when they should not be. As a general
-    // rule, I think _all_ symbols regardless of tag should be uniquely defined, so pulling the tag
-    // from the hash below should catch more issues without adding any false positives.
+    
+    // Ideally, tag would not be part of this hash and all symbols, regardless of tag, would be
+    // unique. However, that fails in at least one case:
+    //
+    //     typedef struct S {} S;
+    //
+    // This results in both a `typedef` element and a `struct` element, with the same symbol path,
+    // but which is not an ODRV.
+    //
+    // On the other hand, including tag in the hash results in missed ODRVs in cases like:
+    //
+    //    struct S1 {}
+    //    ...
+    //    class S1 { int i; }
     //
-    // The `declaration` attribute used to be a part of this hash, too. Given that a declaration
-    // is not a definition, they cannot contribute to an ODRV, so were instead added to the
-    // `skip_die` logic, and removed from this hash.
+    // which results in a `struct` element and a `class` element with the same symbol path, but
+    // differing definitions, which _is_ an ODRV.
     //
-    // Thus, the only two things that contribute to the ODR hash of a die are its architecture and
-    // symbol path.
+    // So, we will include the tag in the hash, but combine the tag values for `struct` and `class`
+    // into a single value.
+    auto tag = d._tag == dw::tag::structure_type ? dw::tag::class_type : d._tag;
 
     // clang-tidy off
     return orc::hash_combine(0,
                              static_cast<std::size_t>(d._arch),
+                             static_cast<std::size_t>(tag),
+                             has_flag_attribute(attributes, dw::at::declaration),
                              d._path.hash());
     // clang-tidy on
 };
@@ -440,6 +451,7 @@ struct dwarf::implementation {
     std::vector<pool_string> _decl_files;
     std::unordered_map<std::size_t, pool_string> _type_cache;
     std::unordered_map<std::size_t, pool_string> _debug_str_cache;
+    cu_header _cu_header;
     std::size_t _cu_address{0};
     std::uint32_t _ofd_index{0}; // index to the obj_registry in macho.cpp
     section _debug_abbrev;
@@ -661,8 +673,17 @@ attribute dwarf::implementation::process_attribute(const attribute& attr,
     // decl_file comes back as a uint, but that's a debug_str offset that needs to be resolved.
     if (result._name == dw::at::decl_file) {
         auto decl_file_index = result._value.uint();
-        assert(decl_file_index < _decl_files.size());
-        result._value.string(_decl_files[decl_file_index]);
+        // We currently only process the `file_names` part of the `debug_line` section header to
+        // determine the decl_files list. However, this is only a partial list as the line number
+        // program can also contain DW_LNE_define_file ops, which we don't currently process.
+        // See https://github.com/adobe/orc/issues/67
+        // For now, we will ignore file indexes too large for our list.
+        //assert(decl_file_index < _decl_files.size());
+        if (decl_file_index < _decl_files.size()) {
+            result._value.string(_decl_files[decl_file_index]);
+        } else {
+            result._value.string(empool("<unsupported file index>"));
+        }
     } else if (result._name == dw::at::calling_convention) {
         auto convention = result._value.uint();
         assert(convention > 0 && convention <= 0xff);
@@ -1099,7 +1120,11 @@ attribute_value dwarf::implementation::process_form(const attribute& attr,
             result.uint(read64());
         } break;
         case dw::form::ref_addr: {
-            result.reference(read32());
+            if (_cu_header._version == 2) {
+                result.reference(read64());
+            } else {
+                result.reference(read32());
+            }
         } break;
         case dw::form::ref1: {
             handle_reference(read8());
@@ -1375,11 +1400,9 @@ void dwarf::implementation::process_all_dies() {
     dies dies;
 
     while (_s.tellg() < section_end) {
-        cu_header header;
-
         _cu_address = _s.tellg();
 
-        header.read(_s, _details._needs_byteswap);
+        _cu_header.read(_s, _details._needs_byteswap);
 
         // process dies one at a time, recording things like addresses along the way.
         while (true) {
diff --git a/test/battery/class_struct/class.cpp b/test/battery/class_struct/class.cpp
@@ -0,0 +1,6 @@
+class object {
+    bool _x;
+    bool _y;
+};
+
+void f(const object& o) { }
diff --git a/test/battery/class_struct/odrv_test.toml b/test/battery/class_struct/odrv_test.toml
@@ -0,0 +1,8 @@
+[[source]]
+    path = "class.cpp"
+
+[[source]]
+    path = "struct.cpp"
+
+[[odrv]]
+    category = "structure:byte_size"
diff --git a/test/battery/class_struct/struct.cpp b/test/battery/class_struct/struct.cpp
@@ -0,0 +1,5 @@
+struct object {
+    bool _x;
+};
+
+void f(const object& o) { }
diff --git a/test/battery/typedef_struct/odrv_test.toml b/test/battery/typedef_struct/odrv_test.toml
@@ -0,0 +1,2 @@
+[[source]]
+    path = "src.cpp"
diff --git a/test/battery/typedef_struct/src.cpp b/test/battery/typedef_struct/src.cpp
@@ -0,0 +1,4 @@
+typedef struct S {int _i;} S;
+
+// Required so the compiler generates a symbol.
+int get(S s) { return s._i; }
diff --git a/test/src/main.cpp b/test/src/main.cpp
@@ -172,8 +172,9 @@ auto object_file_path(const std::filesystem::path& battery_path, const compilati
 
 /**************************************************************************************************/
 
-std::string exec(const char* cmd) {
-    std::unique_ptr<FILE, decltype(&pclose)> pipe(popen(cmd, "r"), pclose);
+std::string exec(std::string cmd) {
+    cmd += " 2>&1";
+    std::unique_ptr<FILE, decltype(&pclose)> pipe(popen(cmd.c_str(), "r"), pclose);
 
     if (!pipe) {
         throw std::runtime_error("popen() failed!");

Original file line number	Diff line number	Diff line change
`@@ -81,7 +81,7 @@ struct attribute_value {`
`81`	`81`	`return _string.hash();`
`82`	`82`	`}`
`83`	`83`
`84`		`- void reference(std::uint32_t offset) {`
	`84`	`+ void reference(std::uint64_t offset) {`
`85`	`85`	`_type \|= type::reference;`
`86`	`86`	`_uint = offset;`
`87`	`87`	`}`