diff --git a/examples/dwarfdump.rs b/examples/dwarfdump.rs index 752010d63..c4e99e23c 100644 --- a/examples/dwarfdump.rs +++ b/examples/dwarfdump.rs @@ -530,6 +530,7 @@ where let debug_addr = load_section(&arena, file, endian); let debug_info = load_section(&arena, file, endian); let debug_line = load_section(&arena, file, endian); + let debug_line_str = load_section(&arena, file, endian); let debug_str = load_section(&arena, file, endian); let debug_str_offsets = load_section(&arena, file, endian); let debug_types = load_section(&arena, file, endian); @@ -548,6 +549,7 @@ where debug_addr, debug_info, debug_line, + debug_line_str, debug_str, debug_str_offsets, debug_str_sup: no_reader.clone().into(), @@ -975,8 +977,8 @@ struct Unit { encoding: gimli::Encoding, base_address: u64, line_program: Option>, - comp_dir: Option, - comp_name: Option, + comp_dir: Option>, + comp_name: Option>, str_offsets_base: gimli::DebugStrOffsetsBase, addr_base: gimli::DebugAddrBase, loclists_base: gimli::DebugLocListsBase, @@ -1046,12 +1048,8 @@ fn dump_entries( Some(gimli::AttributeValue::Addr(address)) => address, _ => 0, }; - unit.comp_dir = entry - .attr(gimli::DW_AT_comp_dir)? - .and_then(|attr| dwarf.attr_string(&attr)); - unit.comp_name = entry - .attr(gimli::DW_AT_name)? - .and_then(|attr| dwarf.attr_string(&attr)); + unit.comp_dir = entry.attr_value(gimli::DW_AT_comp_dir)?; + unit.comp_name = entry.attr_value(gimli::DW_AT_name)?; unit.line_program = match entry.attr_value(gimli::DW_AT_stmt_list)? { Some(gimli::AttributeValue::DebugLineRef(offset)) => dwarf .debug_line @@ -1281,6 +1279,13 @@ fn dump_attr_value( writeln!(w, "<.debug_str+0x{:08x}>", offset.0)?; } } + gimli::AttributeValue::DebugLineStrRef(offset) => { + if let Ok(s) = dwarf.debug_line_str.get_str(offset) { + writeln!(w, "{}", s.to_string_lossy()?)?; + } else { + writeln!(w, "<.debug_line_str=0x{:08x}>", offset.0)?; + } + } gimli::AttributeValue::String(s) => { writeln!(w, "{}", s.to_string_lossy()?)?; } @@ -1322,7 +1327,7 @@ fn dump_attr_value( } gimli::AttributeValue::FileIndex(value) => { write!(w, "0x{:08x}", value)?; - dump_file_index(w, value, unit)?; + dump_file_index(w, value, unit, dwarf)?; writeln!(w)?; } } @@ -1345,7 +1350,12 @@ fn dump_type_signature( Ok(()) } -fn dump_file_index(w: &mut W, file: u64, unit: &Unit) -> Result<()> { +fn dump_file_index( + w: &mut W, + file: u64, + unit: &Unit, + dwarf: &gimli::Dwarf, +) -> Result<()> { if file == 0 { return Ok(()); } @@ -1362,15 +1372,24 @@ fn dump_file_index(w: &mut W, file: u64, unit: &Unit) -> }; write!(w, " ")?; if let Some(directory) = file.directory(header) { + let directory = dwarf.attr_string(directory)?; let directory = directory.to_string_lossy()?; if !directory.starts_with('/') { if let Some(ref comp_dir) = unit.comp_dir { - write!(w, "{}/", comp_dir.to_string_lossy()?)?; + write!( + w, + "{}/", + dwarf.attr_string(comp_dir.clone())?.to_string_lossy()? + )?; } } write!(w, "{}/", directory)?; } - write!(w, "{}", file.path_name().to_string_lossy()?)?; + write!( + w, + "{}", + dwarf.attr_string(file.path_name())?.to_string_lossy()? + )?; Ok(()) } @@ -1886,6 +1905,11 @@ fn dump_line_program( "DWARF version: {}", header.version() )?; + writeln!( + w, + "Address size: {}", + header.address_size() + )?; writeln!( w, "Prologue length: {}", @@ -1930,27 +1954,48 @@ fn dump_line_program( .iter() .enumerate() { - writeln!(w, " Opcode {} as {} args", i + 1, length)?; + writeln!(w, " Opcode {} has {} args", i + 1, length)?; } + let base = if header.version() >= 5 { 0 } else { 1 }; writeln!(w)?; writeln!(w, "The Directory Table:")?; for (i, dir) in header.include_directories().iter().enumerate() { - writeln!(w, " {} {}", i + 1, dir.to_string_lossy()?)?; + writeln!( + w, + " {} {}", + base + i, + dwarf.attr_string(dir.clone())?.to_string_lossy()? + )?; } writeln!(w)?; writeln!(w, "The File Name Table")?; - writeln!(w, " Entry\tDir\tTime\tSize\tName")?; + write!(w, " Entry\tDir\tTime\tSize")?; + if header.file_has_md5() { + write!(w, "\tMD5\t\t\t\t")?; + } + writeln!(w, "\tName")?; for (i, file) in header.file_names().iter().enumerate() { - writeln!( + write!( w, - " {}\t{}\t{}\t{}\t{}", - i + 1, + " {}\t{}\t{}\t{}", + base + i, file.directory_index(), - file.last_modification(), - file.length(), - file.path_name().to_string_lossy()? + file.timestamp(), + file.size(), + )?; + if header.file_has_md5() { + let md5 = file.md5(); + write!(w, "\t")?; + for i in 0..16 { + write!(w, "{:02X}", md5[i])?; + } + } + writeln!( + w, + "\t{}", + dwarf.attr_string(file.path_name())?.to_string_lossy()? )?; } @@ -2002,11 +2047,15 @@ fn dump_line_program( write!( w, " uri: \"{}/{}\"", - directory.to_string_lossy()?, - file.path_name().to_string_lossy()? + dwarf.attr_string(directory)?.to_string_lossy()?, + dwarf.attr_string(file.path_name())?.to_string_lossy()? )?; } else { - write!(w, " uri: \"{}\"", file.path_name().to_string_lossy()?)?; + write!( + w, + " uri: \"{}\"", + dwarf.attr_string(file.path_name())?.to_string_lossy()? + )?; } } } diff --git a/src/common.rs b/src/common.rs index 8fe0cd3f4..2a1528d06 100644 --- a/src/common.rs +++ b/src/common.rs @@ -71,6 +71,10 @@ pub struct DebugInfoOffset(pub T); #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct DebugLineOffset(pub T); +/// An offset into the `.debug_line_str` section. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DebugLineStrOffset(pub T); + /// An offset into either the `.debug_loc` section or the `.debug_loclists` section, /// depending on the version of the unit the offset was contained in. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] diff --git a/src/read/cfi.rs b/src/read/cfi.rs index 6e462208b..8b757b3f3 100644 --- a/src/read/cfi.rs +++ b/src/read/cfi.rs @@ -228,7 +228,7 @@ impl<'a, R: Reader + 'a> EhHdrTable<'a, R> { pub fn lookup(&self, address: u64, bases: &BaseAddresses) -> Result { let size = match self.hdr.table_enc.format() { constants::DW_EH_PE_uleb128 | constants::DW_EH_PE_sleb128 => { - return Err(Error::VariableLengthSearchTable) + return Err(Error::VariableLengthSearchTable); } constants::DW_EH_PE_sdata2 | constants::DW_EH_PE_udata2 => 2, constants::DW_EH_PE_sdata4 | constants::DW_EH_PE_udata4 => 4, diff --git a/src/read/dwarf.rs b/src/read/dwarf.rs index 0a6794ec9..7c7179c6a 100644 --- a/src/read/dwarf.rs +++ b/src/read/dwarf.rs @@ -1,7 +1,7 @@ use constants; use read::{ - Abbreviations, Attribute, AttributeValue, CompilationUnitHeader, CompilationUnitHeadersIter, - DebugAbbrev, DebugAddr, DebugInfo, DebugLine, DebugStr, DebugStrOffsets, DebugTypes, Error, + Abbreviations, AttributeValue, CompilationUnitHeader, CompilationUnitHeadersIter, DebugAbbrev, + DebugAddr, DebugInfo, DebugLine, DebugLineStr, DebugStr, DebugStrOffsets, DebugTypes, Error, IncompleteLineProgram, LocationLists, RangeLists, Reader, Result, TypeUnitHeader, TypeUnitHeadersIter, }; @@ -30,6 +30,9 @@ where /// The `.debug_line` section. pub debug_line: DebugLine, + /// The `.debug_line_str` section. + pub debug_line_str: DebugLineStr, + /// The `.debug_str` section. pub debug_str: DebugStr, @@ -104,30 +107,34 @@ where Some(_) => return Err(Error::UnsupportedAttributeForm), None => return Ok(None), }; - let comp_dir = root - .attr(constants::DW_AT_comp_dir)? - .and_then(|attr| self.attr_string(&attr)); - let comp_name = root - .attr(constants::DW_AT_name)? - .and_then(|attr| self.attr_string(&attr)); + let comp_dir = root.attr_value(constants::DW_AT_comp_dir)?; + let comp_name = root.attr_value(constants::DW_AT_name)?; self.debug_line .program(offset, unit.address_size(), comp_dir, comp_name) .map(Option::Some) } - /// Try to return an attribute's value as a string slice. + /// Try to return an attribute value as a string slice. + /// + /// If the attribute value is one of: + /// + /// - an inline `DW_FORM_string` string + /// - a `DW_FORM_strp` reference to an offset into the `.debug_str` section + /// - a `DW_FORM_strp_sup` reference to an offset into a supplementary + /// object file + /// - a `DW_FORM_line_strp` reference to an offset into the `.debug_line_str` + /// section /// - /// If the attribute's value is either an inline `DW_FORM_string` string, - /// or a `DW_FORM_strp` reference to an offset into the `.debug_str` - /// section, or a `DW_FORM_strp_sup` reference to an offset into a supplementary - /// object file, return the attribute's string value as `Some`. Other attribute - /// value forms are returned as `None`. - pub fn attr_string(&self, attr: &Attribute) -> Option { - match attr.value() { - AttributeValue::String(ref string) => Some(string.clone()), - AttributeValue::DebugStrRef(offset) => self.debug_str.get_str(offset).ok(), - AttributeValue::DebugStrRefSup(offset) => self.debug_str_sup.get_str(offset).ok(), - _ => None, + /// then return the attribute's string value. Returns an error if the attribute + /// value does not have a string form, or if a string form has an invalid value. + // TODO: handle `DW_FORM_strx`, but that requires knowing the DebugStrOffsetsBase + pub fn attr_string(&self, attr: AttributeValue) -> Result { + match attr { + AttributeValue::String(string) => Ok(string), + AttributeValue::DebugStrRef(offset) => self.debug_str.get_str(offset), + AttributeValue::DebugStrRefSup(offset) => self.debug_str_sup.get_str(offset), + AttributeValue::DebugLineStrRef(offset) => self.debug_line_str.get_str(offset), + _ => Err(Error::ExpectedStringAttributeValue), } } } diff --git a/src/read/line.rs b/src/read/line.rs index 4f1e3304a..5c2018122 100644 --- a/src/read/line.rs +++ b/src/read/line.rs @@ -2,10 +2,12 @@ use std::fmt; use std::result; use vec::Vec; -use common::{DebugLineOffset, Encoding, Format}; +use common::{ + DebugLineOffset, DebugLineStrOffset, DebugStrOffset, DebugStrOffsetsIndex, Encoding, Format, +}; use constants; use endianity::Endianity; -use read::{EndianSlice, Error, Reader, ReaderOffset, Result, Section}; +use read::{AttributeValue, EndianSlice, Error, Reader, ReaderOffset, Result, Section}; /// The `DebugLine` struct contains the source location to instruction mapping /// found in the `.debug_line` section. @@ -66,8 +68,8 @@ impl DebugLine { &self, offset: DebugLineOffset, address_size: u8, - comp_dir: Option, - comp_name: Option, + comp_dir: Option>, + comp_name: Option>, ) -> Result> { let input = &mut self.debug_line_section.clone(); input.skip(offset.0)?; @@ -104,7 +106,7 @@ where /// Get a reference to the held `LineProgramHeader`. fn header(&self) -> &LineProgramHeader; /// Add a file to the file table if necessary. - fn add_file(&mut self, file: FileEntry); + fn add_file(&mut self, file: FileEntry); } impl LineProgram for IncompleteLineProgram @@ -115,7 +117,7 @@ where fn header(&self) -> &LineProgramHeader { &self.header } - fn add_file(&mut self, file: FileEntry) { + fn add_file(&mut self, file: FileEntry) { self.header.file_names.push(file); } } @@ -128,7 +130,7 @@ where fn header(&self) -> &LineProgramHeader { &self.header } - fn add_file(&mut self, _: FileEntry) { + fn add_file(&mut self, _: FileEntry) { // Nop. Our file table is already complete. } } @@ -231,11 +233,15 @@ where /// Deprecated. `Opcode` has been renamed to `LineInstruction`. #[deprecated(note = "Opcode has been renamed to LineInstruction, use that instead.")] -pub type Opcode = LineInstruction; +pub type Opcode = LineInstruction::Offset>; /// A parsed line number program instruction. #[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum LineInstruction { +pub enum LineInstruction +where + R: Reader, + Offset: ReaderOffset, +{ /// > ### 6.2.5.1 Special Opcodes /// > /// > Each ubyte special opcode has the following effect on the state machine: @@ -354,7 +360,7 @@ pub enum LineInstruction { /// Defines a new source file in the line number program and appends it to /// the line number program header's list of source files. - DefineFile(FileEntry), + DefineFile(FileEntry), /// "The DW_LNE_set_discriminator opcode takes a single parameter, an /// unsigned LEB128 integer. It sets the discriminator register to the new @@ -365,11 +371,15 @@ pub enum LineInstruction { UnknownExtended(constants::DwLne, R), } -impl LineInstruction { +impl LineInstruction +where + R: Reader, + Offset: ReaderOffset, +{ fn parse<'header>( header: &'header LineProgramHeader, input: &mut R, - ) -> Result> + ) -> Result> where R: 'header, { @@ -388,9 +398,16 @@ impl LineInstruction { } constants::DW_LNE_define_file => { - let path_name = instr_rest.read_null_terminated_slice()?; - let entry = FileEntry::parse(&mut instr_rest, path_name)?; - Ok(LineInstruction::DefineFile(entry)) + if header.version() <= 4 { + let path_name = instr_rest.read_null_terminated_slice()?; + let entry = FileEntry::parse(&mut instr_rest, path_name)?; + Ok(LineInstruction::DefineFile(entry)) + } else { + Ok(LineInstruction::UnknownExtended( + constants::DW_LNE_define_file, + instr_rest, + )) + } } constants::DW_LNE_set_discriminator => { @@ -472,7 +489,11 @@ impl LineInstruction { } } -impl fmt::Display for LineInstruction { +impl fmt::Display for LineInstruction +where + R: Reader, + Offset: ReaderOffset, +{ fn fmt(&self, f: &mut fmt::Formatter) -> result::Result<(), fmt::Error> { match *self { LineInstruction::Special(opcode) => write!(f, "Special opcode {}", opcode), @@ -559,7 +580,7 @@ impl LineInstructions { pub fn next_instruction( &mut self, header: &LineProgramHeader, - ) -> Result>> { + ) -> Result>> { if self.input.is_empty() { return Ok(None); } @@ -655,7 +676,7 @@ impl LineRow { pub fn file<'header, R: Reader>( &self, header: &'header LineProgramHeader, - ) -> Option<&'header FileEntry> { + ) -> Option<&'header FileEntry> { header.file(self.file) } @@ -755,7 +776,7 @@ impl LineRow { #[inline] pub fn execute( &mut self, - instruction: LineInstruction, + instruction: LineInstruction, program: &mut Program, ) -> bool where @@ -1018,6 +1039,9 @@ where /// whose value is `opcode_base - 1`." standard_opcode_lengths: R, + /// "A sequence of directory entry format descriptions." + directory_entry_format: Vec, + /// > Entries in this sequence describe each path that was searched for /// > included source files in this compilation. (The paths include those /// > directories specified explicitly by the user for the compiler to search @@ -1026,21 +1050,24 @@ where /// > of the compilation. /// > /// > The last entry is followed by a single null byte. - include_directories: Vec, + include_directories: Vec>, + + /// "A sequence of file entry format descriptions." + file_name_entry_format: Vec, /// "Entries in this sequence describe source files that contribute to the /// line number information for this compilation unit or is used in other /// contexts." - file_names: Vec>, + file_names: Vec>, /// The encoded line program instructions. program_buf: R, - /// The `DW_AT_comp_dir` value from the compilation unit. - comp_dir: Option, + /// The current directory of the compilation. + comp_dir: Option>, - /// The `DW_AT_name` value from the compilation unit. - comp_name: Option>, + /// The primary source file. + comp_file: Option>, } impl LineProgramHeader @@ -1124,28 +1151,73 @@ where &self.standard_opcode_lengths } + /// Get the format of a directory entry. + pub fn directory_entry_format(&self) -> &[FileEntryFormat] { + &self.directory_entry_format[..] + } + /// Get the set of include directories for this header's line program. /// - /// The compilation's current directory is not included in the return value, - /// but is implicitly considered to be in the set per spec. - pub fn include_directories(&self) -> &[R] { + /// For DWARF version <= 4, the compilation's current directory is not included + /// in the return value, but is implicitly considered to be in the set per spec. + pub fn include_directories(&self) -> &[AttributeValue] { &self.include_directories[..] } /// The include directory with the given directory index. /// /// A directory index of 0 corresponds to the compilation unit directory. - pub fn directory(&self, directory: u64) -> Option { - if directory == 0 { - self.comp_dir.clone() + pub fn directory(&self, directory: u64) -> Option> { + if self.encoding.version <= 4 { + if directory == 0 { + self.comp_dir.clone() + } else { + let directory = directory as usize - 1; + self.include_directories.get(directory).cloned() + } } else { - let directory = directory as usize - 1; - self.include_directories.get(directory).cloned() + self.include_directories.get(directory as usize).cloned() } } + /// Get the format of a file name entry. + pub fn file_name_entry_format(&self) -> &[FileEntryFormat] { + &self.file_name_entry_format[..] + } + + /// Return true if the file entries may have valid timestamps. + /// + /// Only returns false if we definitely know that all timestamp fields + /// are invalid. + pub fn file_has_timestamp(&self) -> bool { + self.encoding.version <= 4 + || self + .file_name_entry_format + .iter() + .any(|x| x.content_type == constants::DW_LNCT_timestamp) + } + + /// Return true if the file entries may have valid sizes. + /// + /// Only returns false if we definitely know that all size fields + /// are invalid. + pub fn file_has_size(&self) -> bool { + self.encoding.version <= 4 + || self + .file_name_entry_format + .iter() + .any(|x| x.content_type == constants::DW_LNCT_size) + } + + /// Return true if the file name entry format contains an MD5 field. + pub fn file_has_md5(&self) -> bool { + self.file_name_entry_format + .iter() + .any(|x| x.content_type == constants::DW_LNCT_MD5) + } + /// Get the list of source files that appear in this header's line program. - pub fn file_names(&self) -> &[FileEntry] { + pub fn file_names(&self) -> &[FileEntry] { &self.file_names[..] } @@ -1154,12 +1226,16 @@ where /// A file index of 0 corresponds to the compilation unit file. /// Note that a file index of 0 is invalid for DWARF version <= 4, /// but we support it anyway. - pub fn file(&self, file: u64) -> Option<&FileEntry> { - if file == 0 { - self.comp_name.as_ref() + pub fn file(&self, file: u64) -> Option<&FileEntry> { + if self.encoding.version <= 4 { + if file == 0 { + self.comp_file.as_ref() + } else { + let file = file as usize - 1; + self.file_names.get(file) + } } else { - let file = file as usize - 1; - self.file_names.get(file) + self.file_names.get(file as usize) } } @@ -1196,18 +1272,32 @@ where fn parse( input: &mut R, offset: DebugLineOffset, - address_size: u8, - comp_dir: Option, - comp_name: Option, + mut address_size: u8, + mut comp_dir: Option>, + comp_name: Option>, ) -> Result> { let (unit_length, format) = input.read_initial_length()?; let rest = &mut input.split(unit_length)?; let version = rest.read_u16()?; - if version < 2 || version > 4 { + if version < 2 || version > 5 { return Err(Error::UnknownVersion(u64::from(version))); } + if version >= 5 { + address_size = rest.read_u8()?; + let segment_selector_size = rest.read_u8()?; + if segment_selector_size != 0 { + return Err(Error::UnsupportedSegmentSize); + } + } + + let encoding = Encoding { + format, + version, + address_size, + }; + let header_length = rest.read_length(format)?; let mut program_buf = rest.clone(); @@ -1241,36 +1331,59 @@ where let standard_opcode_count = R::Offset::from_u8(opcode_base - 1); let standard_opcode_lengths = rest.split(standard_opcode_count)?; + let directory_entry_format; let mut include_directories = Vec::new(); - loop { - let directory = rest.read_null_terminated_slice()?; - if directory.is_empty() { - break; + if version <= 4 { + directory_entry_format = Vec::new(); + loop { + let directory = rest.read_null_terminated_slice()?; + if directory.is_empty() { + break; + } + include_directories.push(AttributeValue::String(directory)); + } + } else { + comp_dir = None; + directory_entry_format = FileEntryFormat::parse(rest)?; + let count = rest.read_uleb128()?; + for _ in 0..count { + include_directories.push(parse_directory_v5( + rest, + encoding, + &directory_entry_format, + )?); } - include_directories.push(directory); } + let comp_file; + let file_name_entry_format; let mut file_names = Vec::new(); - loop { - let path_name = rest.read_null_terminated_slice()?; - if path_name.is_empty() { - break; + if version <= 4 { + comp_file = comp_name.map(|name| FileEntry { + path_name: name, + directory_index: 0, + timestamp: 0, + size: 0, + md5: [0; 16], + }); + + file_name_entry_format = Vec::new(); + loop { + let path_name = rest.read_null_terminated_slice()?; + if path_name.is_empty() { + break; + } + file_names.push(FileEntry::parse(rest, path_name)?); + } + } else { + comp_file = None; + file_name_entry_format = FileEntryFormat::parse(rest)?; + let count = rest.read_uleb128()?; + for _ in 0..count { + file_names.push(parse_file_v5(rest, encoding, &file_name_entry_format)?); } - file_names.push(FileEntry::parse(rest, path_name)?); } - let comp_name = comp_name.map(|name| FileEntry { - path_name: name, - directory_index: 0, - last_modification: 0, - length: 0, - }); - - let encoding = Encoding { - format, - version, - address_size, - }; let header = LineProgramHeader { encoding, offset, @@ -1283,11 +1396,13 @@ where line_range, opcode_base, standard_opcode_lengths, + directory_entry_format, include_directories, + file_name_entry_format, file_names, program_buf, comp_dir, - comp_name, + comp_file, }; Ok(header) } @@ -1443,24 +1558,35 @@ where /// An entry in the `LineProgramHeader`'s `file_names` set. #[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub struct FileEntry { - path_name: R, +pub struct FileEntry +where + R: Reader, + Offset: ReaderOffset, +{ + path_name: AttributeValue, directory_index: u64, - last_modification: u64, - length: u64, + timestamp: u64, + size: u64, + md5: [u8; 16], } -impl FileEntry { - fn parse(input: &mut R, path_name: R) -> Result> { +impl FileEntry +where + R: Reader, + Offset: ReaderOffset, +{ + // version 2-4 + fn parse(input: &mut R, path_name: R) -> Result> { let directory_index = input.read_uleb128()?; - let last_modification = input.read_uleb128()?; - let length = input.read_uleb128()?; + let timestamp = input.read_uleb128()?; + let size = input.read_uleb128()?; let entry = FileEntry { - path_name, + path_name: AttributeValue::String(path_name), directory_index, - last_modification, - length, + timestamp, + size, + md5: [0; 16], }; Ok(entry) @@ -1471,7 +1597,7 @@ impl FileEntry { /// > name, the file is located relative to either the compilation directory /// > (as specified by the DW_AT_comp_dir attribute given in the compilation /// > unit) or one of the directories in the include_directories section. - pub fn path_name(&self) -> R { + pub fn path_name(&self) -> AttributeValue { self.path_name.clone() } @@ -1493,30 +1619,269 @@ impl FileEntry { /// Get this file's directory. /// /// A directory index of 0 corresponds to the compilation unit directory. - pub fn directory(&self, header: &LineProgramHeader) -> Option { + pub fn directory( + &self, + header: &LineProgramHeader, + ) -> Option> { header.directory(self.directory_index) } + /// The implementation-defined time of last modification of the file, + /// or 0 if not available. + pub fn timestamp(&self) -> u64 { + self.timestamp + } + /// "An unsigned LEB128 number representing the time of last modification of /// the file, or 0 if not available." + // Terminology changed in DWARF version 5. + #[doc(hidden)] pub fn last_modification(&self) -> u64 { - self.last_modification + self.timestamp + } + + /// The size of the file in bytes, or 0 if not available. + pub fn size(&self) -> u64 { + self.size } /// "An unsigned LEB128 number representing the length in bytes of the file, /// or 0 if not available." + // Terminology changed in DWARF version 5. + #[doc(hidden)] pub fn length(&self) -> u64 { - self.length + self.size + } + + /// A 16-byte MD5 digest of the file contents. + /// + /// Only valid if `LineProgramHeader::file_has_md5` returns `true`. + pub fn md5(&self) -> &[u8; 16] { + &self.md5 + } +} + +/// The format of a compononent of an include directory or file name entry. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct FileEntryFormat { + /// The type of information that is represented by the component. + pub content_type: constants::DwLnct, + + /// The encoding form of the component value. + pub form: constants::DwForm, +} + +impl FileEntryFormat { + fn parse(input: &mut R) -> Result> { + let format_count = input.read_u8()? as usize; + let mut format = Vec::with_capacity(format_count); + let mut path_count = 0; + for _ in 0..format_count { + let content_type = input.read_uleb128()?; + let content_type = if content_type > u64::from(u16::max_value()) { + constants::DwLnct(u16::max_value()) + } else { + constants::DwLnct(content_type as u16) + }; + if content_type == constants::DW_LNCT_path { + path_count += 1; + } + + let form = constants::DwForm(input.read_uleb128()?); + + format.push(FileEntryFormat { content_type, form }); + } + if path_count != 1 { + return Err(Error::MissingFileEntryFormatPath); + } + Ok(format) + } +} + +fn parse_directory_v5( + input: &mut R, + encoding: Encoding, + formats: &[FileEntryFormat], +) -> Result> { + let mut path_name = None; + + for format in formats { + let value = parse_attribute(input, encoding, format.form)?; + if format.content_type == constants::DW_LNCT_path { + path_name = Some(value); + } + } + + Ok(path_name.unwrap()) +} + +fn parse_file_v5( + input: &mut R, + encoding: Encoding, + formats: &[FileEntryFormat], +) -> Result> { + let mut path_name = None; + let mut directory_index = 0; + let mut timestamp = 0; + let mut size = 0; + let mut md5 = [0; 16]; + + for format in formats { + let value = parse_attribute(input, encoding, format.form)?; + match format.content_type { + constants::DW_LNCT_path => path_name = Some(value), + constants::DW_LNCT_directory_index => { + if let Some(value) = value.udata_value() { + directory_index = value; + } + } + constants::DW_LNCT_timestamp => { + if let Some(value) = value.udata_value() { + timestamp = value; + } + } + constants::DW_LNCT_size => { + if let Some(value) = value.udata_value() { + size = value; + } + } + constants::DW_LNCT_MD5 => { + if let AttributeValue::Block(mut value) = value { + if value.len().into_u64() == 16 { + md5 = value.read_u8_array()?; + } + } + } + // Ignore unknown content types. + _ => {} + } } + + Ok(FileEntry { + path_name: path_name.unwrap(), + directory_index, + timestamp, + size, + md5, + }) +} + +// TODO: this should be shared with unit::parse_attribute(), but that is hard to do. +fn parse_attribute( + input: &mut R, + encoding: Encoding, + form: constants::DwForm, +) -> Result> { + Ok(match form { + constants::DW_FORM_block1 => { + let len = input.read_u8().map(R::Offset::from_u8)?; + let block = input.split(len)?; + AttributeValue::Block(block) + } + constants::DW_FORM_block2 => { + let len = input.read_u16().map(R::Offset::from_u16)?; + let block = input.split(len)?; + AttributeValue::Block(block) + } + constants::DW_FORM_block4 => { + let len = input.read_u32().map(R::Offset::from_u32)?; + let block = input.split(len)?; + AttributeValue::Block(block) + } + constants::DW_FORM_block => { + let len = input.read_uleb128().and_then(R::Offset::from_u64)?; + let block = input.split(len)?; + AttributeValue::Block(block) + } + constants::DW_FORM_data1 => { + let data = input.read_u8()?; + AttributeValue::Data1(data) + } + constants::DW_FORM_data2 => { + let data = input.read_u16()?; + AttributeValue::Data2(data) + } + constants::DW_FORM_data4 => { + let data = input.read_u32()?; + AttributeValue::Data4(data) + } + constants::DW_FORM_data8 => { + let data = input.read_u64()?; + AttributeValue::Data8(data) + } + constants::DW_FORM_data16 => { + let block = input.split(R::Offset::from_u8(16))?; + AttributeValue::Block(block) + } + constants::DW_FORM_udata => { + let data = input.read_uleb128()?; + AttributeValue::Udata(data) + } + constants::DW_FORM_sdata => { + let data = input.read_sleb128()?; + AttributeValue::Sdata(data) + } + constants::DW_FORM_flag => { + let present = input.read_u8()?; + AttributeValue::Flag(present != 0) + } + constants::DW_FORM_sec_offset => { + let offset = input.read_offset(encoding.format)?; + AttributeValue::SecOffset(offset) + } + constants::DW_FORM_string => { + let string = input.read_null_terminated_slice()?; + AttributeValue::String(string) + } + constants::DW_FORM_strp => { + let offset = input.read_offset(encoding.format)?; + AttributeValue::DebugStrRef(DebugStrOffset(offset)) + } + constants::DW_FORM_strp_sup | constants::DW_FORM_GNU_strp_alt => { + let offset = input.read_offset(encoding.format)?; + AttributeValue::DebugStrRefSup(DebugStrOffset(offset)) + } + constants::DW_FORM_line_strp => { + let offset = input.read_offset(encoding.format)?; + AttributeValue::DebugLineStrRef(DebugLineStrOffset(offset)) + } + constants::DW_FORM_strx | constants::DW_FORM_GNU_str_index => { + let index = input.read_uleb128().and_then(R::Offset::from_u64)?; + AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(index)) + } + constants::DW_FORM_strx1 => { + let index = input.read_u8().map(R::Offset::from_u8)?; + AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(index)) + } + constants::DW_FORM_strx2 => { + let index = input.read_u16().map(R::Offset::from_u16)?; + AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(index)) + } + constants::DW_FORM_strx3 => { + let index = input.read_uint(3).and_then(R::Offset::from_u64)?; + AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(index)) + } + constants::DW_FORM_strx4 => { + let index = input.read_u32().map(R::Offset::from_u32)?; + AttributeValue::DebugStrOffsetsIndex(DebugStrOffsetsIndex(index)) + } + _ => { + return Err(Error::UnknownForm); + } + }) } #[cfg(test)] mod tests { + extern crate test_assembler; + + use self::test_assembler::{Endian, Label, LabelMaker, Section}; use super::*; use constants; use endianity::LittleEndian; use read::{EndianSlice, Error}; use std::u8; + use test_util::GimliSectionMethods; #[test] fn test_parse_debug_line_32_ok() { @@ -1572,8 +1937,8 @@ mod tests { ]; let rest = &mut EndianSlice::new(&buf, LittleEndian); - let comp_dir = EndianSlice::new(b"/comp_dir", LittleEndian); - let comp_name = EndianSlice::new(b"/comp_name", LittleEndian); + let comp_dir = AttributeValue::String(EndianSlice::new(b"/comp_dir", LittleEndian)); + let comp_name = AttributeValue::String(EndianSlice::new(b"/comp_name", LittleEndian)); let header = LineProgramHeader::parse(rest, DebugLineOffset(0), 4, Some(comp_dir), Some(comp_name)) @@ -1599,23 +1964,25 @@ mod tests { assert_eq!(header.standard_opcode_lengths().slice(), &expected_lengths); let expected_include_directories = [ - EndianSlice::new(b"/inc", LittleEndian), - EndianSlice::new(b"/inc2", LittleEndian), + AttributeValue::String(EndianSlice::new(b"/inc", LittleEndian)), + AttributeValue::String(EndianSlice::new(b"/inc2", LittleEndian)), ]; assert_eq!(header.include_directories(), &expected_include_directories); let expected_file_names = [ FileEntry { - path_name: EndianSlice::new(b"foo.rs", LittleEndian), + path_name: AttributeValue::String(EndianSlice::new(b"foo.rs", LittleEndian)), directory_index: 0, - last_modification: 0, - length: 0, + timestamp: 0, + size: 0, + md5: [0; 16], }, FileEntry { - path_name: EndianSlice::new(b"bar.h", LittleEndian), + path_name: AttributeValue::String(EndianSlice::new(b"bar.h", LittleEndian)), directory_index: 1, - last_modification: 0, - length: 0, + timestamp: 0, + size: 0, + md5: [0; 16], }, ]; assert_eq!(&*header.file_names(), &expected_file_names); @@ -1765,25 +2132,29 @@ mod tests { header_length: 1, file_names: vec![ FileEntry { - path_name: EndianSlice::new(b"foo.c", LittleEndian), + path_name: AttributeValue::String(EndianSlice::new(b"foo.c", LittleEndian)), directory_index: 0, - last_modification: 0, - length: 0, + timestamp: 0, + size: 0, + md5: [0; 16], }, FileEntry { - path_name: EndianSlice::new(b"bar.rs", LittleEndian), + path_name: AttributeValue::String(EndianSlice::new(b"bar.rs", LittleEndian)), directory_index: 0, - last_modification: 0, - length: 0, + timestamp: 0, + size: 0, + md5: [0; 16], }, ], line_base: -3, + line_range: 12, unit_length: 1, standard_opcode_lengths: EndianSlice::new(STANDARD_OPCODE_LENGTHS, LittleEndian), include_directories: vec![], - line_range: 12, + directory_entry_format: vec![], + file_name_entry_format: vec![], comp_dir: None, - comp_name: None, + comp_file: None, } } @@ -2017,10 +2388,11 @@ mod tests { constants::DW_LNE_define_file, file, LineInstruction::DefineFile(FileEntry { - path_name: EndianSlice::new(b"foo.c", LittleEndian), + path_name: AttributeValue::String(EndianSlice::new(b"foo.c", LittleEndian)), directory_index: 0, - last_modification: 1, - length: 2, + timestamp: 1, + size: 2, + md5: [0; 16], }), ); @@ -2039,15 +2411,16 @@ mod tests { let path_name = [b'f', b'o', b'o', b'.', b'r', b's', 0]; let mut file = FileEntry { - path_name: EndianSlice::new(&path_name, LittleEndian), + path_name: AttributeValue::String(EndianSlice::new(&path_name, LittleEndian)), directory_index: 1, - last_modification: 0, - length: 0, + timestamp: 0, + size: 0, + md5: [0; 16], }; let mut header = make_test_header(EndianSlice::new(&[], LittleEndian)); - let dir = EndianSlice::new(b"dir", LittleEndian); + let dir = AttributeValue::String(EndianSlice::new(b"dir", LittleEndian)); header.include_directories.push(dir); assert_eq!(file.directory(&header), Some(dir)); @@ -2425,10 +2798,11 @@ mod tests { let mut row = LineRow::new(program.header()); let file = FileEntry { - path_name: EndianSlice::new(b"test.cpp", LittleEndian), + path_name: AttributeValue::String(EndianSlice::new(b"test.cpp", LittleEndian)), directory_index: 0, - last_modification: 0, - length: 0, + timestamp: 0, + size: 0, + md5: [0; 16], }; let opcode = LineInstruction::DefineFile(file); @@ -2472,4 +2846,146 @@ mod tests { let a: &OneShotLineRows> = unimplemented!(); let _: &OneShotLineRows> = a; } + + #[test] + fn test_parse_debug_line_v5_ok() { + let expected_lengths = &[1, 2]; + let expected_program = &[0, 1, 2, 3, 4]; + let expected_rest = &[5, 6, 7, 8, 9]; + let expected_include_directories = [ + AttributeValue::String(EndianSlice::new(b"dir1", LittleEndian)), + AttributeValue::String(EndianSlice::new(b"dir2", LittleEndian)), + ]; + let expected_file_names = [ + FileEntry { + path_name: AttributeValue::String(EndianSlice::new(b"file1", LittleEndian)), + directory_index: 0, + timestamp: 0, + size: 0, + md5: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + }, + FileEntry { + path_name: AttributeValue::String(EndianSlice::new(b"file2", LittleEndian)), + directory_index: 1, + timestamp: 0, + size: 0, + md5: [ + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + ], + }, + ]; + + for format in vec![Format::Dwarf32, Format::Dwarf64] { + let length = Label::new(); + let header_length = Label::new(); + let start = Label::new(); + let header_start = Label::new(); + let end = Label::new(); + let header_end = Label::new(); + let mut section = Section::with_endian(Endian::Little) + .initial_length(format, &length, &start) + .D16(5) + // Address size. + .D8(4) + // Segment selector size. + .D8(0) + .word_label(format.word_size(), &header_length) + .mark(&header_start) + // Minimum instruction length. + .D8(1) + // Maximum operations per byte. + .D8(1) + // Default is_stmt. + .D8(1) + // Line base. + .D8(0) + // Line range. + .D8(1) + // Opcode base. + .D8(expected_lengths.len() as u8 + 1) + // Standard opcode lengths for opcodes 1 .. opcode base - 1. + .append_bytes(expected_lengths) + // Directory entry format count. + .D8(1) + .uleb(constants::DW_LNCT_path.0 as u64) + .uleb(constants::DW_FORM_string.0 as u64) + // Directory count. + .D8(2) + .append_bytes(b"dir1\0") + .append_bytes(b"dir2\0") + // File entry format count. + .D8(3) + .uleb(constants::DW_LNCT_path.0 as u64) + .uleb(constants::DW_FORM_string.0 as u64) + .uleb(constants::DW_LNCT_directory_index.0 as u64) + .uleb(constants::DW_FORM_data1.0 as u64) + .uleb(constants::DW_LNCT_MD5.0 as u64) + .uleb(constants::DW_FORM_data16.0 as u64) + // File count. + .D8(2) + .append_bytes(b"file1\0") + .D8(0) + .append_bytes(&expected_file_names[0].md5) + .append_bytes(b"file2\0") + .D8(1) + .append_bytes(&expected_file_names[1].md5) + .mark(&header_end) + // Dummy line program data. + .append_bytes(expected_program) + .mark(&end) + // Dummy trailing data. + .append_bytes(expected_rest); + length.set_const((&end - &start) as u64); + header_length.set_const((&header_end - &header_start) as u64); + let section = section.get_contents().unwrap(); + + let input = &mut EndianSlice::new(§ion, LittleEndian); + + let header = LineProgramHeader::parse(input, DebugLineOffset(0), 0, None, None) + .expect("should parse header ok"); + println!("{:?}", header); + + assert_eq!(header.raw_program_buf().slice(), expected_program); + assert_eq!(input.slice(), expected_rest); + + assert_eq!(header.offset, DebugLineOffset(0)); + assert_eq!(header.version(), 5); + assert_eq!(header.address_size(), 4); + assert_eq!(header.minimum_instruction_length(), 1); + assert_eq!(header.maximum_operations_per_instruction(), 1); + assert_eq!(header.default_is_stmt(), true); + assert_eq!(header.line_base(), 0); + assert_eq!(header.line_range(), 1); + assert_eq!(header.opcode_base(), expected_lengths.len() as u8 + 1); + assert_eq!(header.standard_opcode_lengths().slice(), expected_lengths); + assert_eq!( + header.directory_entry_format(), + &[FileEntryFormat { + content_type: constants::DW_LNCT_path, + form: constants::DW_FORM_string, + }] + ); + assert_eq!(header.include_directories(), expected_include_directories); + assert_eq!(header.directory(0), Some(expected_include_directories[0])); + assert_eq!( + header.file_name_entry_format(), + &[ + FileEntryFormat { + content_type: constants::DW_LNCT_path, + form: constants::DW_FORM_string, + }, + FileEntryFormat { + content_type: constants::DW_LNCT_directory_index, + form: constants::DW_FORM_data1, + }, + FileEntryFormat { + content_type: constants::DW_LNCT_MD5, + form: constants::DW_FORM_data16, + } + ] + ); + assert_eq!(header.file_names(), expected_file_names); + assert_eq!(header.file(0), Some(&expected_file_names[0])); + } + } } diff --git a/src/read/mod.rs b/src/read/mod.rs index 62eab7c19..f2883c57c 100644 --- a/src/read/mod.rs +++ b/src/read/mod.rs @@ -213,6 +213,10 @@ pub enum Error { MissingUnitDie, /// A DIE attribute used an unsupported form. UnsupportedAttributeForm, + /// Missing DW_LNCT_path in file entry format. + MissingFileEntryFormatPath, + /// Expected an attribute value to be a string form. + ExpectedStringAttributeValue, } impl fmt::Display for Error { @@ -349,6 +353,10 @@ impl Error { "A compilation unit or type unit is missing its top level DIE." } Error::UnsupportedAttributeForm => "A DIE attribute used an unsupported form.", + Error::MissingFileEntryFormatPath => "Missing DW_LNCT_path in file entry format.", + Error::ExpectedStringAttributeValue => { + "Expected an attribute value to be a string form." + } } } } diff --git a/src/read/str.rs b/src/read/str.rs index 31541c62b..2e7133933 100644 --- a/src/read/str.rs +++ b/src/read/str.rs @@ -1,4 +1,4 @@ -use common::{DebugStrOffset, DebugStrOffsetsBase, DebugStrOffsetsIndex}; +use common::{DebugLineStrOffset, DebugStrOffset, DebugStrOffsetsBase, DebugStrOffsetsIndex}; use endianity::Endianity; use read::{EndianSlice, Reader, ReaderOffset, Result, Section}; use Format; @@ -115,6 +115,34 @@ impl From for DebugStrOffsets { } } +/// The `DebugLineStr` struct represents the DWARF strings +/// found in the `.debug_line_str` section. +#[derive(Debug, Default, Clone, Copy)] +pub struct DebugLineStr { + section: R, +} + +impl DebugLineStr { + /// Lookup a string from the `.debug_line_str` section by DebugLineStrOffset. + pub fn get_str(&self, offset: DebugLineStrOffset) -> Result { + let input = &mut self.section.clone(); + input.skip(offset.0)?; + input.read_null_terminated_slice() + } +} + +impl Section for DebugLineStr { + fn section_name() -> &'static str { + ".debug_line_str" + } +} + +impl From for DebugLineStr { + fn from(section: R) -> Self { + DebugLineStr { section } + } +} + #[cfg(test)] mod tests { extern crate test_assembler; diff --git a/src/read/unit.rs b/src/read/unit.rs index ac04beb79..8e37b7403 100644 --- a/src/read/unit.rs +++ b/src/read/unit.rs @@ -7,9 +7,10 @@ use std::{u16, u8}; use common::{ DebugAbbrevOffset, DebugAddrBase, DebugAddrIndex, DebugInfoOffset, DebugLineOffset, - DebugLocListsBase, DebugLocListsIndex, DebugMacinfoOffset, DebugRngListsBase, - DebugRngListsIndex, DebugStrOffset, DebugStrOffsetsBase, DebugStrOffsetsIndex, - DebugTypeSignature, DebugTypesOffset, Encoding, Format, LocationListsOffset, RangeListsOffset, + DebugLineStrOffset, DebugLocListsBase, DebugLocListsIndex, DebugMacinfoOffset, + DebugRngListsBase, DebugRngListsIndex, DebugStrOffset, DebugStrOffsetsBase, + DebugStrOffsetsIndex, DebugTypeSignature, DebugTypesOffset, Encoding, Format, + LocationListsOffset, RangeListsOffset, }; use constants; use endianity::Endianity; @@ -875,7 +876,10 @@ where /// Find the first attribute in this entry which has the given name, /// and return its raw value. Returns `Ok(None)` if no attribute is found. - pub fn attr_value_raw(&self, name: constants::DwAt) -> Result>> { + pub fn attr_value_raw( + &self, + name: constants::DwAt, + ) -> Result>> { self.attr(name) .map(|attr| attr.map(|attr| attr.raw_value())) } @@ -883,7 +887,10 @@ where /// Find the first attribute in this entry which has the given name, /// and return its normalized value. Returns `Ok(None)` if no /// attribute is found. - pub fn attr_value(&self, name: constants::DwAt) -> Result>> { + pub fn attr_value( + &self, + name: constants::DwAt, + ) -> Result>> { self.attr(name).map(|attr| attr.map(|attr| attr.value())) } @@ -946,7 +953,11 @@ where // for their data. This gives better code generation in `parse_attribute`. #[repr(u64)] #[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum AttributeValue { +pub enum AttributeValue +where + R: Reader, + Offset: ReaderOffset, +{ /// "Refers to some location in the address space of the described program." Addr(u64), @@ -1005,62 +1016,65 @@ pub enum AttributeValue { /// An offset into another section. Which section this is an offset into /// depends on context. - SecOffset(R::Offset), + SecOffset(Offset), /// An offset to a set of addresses in the `.debug_addr` section. - DebugAddrBase(DebugAddrBase), + DebugAddrBase(DebugAddrBase), /// An index into a set of addresses in the `.debug_addr` section. - DebugAddrIndex(DebugAddrIndex), + DebugAddrIndex(DebugAddrIndex), /// An offset into the current compilation unit. - UnitRef(UnitOffset), + UnitRef(UnitOffset), /// An offset into the current `.debug_info` section, but possibly a /// different compilation unit from the current one. - DebugInfoRef(DebugInfoOffset), + DebugInfoRef(DebugInfoOffset), /// An offset into the `.debug_info` section of the supplementary object file. - DebugInfoRefSup(DebugInfoOffset), + DebugInfoRefSup(DebugInfoOffset), /// An offset into the `.debug_line` section. - DebugLineRef(DebugLineOffset), + DebugLineRef(DebugLineOffset), /// An offset into either the `.debug_loc` section or the `.debug_loclists` section. - LocationListsRef(LocationListsOffset), + LocationListsRef(LocationListsOffset), /// An offset to a set of offsets in the `.debug_loclists` section. - DebugLocListsBase(DebugLocListsBase), + DebugLocListsBase(DebugLocListsBase), /// An index into a set of offsets in the `.debug_loclists` section. - DebugLocListsIndex(DebugLocListsIndex), + DebugLocListsIndex(DebugLocListsIndex), /// An offset into the `.debug_macinfo` section. - DebugMacinfoRef(DebugMacinfoOffset), + DebugMacinfoRef(DebugMacinfoOffset), /// An offset into the `.debug_ranges` section. - RangeListsRef(RangeListsOffset), + RangeListsRef(RangeListsOffset), /// An offset to a set of offsets in the `.debug_rnglists` section. - DebugRngListsBase(DebugRngListsBase), + DebugRngListsBase(DebugRngListsBase), /// An index into a set of offsets in the `.debug_rnglists` section. - DebugRngListsIndex(DebugRngListsIndex), + DebugRngListsIndex(DebugRngListsIndex), /// A type signature. DebugTypesRef(DebugTypeSignature), /// An offset into the `.debug_str` section. - DebugStrRef(DebugStrOffset), + DebugStrRef(DebugStrOffset), /// An offset into the `.debug_str` section of the supplementary object file. - DebugStrRefSup(DebugStrOffset), + DebugStrRefSup(DebugStrOffset), /// An offset to a set of entries in the `.debug_str_offsets` section. - DebugStrOffsetsBase(DebugStrOffsetsBase), + DebugStrOffsetsBase(DebugStrOffsetsBase), /// An index into a set of entries in the `.debug_str_offsets` section. - DebugStrOffsetsIndex(DebugStrOffsetsIndex), + DebugStrOffsetsIndex(DebugStrOffsetsIndex), + + /// An offset into the `.debug_line_str` section. + DebugLineStrRef(DebugLineStrOffset), /// A slice of bytes representing a string. Does not include a final null byte. /// Not guaranteed to be UTF-8 or anything like that. @@ -1112,7 +1126,7 @@ pub enum AttributeValue { #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct Attribute { name: constants::DwAt, - value: AttributeValue, + value: AttributeValue, } impl Attribute { @@ -1122,7 +1136,7 @@ impl Attribute { } /// Get this attribute's raw value. - pub fn raw_value(&self) -> AttributeValue { + pub fn raw_value(&self) -> AttributeValue { self.value.clone() } @@ -1136,7 +1150,7 @@ impl Attribute { /// See "Figure 20. Attribute encodings" and "Figure 21. Attribute form encodings". #[allow(clippy::cyclomatic_complexity)] #[allow(clippy::match_same_arms)] - pub fn value(&self) -> AttributeValue { + pub fn value(&self) -> AttributeValue { // Figure 20 shows the possible attribute classes for each name. // Figure 21 shows the possible attribute classes for each form. // For each attribute name, we need to match on the form, and @@ -1574,6 +1588,85 @@ impl Attribute { self.value.clone() } + /// Try to convert this attribute's value to a u8. + #[inline] + pub fn u8_value(&self) -> Option { + self.value.u8_value() + } + + /// Try to convert this attribute's value to a u16. + #[inline] + pub fn u16_value(&self) -> Option { + self.value.u16_value() + } + + /// Try to convert this attribute's value to an unsigned integer. + #[inline] + pub fn udata_value(&self) -> Option { + self.value.udata_value() + } + + /// Try to convert this attribute's value to a signed integer. + #[inline] + pub fn sdata_value(&self) -> Option { + self.value.sdata_value() + } + + /// Try to convert this attribute's value to an offset. + #[inline] + pub fn offset_value(&self) -> Option { + self.value.offset_value() + } + + /// Try to convert this attribute's value to an expression or location buffer. + /// + /// Expressions and locations may be `DW_FORM_block*` or `DW_FORM_exprloc`. + /// The standard doesn't mention `DW_FORM_block*` as a possible form, but + /// it is encountered in practice. + #[inline] + fn exprloc_value(&self) -> Option> { + self.value.exprloc_value() + } + + /// Try to return this attribute's value as a string slice. + /// + /// If this attribute's value is either an inline `DW_FORM_string` string, + /// or a `DW_FORM_strp` reference to an offset into the `.debug_str` + /// section, return the attribute's string value as `Some`. Other attribute + /// value forms are returned as `None`. + /// + /// Warning: this function does not handle all possible string forms. + /// Use `Dwarf::attr_string` instead. + #[inline] + pub fn string_value(&self, debug_str: &DebugStr) -> Option { + self.value.string_value(debug_str) + } + + /// Try to return this attribute's value as a string slice. + /// + /// If this attribute's value is either an inline `DW_FORM_string` string, + /// or a `DW_FORM_strp` reference to an offset into the `.debug_str` + /// section, or a `DW_FORM_strp_sup` reference to an offset into a supplementary + /// object file, return the attribute's string value as `Some`. Other attribute + /// value forms are returned as `None`. + /// + /// Warning: this function does not handle all possible string forms. + /// Use `Dwarf::attr_string` instead. + #[inline] + pub fn string_value_sup( + &self, + debug_str: &DebugStr, + debug_str_sup: Option<&DebugStr>, + ) -> Option { + self.value.string_value_sup(debug_str, debug_str_sup) + } +} + +impl AttributeValue +where + R: Reader, + Offset: ReaderOffset, +{ /// Try to convert this attribute's value to a u8. pub fn u8_value(&self) -> Option { if let Some(value) = self.udata_value() { @@ -1596,7 +1689,7 @@ impl Attribute { /// Try to convert this attribute's value to an unsigned integer. pub fn udata_value(&self) -> Option { - Some(match self.value { + Some(match *self { AttributeValue::Data1(data) => u64::from(data), AttributeValue::Data2(data) => u64::from(data), AttributeValue::Data4(data) => u64::from(data), @@ -1615,7 +1708,7 @@ impl Attribute { /// Try to convert this attribute's value to a signed integer. pub fn sdata_value(&self) -> Option { - Some(match self.value { + Some(match *self { AttributeValue::Data1(data) => i64::from(data as i8), AttributeValue::Data2(data) => i64::from(data as i16), AttributeValue::Data4(data) => i64::from(data as i32), @@ -1636,7 +1729,7 @@ impl Attribute { pub fn offset_value(&self) -> Option { // While offsets will be DW_FORM_data4/8 in DWARF version 2/3, // these have already been converted to `SecOffset. - if let AttributeValue::SecOffset(offset) = self.value { + if let AttributeValue::SecOffset(offset) = *self { Some(offset) } else { None @@ -1649,7 +1742,7 @@ impl Attribute { /// The standard doesn't mention `DW_FORM_block*` as a possible form, but /// it is encountered in practice. fn exprloc_value(&self) -> Option> { - Some(match self.value { + Some(match *self { AttributeValue::Block(ref data) => Expression(data.clone()), AttributeValue::Exprloc(ref data) => data.clone(), _ => return None, @@ -1662,8 +1755,11 @@ impl Attribute { /// or a `DW_FORM_strp` reference to an offset into the `.debug_str` /// section, return the attribute's string value as `Some`. Other attribute /// value forms are returned as `None`. + /// + /// Warning: this function does not handle all possible string forms. + /// Use `Dwarf::attr_string` instead. pub fn string_value(&self, debug_str: &DebugStr) -> Option { - match self.value { + match *self { AttributeValue::String(ref string) => Some(string.clone()), AttributeValue::DebugStrRef(offset) => debug_str.get_str(offset).ok(), _ => None, @@ -1677,12 +1773,15 @@ impl Attribute { /// section, or a `DW_FORM_strp_sup` reference to an offset into a supplementary /// object file, return the attribute's string value as `Some`. Other attribute /// value forms are returned as `None`. + /// + /// Warning: this function does not handle all possible string forms. + /// Use `Dwarf::attr_string` instead. pub fn string_value_sup( &self, debug_str: &DebugStr, debug_str_sup: Option<&DebugStr>, ) -> Option { - match self.value { + match *self { AttributeValue::String(ref string) => Some(string.clone()), AttributeValue::DebugStrRef(offset) => debug_str.get_str(offset).ok(), AttributeValue::DebugStrRefSup(offset) => { @@ -1805,6 +1904,10 @@ pub(crate) fn parse_attribute<'unit, 'abbrev, R: Reader>( AttributeValue::Data8(data) } } + constants::DW_FORM_data16 => { + let block = input.split(R::Offset::from_u8(16))?; + AttributeValue::Block(block) + } constants::DW_FORM_udata => { let data = input.read_uleb128()?; AttributeValue::Udata(data) @@ -1889,6 +1992,10 @@ pub(crate) fn parse_attribute<'unit, 'abbrev, R: Reader>( let offset = input.read_offset(unit.format())?; AttributeValue::DebugStrRefSup(DebugStrOffset(offset)) } + constants::DW_FORM_line_strp => { + let offset = input.read_offset(unit.format())?; + AttributeValue::DebugLineStrRef(DebugLineStrOffset(offset)) + } constants::DW_FORM_implicit_const => AttributeValue::Sdata(spec.implicit_const_value()), constants::DW_FORM_strx | constants::DW_FORM_GNU_str_index => { let index = input.read_uleb128().and_then(R::Offset::from_u64)?; diff --git a/src/read/value.rs b/src/read/value.rs index 6c75112e1..b2e08d822 100644 --- a/src/read/value.rs +++ b/src/read/value.rs @@ -369,7 +369,7 @@ impl Value { // It's unclear if these should implicity convert to a signed value. // For now, we don't support them. Value::U8(_) | Value::U16(_) | Value::U32(_) | Value::U64(_) => { - return Err(Error::UnsupportedTypeOperation) + return Err(Error::UnsupportedTypeOperation); } }; Ok(value) @@ -666,7 +666,7 @@ impl Value { // It's unclear if signed values should implicity convert to an unsigned value. // For now, we don't support them. Value::I8(_) | Value::I16(_) | Value::I32(_) | Value::I64(_) => { - return Err(Error::UnsupportedTypeOperation) + return Err(Error::UnsupportedTypeOperation); } _ => return Err(Error::IntegralTypeRequired), }; @@ -739,7 +739,7 @@ impl Value { // It's unclear if unsigned values should implicity convert to a signed value. // For now, we don't support them. Value::U8(_) | Value::U16(_) | Value::U32(_) | Value::U64(_) => { - return Err(Error::UnsupportedTypeOperation) + return Err(Error::UnsupportedTypeOperation); } _ => return Err(Error::IntegralTypeRequired), }; diff --git a/src/test_util.rs b/src/test_util.rs index 1602f17a0..5b37cc64a 100644 --- a/src/test_util.rs +++ b/src/test_util.rs @@ -13,6 +13,7 @@ pub trait GimliSectionMethods { fn uleb(self, val: u64) -> Self; fn initial_length(self, format: Format, length: &Label, start: &Label) -> Self; fn word(self, size: u8, val: u64) -> Self; + fn word_label(self, size: u8, val: &Label) -> Self; } impl GimliSectionMethods for Section { @@ -42,4 +43,12 @@ impl GimliSectionMethods for Section { _ => panic!("unsupported word size"), } } + + fn word_label(self, size: u8, val: &Label) -> Self { + match size { + 4 => self.D32(val), + 8 => self.D64(val), + _ => panic!("unsupported word size"), + } + } } diff --git a/src/write/line.rs b/src/write/line.rs index c93810ecc..6bf70d606 100644 --- a/src/write/line.rs +++ b/src/write/line.rs @@ -5,7 +5,10 @@ use vec::Vec; use common::{DebugLineOffset, Encoding, Format}; use constants; use leb128; -use write::{Address, Error, Result, Section, SectionId, Writer}; +use write::{ + Address, DebugLineStrOffsets, DebugStrOffsets, Error, LineStringId, LineStringTable, Result, + Section, SectionId, StringId, Writer, +}; /// A table of line number programs that will be stored in a `.debug_line` section. #[derive(Debug, Default)] @@ -48,10 +51,15 @@ impl LineProgramTable { } /// Write the line number programs to the given section. - pub fn write(&self, debug_line: &mut DebugLine) -> Result { + pub fn write( + &self, + debug_line: &mut DebugLine, + debug_line_str_offsets: &DebugLineStrOffsets, + debug_str_offsets: &DebugStrOffsets, + ) -> Result { let mut offsets = Vec::new(); for program in &self.programs { - offsets.push(program.write(debug_line)?); + offsets.push(program.write(debug_line, debug_line_str_offsets, debug_str_offsets)?); } Ok(DebugLineOffsets { offsets }) } @@ -93,7 +101,7 @@ pub struct LineProgram { /// directory of the compilation unit. /// /// The first entry is for the working directory of the compilation unit. - directories: IndexSet>, + directories: IndexSet, /// A list of source file entries. /// @@ -104,7 +112,27 @@ pub struct LineProgram { /// /// For version >= 5, the first entry is for the primary source file /// of the compilation unit. - files: IndexMap<(Vec, DirectoryId), FileInfo>, + files: IndexMap<(LineString, DirectoryId), FileInfo>, + + /// True if the file entries may have valid timestamps. + /// + /// Entries may still have a timestamp of 0 even if this is set. + /// For version <= 4, this is ignored. + /// For version 5, this controls whether to emit `DW_LNCT_timestamp`. + pub file_has_timestamp: bool, + + /// True if the file entries may have valid sizes. + /// + /// Entries may still have a size of 0 even if this is set. + /// For version <= 4, this is ignored. + /// For version 5, this controls whether to emit `DW_LNCT_size`. + pub file_has_size: bool, + + /// True if the file entries have valid MD5 checksums. + /// + /// For version <= 4, this is ignored. + /// For version 5, this controls whether to emit `DW_LNCT_MD5`. + pub file_has_md5: bool, prev_row: LineRow, row: LineRow, @@ -127,7 +155,12 @@ impl LineProgram { /// # Panics /// /// Panics if `line_base` > 0. + /// /// Panics if `line_base` + `line_range` <= 0. + /// + /// Panics if `comp_dir` is empty or contains a null byte. + /// + /// Panics if `comp_file` is empty or contains a null byte. #[allow(clippy::too_many_arguments)] #[allow(clippy::new_ret_no_self)] pub fn new( @@ -136,8 +169,8 @@ impl LineProgram { maximum_operations_per_instruction: u8, line_base: i8, line_range: u8, - comp_dir: &[u8], - comp_file: &[u8], + comp_dir: LineString, + comp_file: LineString, comp_file_info: Option, ) -> LineProgram { // We require a special opcode for a line advance of 0. @@ -156,6 +189,9 @@ impl LineProgram { row: LineRow::new(encoding.version), instructions: Vec::new(), in_sequence: false, + file_has_timestamp: false, + file_has_size: false, + file_has_md5: false, }; // For all DWARF versions, directory index 0 is comp_dir. // For version <= 4, the entry is implicit. We still add @@ -203,16 +239,14 @@ impl LineProgram { /// /// # Panics /// - /// Panics if `directory` contains a null byte. - pub fn add_directory(&mut self, directory: &[u8]) -> DirectoryId { - // Duplicate entries are common, so only allocate if it doesn't exist. - if let Some((index, _)) = self.directories.get_full(directory) { - DirectoryId(index) - } else { - assert!(!directory.contains(&0)); - let (index, _) = self.directories.insert_full(directory.to_vec()); - DirectoryId(index) + /// Panics if `directory` is empty or contains a null byte. + pub fn add_directory(&mut self, directory: LineString) -> DirectoryId { + if let LineString::String(ref val) = directory { + assert!(!val.is_empty()); + assert!(!val.contains(&0)); } + let (index, _) = self.directories.insert_full(directory); + DirectoryId(index) } /// Get a reference to a directory entry. @@ -220,8 +254,8 @@ impl LineProgram { /// # Panics /// /// Panics if `id` is invalid. - pub fn get_directory(&self, id: DirectoryId) -> &[u8] { - self.directories.get_index(id.0).map(Vec::as_slice).unwrap() + pub fn get_directory(&self, id: DirectoryId) -> &LineString { + self.directories.get_index(id.0).unwrap() } /// Add a file entry and return its id. @@ -240,16 +274,19 @@ impl LineProgram { /// /// # Panics /// - /// Panics if 'file' contain a null byte. + /// Panics if 'file' is empty or contains a null byte. pub fn add_file( &mut self, - file: &[u8], + file: LineString, directory: DirectoryId, info: Option, ) -> FileId { - assert!(!file.contains(&0)); - // Always allocates because we can't implement Borrow for this. - let key = (file.to_vec(), directory); + if let LineString::String(ref val) = file { + assert!(!val.is_empty()); + assert!(!val.contains(&0)); + } + + let key = (file, directory); let index = if let Some(info) = info { let (index, _) = self.files.insert_full(key, info); index @@ -267,10 +304,10 @@ impl LineProgram { /// # Panics /// /// Panics if `id` is invalid. - pub fn get_file(&self, id: FileId) -> (&[u8], DirectoryId) { + pub fn get_file(&self, id: FileId) -> (&LineString, DirectoryId) { self.files .get_index(id.index(self.version())) - .map(|entry| ((entry.0).0.as_slice(), (entry.0).1)) + .map(|entry| (&(entry.0).0, (entry.0).1)) .unwrap() } @@ -465,17 +502,28 @@ impl LineProgram { } /// Write the line number program to the given section. - pub fn write(&self, w: &mut DebugLine) -> Result { + pub fn write( + &self, + w: &mut DebugLine, + debug_line_str_offsets: &DebugLineStrOffsets, + debug_str_offsets: &DebugStrOffsets, + ) -> Result { let offset = w.offset(); let length_offset = w.write_initial_length(self.format())?; let length_base = w.len(); - if self.version() < 2 || self.version() > 4 { + if self.version() < 2 || self.version() > 5 { return Err(Error::UnsupportedVersion(self.version())); } w.write_u16(self.version())?; + if self.version() >= 5 { + w.write_u8(self.address_size())?; + // Segment selector size. + w.write_u8(0)?; + } + let header_length_offset = w.len(); w.write_word(0, self.format().word_size())?; let header_length_base = w.len(); @@ -492,21 +540,97 @@ impl LineProgram { w.write_u8(OPCODE_BASE)?; w.write(&[0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1])?; - let dir_base = if self.version() <= 4 { 1 } else { 0 }; - for dir in self.directories.iter().skip(dir_base) { - w.write(dir)?; + if self.version() <= 4 { + // The first directory is stored as DW_AT_comp_dir. + for dir in self.directories.iter().skip(1) { + dir.write( + w, + constants::DW_FORM_string, + self.encoding, + debug_line_str_offsets, + debug_str_offsets, + )?; + } w.write_u8(0)?; - } - w.write_u8(0)?; - for ((file, dir), info) in self.files.iter() { - w.write(file)?; + for ((file, dir), info) in self.files.iter() { + file.write( + w, + constants::DW_FORM_string, + self.encoding, + debug_line_str_offsets, + debug_str_offsets, + )?; + w.write_uleb128(dir.0 as u64)?; + w.write_uleb128(info.timestamp)?; + w.write_uleb128(info.size)?; + } w.write_u8(0)?; - w.write_uleb128(dir.0 as u64)?; - w.write_uleb128(info.last_modification)?; - w.write_uleb128(info.length)?; + } else { + // Directory entry formats (only ever 1). + w.write_u8(1)?; + w.write_uleb128(u64::from(constants::DW_LNCT_path.0))?; + let dir_form = self.directories.get_index(0).unwrap().form(); + w.write_uleb128(u64::from(dir_form.0))?; + + // Directory entries. + w.write_uleb128(self.directories.len() as u64)?; + for dir in self.directories.iter() { + dir.write( + w, + dir_form, + self.encoding, + debug_line_str_offsets, + debug_str_offsets, + )?; + } + + // File name entry formats. + let count = 2 + + if self.file_has_timestamp { 1 } else { 0 } + + if self.file_has_size { 1 } else { 0 } + + if self.file_has_md5 { 1 } else { 0 }; + w.write_u8(count)?; + w.write_uleb128(u64::from(constants::DW_LNCT_path.0))?; + let file_form = (self.files.get_index(0).unwrap().0).0.form(); + w.write_uleb128(u64::from(file_form.0))?; + w.write_uleb128(u64::from(constants::DW_LNCT_directory_index.0))?; + w.write_uleb128(u64::from(constants::DW_FORM_udata.0))?; + if self.file_has_timestamp { + w.write_uleb128(u64::from(constants::DW_LNCT_timestamp.0))?; + w.write_uleb128(u64::from(constants::DW_FORM_udata.0))?; + } + if self.file_has_size { + w.write_uleb128(u64::from(constants::DW_LNCT_size.0))?; + w.write_uleb128(u64::from(constants::DW_FORM_udata.0))?; + } + if self.file_has_md5 { + w.write_uleb128(u64::from(constants::DW_LNCT_MD5.0))?; + w.write_uleb128(u64::from(constants::DW_FORM_data16.0))?; + } + + // File name entries. + w.write_uleb128(self.files.len() as u64)?; + for ((file, dir), info) in self.files.iter() { + file.write( + w, + file_form, + self.encoding, + debug_line_str_offsets, + debug_str_offsets, + )?; + w.write_uleb128(dir.0 as u64)?; + if self.file_has_timestamp { + w.write_uleb128(info.timestamp)?; + } + if self.file_has_size { + w.write_uleb128(info.size)?; + } + if self.file_has_md5 { + w.write(&info.md5)?; + } + } } - w.write_u8(0)?; let header_length = (w.len() - header_length_base) as u64; w.write_word_at( @@ -682,6 +806,80 @@ impl LineInstruction { } } +/// A string value for use in defining paths in line number programs. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum LineString { + /// A slice of bytes representing a string. Must not include null bytes. + /// Not guaranteed to be UTF-8 or anything like that. + String(Vec), + + /// A reference to a string in the `.debug_str` section. + StringRef(StringId), + + /// A reference to a string in the `.debug_line_str` section. + LineStringRef(LineStringId), +} + +impl LineString { + /// Create a `LineString` using the normal form for the given encoding. + pub fn new(val: &[u8], encoding: Encoding, line_strings: &mut LineStringTable) -> Self { + if encoding.version <= 4 { + LineString::String(val.to_vec()) + } else { + LineString::LineStringRef(line_strings.add(val)) + } + } + + fn form(&self) -> constants::DwForm { + match *self { + LineString::String(..) => constants::DW_FORM_string, + LineString::StringRef(..) => constants::DW_FORM_strp, + LineString::LineStringRef(..) => constants::DW_FORM_line_strp, + } + } + + fn write( + &self, + w: &mut DebugLine, + form: constants::DwForm, + encoding: Encoding, + debug_line_str_offsets: &DebugLineStrOffsets, + debug_str_offsets: &DebugStrOffsets, + ) -> Result<()> { + if form != self.form() { + return Err(Error::LineStringFormMismatch); + } + + match *self { + LineString::String(ref val) => { + w.write(val)?; + w.write_u8(0)?; + } + LineString::StringRef(val) => { + if encoding.version < 5 { + return Err(Error::NeedVersion(5)); + } + w.write_offset( + debug_str_offsets.get(val).0, + SectionId::DebugStr, + encoding.format.word_size(), + )?; + } + LineString::LineStringRef(val) => { + if encoding.version < 5 { + return Err(Error::NeedVersion(5)); + } + w.write_offset( + debug_line_str_offsets.get(val).0, + SectionId::DebugLineStr, + encoding.format.word_size(), + )?; + } + } + Ok(()) + } +} + /// An identifier for a directory in a `LineProgram`. /// /// Defaults to the working directory of the compilation unit. @@ -733,9 +931,15 @@ pub use self::id::*; pub struct FileInfo { /// The implementation defined timestamp of the last modification of the file, /// or 0 if not available. - pub last_modification: u64, + pub timestamp: u64, + /// The size of the file in bytes, or 0 if not available. - pub length: u64, + pub size: u64, + + /// A 16-byte MD5 digest of the file contents. + /// + /// Only used if version >= 5 and `LineProgram::file_has_md5` is `true`. + pub md5: [u8; 16], } define_section!( @@ -753,7 +957,7 @@ define_offsets!( mod convert { use super::*; use read::{self, Reader}; - use write::{ConvertError, ConvertResult}; + use write::{self, ConvertError, ConvertResult}; impl LineProgram { /// Create a line number program by reading the data from the given program. @@ -761,6 +965,9 @@ mod convert { /// Return the program and a mapping from file index to `FileId`. pub fn from>( mut from_program: read::IncompleteLineProgram, + dwarf: &read::Dwarf, + line_strings: &mut write::LineStringTable, + strings: &mut write::StringTable, convert_address: &Fn(u64) -> Option
, ) -> ConvertResult<(LineProgram, Vec)> { // Create mappings in case the source has duplicate files or directories. @@ -773,16 +980,20 @@ mod convert { let comp_dir = from_header .directory(0) .ok_or(ConvertError::MissingCompilationDirectory)?; + let comp_dir = LineString::from(comp_dir, dwarf, line_strings, strings)?; let comp_file = from_header .file(0) .ok_or(ConvertError::MissingCompilationFile)?; + let comp_name = + LineString::from(comp_file.path_name(), dwarf, line_strings, strings)?; if comp_file.directory_index() != 0 { return Err(ConvertError::InvalidDirectoryIndex); } let comp_file_info = FileInfo { - last_modification: comp_file.last_modification(), - length: comp_file.length(), + timestamp: comp_file.timestamp(), + size: comp_file.size(), + md5: *comp_file.md5(), }; if from_header.line_base() > 0 { @@ -794,8 +1005,8 @@ mod convert { from_header.maximum_operations_per_instruction(), from_header.line_base(), from_header.line_range(), - &*comp_dir.to_slice()?, - &*comp_file.path_name().to_slice()?, + comp_dir, + comp_name, Some(comp_file_info), ); @@ -808,24 +1019,28 @@ mod convert { } for from_dir in from_header.include_directories() { - dirs.push(program.add_directory(&*from_dir.to_slice()?)); + let from_dir = + LineString::from(from_dir.clone(), dwarf, line_strings, strings)?; + dirs.push(program.add_directory(from_dir)); } + program.file_has_timestamp = from_header.file_has_timestamp(); + program.file_has_size = from_header.file_has_size(); + program.file_has_md5 = from_header.file_has_md5(); for from_file in from_header.file_names() { + let from_name = + LineString::from(from_file.path_name(), dwarf, line_strings, strings)?; let from_dir = from_file.directory_index(); if from_dir >= dirs.len() as u64 { return Err(ConvertError::InvalidDirectoryIndex); } let from_dir = dirs[from_dir as usize]; let from_info = Some(FileInfo { - last_modification: from_file.last_modification(), - length: from_file.length(), + timestamp: from_file.timestamp(), + size: from_file.size(), + md5: *from_file.md5(), }); - files.push(program.add_file( - &*from_file.path_name().to_slice()?, - from_dir, - from_info, - )); + files.push(program.add_file(from_name, from_dir, from_info)); } program @@ -890,101 +1105,159 @@ mod convert { Ok((program, files)) } } + + impl LineString { + fn from>( + from_attr: read::AttributeValue, + dwarf: &read::Dwarf, + line_strings: &mut write::LineStringTable, + strings: &mut write::StringTable, + ) -> ConvertResult { + Ok(match from_attr { + read::AttributeValue::String(r) => LineString::String(r.to_slice()?.to_vec()), + read::AttributeValue::DebugStrRef(offset) => { + let r = dwarf.debug_str.get_str(offset)?; + let id = strings.add(r.to_slice()?); + LineString::StringRef(id) + } + read::AttributeValue::DebugLineStrRef(offset) => { + let r = dwarf.debug_line_str.get_str(offset)?; + let id = line_strings.add(r.to_slice()?); + LineString::LineStringRef(id) + } + _ => return Err(ConvertError::UnsupportedLineStringForm), + }) + } + } } #[cfg(test)] mod tests { use super::*; use read; - use write::EndianVec; + use write::{DebugLineStr, DebugStr, EndianVec, StringTable}; use LittleEndian; #[test] fn test_line_program_table() { + let dir1 = LineString::String(b"dir1".to_vec()); + let file1 = LineString::String(b"file1".to_vec()); + let dir2 = LineString::String(b"dir2".to_vec()); + let file2 = LineString::String(b"file2".to_vec()); + let mut programs = LineProgramTable::default(); + let mut program_ids = Vec::new(); + for &version in &[2, 3, 4, 5] { + for &address_size in &[4, 8] { + for &format in &[Format::Dwarf32, Format::Dwarf64] { + let encoding = Encoding { + format, + version, + address_size, + }; + let program = + LineProgram::new(encoding, 1, 1, -5, 14, dir1.clone(), file1.clone(), None); + let program_id = programs.add(program); - let encoding = Encoding { - version: 4, - address_size: 8, - format: Format::Dwarf32, - }; - let dir1 = &b"dir1"[..]; - let file1 = &b"file1"[..]; - let program1 = LineProgram::new(encoding, 4, 2, -5, 14, dir1, file1, None); - let program_id1 = programs.add(program1); + { + let program = programs.get_mut(program_id); + assert_eq!(&dir1, program.get_directory(program.default_directory())); + program.file_has_timestamp = true; + program.file_has_size = true; + if encoding.version >= 5 { + program.file_has_md5 = true; + } - let encoding = Encoding { - version: 2, - address_size: 4, - format: Format::Dwarf64, - }; - let dir2 = &b"dir2"[..]; - let file2 = &b"file2"[..]; - let program2 = LineProgram::new(encoding, 1, 1, -3, 12, dir2, file2, None); - let program_id2 = programs.add(program2); - { - let program2 = programs.get_mut(program_id2); - assert_eq!(dir2, program2.get_directory(program2.default_directory())); - - let dir3 = &b"dir3"[..]; - let dir3_id = program2.add_directory(dir3); - assert_eq!(dir3, program2.get_directory(dir3_id)); - assert_eq!(dir3_id, program2.add_directory(dir3)); - - let file3 = &b"file3"[..]; - let file3_info = FileInfo { - last_modification: 1, - length: 2, - }; - let file3_id = program2.add_file(file3, dir3_id, Some(file3_info)); - assert_eq!((file3, dir3_id), program2.get_file(file3_id)); - assert_eq!(file3_info, *program2.get_file_info(file3_id)); - - program2.get_file_info_mut(file3_id).length = 3; - assert_ne!(file3_info, *program2.get_file_info(file3_id)); - assert_eq!(file3_id, program2.add_file(file3, dir3_id, None)); - assert_ne!(file3_info, *program2.get_file_info(file3_id)); - assert_eq!( - file3_id, - program2.add_file(file3, dir3_id, Some(file3_info)) - ); - assert_eq!(file3_info, *program2.get_file_info(file3_id)); + let dir_id = program.add_directory(dir2.clone()); + assert_eq!(&dir2, program.get_directory(dir_id)); + assert_eq!(dir_id, program.add_directory(dir2.clone())); + + let file_info = FileInfo { + timestamp: 1, + size: 2, + md5: if encoding.version >= 5 { + [3; 16] + } else { + [0; 16] + }, + }; + let file_id = program.add_file(file2.clone(), dir_id, Some(file_info)); + assert_eq!((&file2, dir_id), program.get_file(file_id)); + assert_eq!(file_info, *program.get_file_info(file_id)); + + program.get_file_info_mut(file_id).size = 3; + assert_ne!(file_info, *program.get_file_info(file_id)); + assert_eq!(file_id, program.add_file(file2.clone(), dir_id, None)); + assert_ne!(file_info, *program.get_file_info(file_id)); + assert_eq!( + file_id, + program.add_file(file2.clone(), dir_id, Some(file_info)) + ); + assert_eq!(file_info, *program.get_file_info(file_id)); + + program_ids.push((program_id, file_id, encoding)); + } + } + } } - assert_eq!(programs.count(), 2); + assert_eq!(programs.count(), program_ids.len()); + let debug_line_str_offsets = DebugLineStrOffsets::default(); + let debug_str_offsets = DebugStrOffsets::default(); let mut debug_line = DebugLine::from(EndianVec::new(LittleEndian)); - let debug_line_offsets = programs.write(&mut debug_line).unwrap(); - assert_eq!(debug_line_offsets.count(), 2); + let debug_line_offsets = programs + .write(&mut debug_line, &debug_line_str_offsets, &debug_str_offsets) + .unwrap(); + assert_eq!(debug_line_offsets.count(), program_ids.len()); let read_debug_line = read::DebugLine::new(debug_line.slice(), LittleEndian); - let read_program1 = read_debug_line - .program( - debug_line_offsets.get(program_id1), - 8, - Some(read::EndianSlice::new(dir1, LittleEndian)), - Some(read::EndianSlice::new(file1, LittleEndian)), - ) - .unwrap(); - let read_program2 = read_debug_line - .program( - debug_line_offsets.get(program_id2), - 4, - Some(read::EndianSlice::new(dir2, LittleEndian)), - Some(read::EndianSlice::new(file2, LittleEndian)), - ) - .unwrap(); let convert_address = &|address| Some(Address::Absolute(address)); - for (program_id, read_program) in - vec![(program_id1, read_program1), (program_id2, read_program2)] - { - let program = programs.get(program_id); - let (convert_program, _convert_files) = - LineProgram::from(read_program, convert_address).unwrap(); + for (program_id, file_id, encoding) in program_ids.iter() { + let read_program = read_debug_line + .program( + debug_line_offsets.get(*program_id), + encoding.address_size, + Some(read::AttributeValue::String(read::EndianSlice::new( + b"dir1", + LittleEndian, + ))), + Some(read::AttributeValue::String(read::EndianSlice::new( + b"file1", + LittleEndian, + ))), + ) + .unwrap(); + + let dwarf = read::Dwarf::default(); + let mut convert_line_strings = LineStringTable::default(); + let mut convert_strings = StringTable::default(); + let (convert_program, convert_files) = LineProgram::from( + read_program, + &dwarf, + &mut convert_line_strings, + &mut convert_strings, + convert_address, + ) + .unwrap(); + let program = programs.get(*program_id); assert_eq!(convert_program.version(), program.version()); assert_eq!(convert_program.address_size(), program.address_size()); assert_eq!(convert_program.format(), program.format()); + + let convert_file_id = convert_files[file_id.index(encoding.version)]; + let (file, dir) = program.get_file(*file_id); + let (convert_file, convert_dir) = convert_program.get_file(convert_file_id); + assert_eq!(file, convert_file); + assert_eq!( + program.get_directory(dir), + convert_program.get_directory(convert_dir) + ); + assert_eq!( + program.get_file_info(*file_id), + convert_program.get_file_info(convert_file_id) + ); } } @@ -995,8 +1268,10 @@ mod tests { let file2 = &b"file2"[..]; let convert_address = &|address| Some(Address::Absolute(address)); - // TODO: version 5 - for &version in &[2, 3, 4] { + let debug_line_str_offsets = DebugLineStrOffsets::default(); + let debug_str_offsets = DebugStrOffsets::default(); + + for &version in &[2, 3, 4, 5] { for &address_size in &[4, 8] { for &format in &[Format::Dwarf32, Format::Dwarf64] { let encoding = Encoding { @@ -1007,11 +1282,20 @@ mod tests { let line_base = -5; let line_range = 14; let neg_line_base = (-line_base) as u8; - let mut program = - LineProgram::new(encoding, 1, 1, line_base, line_range, dir1, file1, None); + let mut program = LineProgram::new( + encoding, + 1, + 1, + line_base, + line_range, + LineString::String(dir1.to_vec()), + LineString::String(file1.to_vec()), + None, + ); let dir_id = program.default_directory(); - program.add_file(file1, dir_id, None); - let file_id = program.add_file(file2, dir_id, None); + program.add_file(LineString::String(file1.to_vec()), dir_id, None); + let file_id = + program.add_file(LineString::String(file2.to_vec()), dir_id, None); // Test sequences. { @@ -1246,7 +1530,9 @@ mod tests { let program_id = programs.add(program); let mut debug_line = DebugLine::from(EndianVec::new(LittleEndian)); - let debug_line_offsets = programs.write(&mut debug_line).unwrap(); + let debug_line_offsets = programs + .write(&mut debug_line, &debug_line_str_offsets, &debug_str_offsets) + .unwrap(); let read_debug_line = read::DebugLine::new(debug_line.slice(), LittleEndian); @@ -1254,13 +1540,28 @@ mod tests { .program( debug_line_offsets.get(program_id), address_size, - Some(read::EndianSlice::new(dir1, LittleEndian)), - Some(read::EndianSlice::new(file1, LittleEndian)), + Some(read::AttributeValue::String(read::EndianSlice::new( + dir1, + LittleEndian, + ))), + Some(read::AttributeValue::String(read::EndianSlice::new( + file1, + LittleEndian, + ))), ) .unwrap(); - let (convert_program, _convert_files) = - LineProgram::from(read_program, convert_address).unwrap(); + let dwarf = read::Dwarf::default(); + let mut convert_line_strings = LineStringTable::default(); + let mut convert_strings = StringTable::default(); + let (convert_program, _convert_files) = LineProgram::from( + read_program, + &dwarf, + &mut convert_line_strings, + &mut convert_strings, + convert_address, + ) + .unwrap(); assert_eq!( &convert_program.instructions[base_instructions.len()..], &test.1[..] @@ -1276,8 +1577,10 @@ mod tests { let dir1 = &b"dir1"[..]; let file1 = &b"file1"[..]; - // TODO: version 5 - for &version in &[2, 3, 4] { + let debug_line_str_offsets = DebugLineStrOffsets::default(); + let debug_str_offsets = DebugStrOffsets::default(); + + for &version in &[2, 3, 4, 5] { for &address_size in &[4, 8] { for &format in &[Format::Dwarf32, Format::Dwarf64] { let encoding = Encoding { @@ -1285,9 +1588,19 @@ mod tests { version, address_size, }; - let mut program = LineProgram::new(encoding, 1, 1, -5, 14, dir1, file1, None); + let mut program = LineProgram::new( + encoding, + 1, + 1, + -5, + 14, + LineString::String(dir1.to_vec()), + LineString::String(file1.to_vec()), + None, + ); let dir_id = program.default_directory(); - let file_id = program.add_file(file1, dir_id, None); + let file_id = + program.add_file(LineString::String(file1.to_vec()), dir_id, None); for &(ref inst, ref expect_inst) in &[ ( @@ -1359,7 +1672,9 @@ mod tests { let program_id = programs.add(program); let mut debug_line = DebugLine::from(EndianVec::new(LittleEndian)); - let debug_line_offsets = programs.write(&mut debug_line).unwrap(); + let debug_line_offsets = programs + .write(&mut debug_line, &debug_line_str_offsets, &debug_str_offsets) + .unwrap(); let read_debug_line = read::DebugLine::new(debug_line.slice(), LittleEndian); @@ -1367,8 +1682,14 @@ mod tests { .program( debug_line_offsets.get(program_id), address_size, - Some(read::EndianSlice::new(dir1, LittleEndian)), - Some(read::EndianSlice::new(file1, LittleEndian)), + Some(read::AttributeValue::String(read::EndianSlice::new( + dir1, + LittleEndian, + ))), + Some(read::AttributeValue::String(read::EndianSlice::new( + file1, + LittleEndian, + ))), ) .unwrap(); let read_header = read_program.header(); @@ -1400,6 +1721,9 @@ mod tests { let addresses = 0..50; let lines = -10..25i64; + let debug_line_str_offsets = DebugLineStrOffsets::default(); + let debug_str_offsets = DebugStrOffsets::default(); + for minimum_instruction_length in vec![1, 4] { for maximum_operations_per_instruction in vec![1, 3] { for line_base in vec![-5, 0] { @@ -1410,8 +1734,8 @@ mod tests { maximum_operations_per_instruction, line_base, line_range, - dir1, - file1, + LineString::String(dir1.to_vec()), + LineString::String(file1.to_vec()), None, ); for address_advance in addresses.clone() { @@ -1435,7 +1759,9 @@ mod tests { let mut programs = LineProgramTable::default(); let program_id = programs.add(program); let mut debug_line = DebugLine::from(EndianVec::new(LittleEndian)); - let debug_line_offsets = programs.write(&mut debug_line).unwrap(); + let debug_line_offsets = programs + .write(&mut debug_line, &debug_line_str_offsets, &debug_str_offsets) + .unwrap(); let read_debug_line = read::DebugLine::new(debug_line.slice(), LittleEndian); @@ -1443,8 +1769,14 @@ mod tests { .program( debug_line_offsets.get(program_id), 8, - Some(read::EndianSlice::new(dir1, LittleEndian)), - Some(read::EndianSlice::new(file1, LittleEndian)), + Some(read::AttributeValue::String(read::EndianSlice::new( + dir1, + LittleEndian, + ))), + Some(read::AttributeValue::String(read::EndianSlice::new( + file1, + LittleEndian, + ))), ) .unwrap(); @@ -1480,4 +1812,73 @@ mod tests { } } } + + #[test] + fn test_line_string() { + let version = 5; + + let file = b"file1"; + + let mut strings = StringTable::default(); + let string_id = strings.add("file2"); + let mut debug_str = DebugStr::from(EndianVec::new(LittleEndian)); + let debug_str_offsets = strings.write(&mut debug_str).unwrap(); + + let mut line_strings = LineStringTable::default(); + let line_string_id = line_strings.add("file3"); + let mut debug_line_str = DebugLineStr::from(EndianVec::new(LittleEndian)); + let debug_line_str_offsets = line_strings.write(&mut debug_line_str).unwrap(); + + for &address_size in &[4, 8] { + for &format in &[Format::Dwarf32, Format::Dwarf64] { + let encoding = Encoding { + format, + version, + address_size, + }; + + for (file, expect_file) in vec![ + ( + LineString::String(file.to_vec()), + read::AttributeValue::String(read::EndianSlice::new(file, LittleEndian)), + ), + ( + LineString::StringRef(string_id), + read::AttributeValue::DebugStrRef(debug_str_offsets.get(string_id)), + ), + ( + LineString::LineStringRef(line_string_id), + read::AttributeValue::DebugLineStrRef( + debug_line_str_offsets.get(line_string_id), + ), + ), + ] { + let mut programs = LineProgramTable::default(); + let program = LineProgram::new( + encoding, + 1, + 1, + -5, + 14, + LineString::String(b"dir".to_vec()), + file, + None, + ); + let program_id = programs.add(program); + + let mut debug_line = DebugLine::from(EndianVec::new(LittleEndian)); + let debug_line_offsets = programs + .write(&mut debug_line, &debug_line_str_offsets, &debug_str_offsets) + .unwrap(); + + let read_debug_line = read::DebugLine::new(debug_line.slice(), LittleEndian); + let read_program = read_debug_line + .program(debug_line_offsets.get(program_id), address_size, None, None) + .unwrap(); + let read_header = read_program.header(); + assert_eq!(read_header.file(0).unwrap().path_name(), expect_file); + } + } + } + } } diff --git a/src/write/mod.rs b/src/write/mod.rs index dee08694d..755d9c1f8 100644 --- a/src/write/mod.rs +++ b/src/write/mod.rs @@ -121,6 +121,8 @@ pub enum Error { InvalidAddress, /// A requested feature requires a different DWARF version. NeedVersion(u16), + /// Strings in line number program have mismatched forms. + LineStringFormMismatch, } impl fmt::Display for Error { @@ -146,6 +148,9 @@ impl fmt::Display for Error { "A requested feature requires a DWARF version {}.", version ), + Error::LineStringFormMismatch => { + write!(f, "Strings in line number program have mismatched forms.",) + } } } } @@ -164,6 +169,8 @@ pub enum SectionId { DebugInfo, /// The `.debug_line` section. DebugLine, + /// The `.debug_line_str` section. + DebugLineStr, /// The `.debug_loc` section. DebugLoc, /// The `.debug_loclists` section. @@ -187,6 +194,7 @@ impl SectionId { SectionId::DebugAbbrev => ".debug_abbrev", SectionId::DebugRanges => ".debug_ranges", SectionId::DebugLine => ".debug_line", + SectionId::DebugLineStr => ".debug_line_str", SectionId::DebugLoc => ".debug_loc", SectionId::DebugLocLists => ".debug_loclists", SectionId::DebugRngLists => ".debug_rnglists", @@ -249,6 +257,8 @@ mod convert { MissingCompilationFile, /// Writing this line number instruction is not implemented yet. UnsupportedLineInstruction, + /// Writing this form of line string is not implemented yet. + UnsupportedLineStringForm, /// A `.debug_line` file index is invalid. InvalidFileIndex, /// A `.debug_line` directory index is invalid. @@ -288,6 +298,10 @@ mod convert { f, "Writing this line number instruction is not implemented yet." ), + UnsupportedLineStringForm => write!( + f, + "Writing this form of line string is not implemented yet." + ), InvalidFileIndex => write!(f, "A `.debug_line` file index is invalid."), InvalidDirectoryIndex => write!(f, "A `.debug_line` directory index is invalid."), InvalidLineBase => write!(f, "A `.debug_line` line base is invalid."), diff --git a/src/write/str.rs b/src/write/str.rs index af9b874af..701edc89c 100644 --- a/src/write/str.rs +++ b/src/write/str.rs @@ -2,14 +2,9 @@ use indexmap::IndexSet; use std::ops::{Deref, DerefMut}; use vec::Vec; -use common::DebugStrOffset; +use common::{DebugLineStrOffset, DebugStrOffset}; use write::{Result, Section, SectionId, Writer}; -/// An identifier for a string in a `StringTable.` -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct StringId(usize); - -/// A table of strings that will be stored in a `.debug_str` section. // Requirements: // - values are `[u8]`, null bytes are not allowed // - insertion returns a fixed id @@ -27,59 +22,76 @@ pub struct StringId(usize); // - calculate offsets as we add values, and use that as the id. // This would avoid the need for DebugStrOffsets but would make it // hard to implement `get`. -#[derive(Debug, Default)] -pub struct StringTable { - strings: IndexSet>, -} - -impl StringTable { - /// Add a string to the string table and return its id. - /// - /// If the string already exists, then return the id of the existing string. - /// - /// # Panics - /// - /// Panics if `bytes` contains a null byte. - pub fn add(&mut self, bytes: T) -> StringId - where - T: Into>, - { - let bytes = bytes.into(); - assert!(!bytes.contains(&0)); - let (index, _) = self.strings.insert_full(bytes); - StringId(index) - } - - /// Return the number of strings in the table. - #[inline] - pub fn count(&self) -> usize { - self.strings.len() - } - - /// Get a reference to a string in the table. - /// - /// # Panics - /// - /// Panics if `id` is invalid. - pub fn get(&self, id: StringId) -> &[u8] { - self.strings.get_index(id.0).map(Vec::as_slice).unwrap() - } - - /// Write the string table to the `.debug_str` section. - /// - /// Returns the offsets at which the strings are written. - pub fn write(&self, w: &mut DebugStr) -> Result { - let mut offsets = Vec::new(); - for bytes in self.strings.iter() { - offsets.push(w.offset()); - w.write(bytes)?; - w.write_u8(0)?; +macro_rules! define_string_table { + ($name:ident, $id:ident, $section:ident, $offsets:ident, $docs:expr) => { + #[doc=$docs] + #[derive(Debug, Default)] + pub struct $name { + strings: IndexSet>, } - Ok(DebugStrOffsets { offsets }) - } + impl $name { + /// Add a string to the string table and return its id. + /// + /// If the string already exists, then return the id of the existing string. + /// + /// # Panics + /// + /// Panics if `bytes` contains a null byte. + pub fn add(&mut self, bytes: T) -> $id + where + T: Into>, + { + let bytes = bytes.into(); + assert!(!bytes.contains(&0)); + let (index, _) = self.strings.insert_full(bytes); + $id(index) + } + + /// Return the number of strings in the table. + #[inline] + pub fn count(&self) -> usize { + self.strings.len() + } + + /// Get a reference to a string in the table. + /// + /// # Panics + /// + /// Panics if `id` is invalid. + pub fn get(&self, id: $id) -> &[u8] { + self.strings.get_index(id.0).map(Vec::as_slice).unwrap() + } + + /// Write the string table to the `.debug_str` section. + /// + /// Returns the offsets at which the strings are written. + pub fn write(&self, w: &mut $section) -> Result<$offsets> { + let mut offsets = Vec::new(); + for bytes in self.strings.iter() { + offsets.push(w.offset()); + w.write(bytes)?; + w.write_u8(0)?; + } + + Ok($offsets { offsets }) + } + } + }; } +/// An identifier for a string in a `StringTable`. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct StringId(usize); + +define_string_table!( + StringTable, + StringId, + DebugStr, + DebugStrOffsets, + "A table of strings that will be stored in a `.debug_str` section." +); + define_section!(DebugStr, DebugStrOffset, "A writable `.debug_str` section."); define_offsets!( @@ -87,6 +99,29 @@ define_offsets!( "The section offsets of all strings within a `.debug_str` section." ); +/// An identifier for a string in a `LineStringTable`. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct LineStringId(usize); + +define_string_table!( + LineStringTable, + LineStringId, + DebugLineStr, + DebugLineStrOffsets, + "A table of strings that will be stored in a `.debug_line_str` section." +); + +define_section!( + DebugLineStr, + DebugLineStrOffset, + "A writable `.debug_line_str` section." +); + +define_offsets!( + DebugLineStrOffsets: LineStringId => DebugLineStrOffset, + "The section offsets of all strings within a `.debug_line_str` section." +); + #[cfg(test)] mod tests { use super::*; diff --git a/src/write/unit.rs b/src/write/unit.rs index e6839ab4f..ccf7291b4 100644 --- a/src/write/unit.rs +++ b/src/write/unit.rs @@ -10,8 +10,9 @@ use common::{ use constants; use write::{ Abbreviation, AbbreviationTable, Address, AttributeSpecification, DebugAbbrev, - DebugLineOffsets, DebugStrOffsets, Error, FileId, LineProgram, LineProgramId, RangeList, - RangeListId, RangeListOffsets, Result, Section, SectionId, StringId, Writer, + DebugLineOffsets, DebugLineStrOffsets, DebugStrOffsets, Error, FileId, LineProgram, + LineProgramId, LineStringId, RangeList, RangeListId, RangeListOffsets, Result, Section, + SectionId, StringId, Writer, }; /// An identifier for a unit in a `UnitTable`. @@ -81,6 +82,7 @@ impl UnitTable { debug_abbrev: &mut DebugAbbrev, debug_info: &mut DebugInfo, line_programs: &DebugLineOffsets, + line_strings: &DebugLineStrOffsets, range_lists: &RangeListOffsets, strings: &DebugStrOffsets, ) -> Result { @@ -96,6 +98,7 @@ impl UnitTable { abbrev_offset, &mut abbrevs, line_programs, + line_strings, strings, range_lists, &mut debug_info_refs, @@ -221,6 +224,7 @@ impl CompilationUnit { abbrev_offset: DebugAbbrevOffset, abbrevs: &mut AbbreviationTable, line_programs: &DebugLineOffsets, + line_strings: &DebugLineStrOffsets, strings: &DebugStrOffsets, range_lists: &RangeListOffsets, debug_info_refs: &mut Vec<(DebugInfoOffset, (UnitId, UnitEntryId), u8)>, @@ -261,6 +265,7 @@ impl CompilationUnit { &mut offsets, abbrevs, line_programs, + line_strings, strings, range_lists, &mut unit_refs, @@ -446,6 +451,7 @@ impl DebuggingInformationEntry { offsets: &mut UnitOffsets, abbrevs: &mut AbbreviationTable, line_programs: &DebugLineOffsets, + line_strings: &DebugLineStrOffsets, strings: &DebugStrOffsets, range_lists: &RangeListOffsets, unit_refs: &mut Vec<(DebugInfoOffset, UnitEntryId)>, @@ -468,6 +474,7 @@ impl DebuggingInformationEntry { w, unit, line_programs, + line_strings, strings, range_lists, unit_refs, @@ -483,6 +490,7 @@ impl DebuggingInformationEntry { offsets, abbrevs, line_programs, + line_strings, strings, range_lists, unit_refs, @@ -544,6 +552,7 @@ impl Attribute { w: &mut DebugInfo, unit: &CompilationUnit, line_programs: &DebugLineOffsets, + line_strings: &DebugLineStrOffsets, strings: &DebugStrOffsets, range_lists: &RangeListOffsets, unit_refs: &mut Vec<(DebugInfoOffset, UnitEntryId)>, @@ -553,6 +562,7 @@ impl Attribute { w, unit, line_programs, + line_strings, strings, range_lists, unit_refs, @@ -668,7 +678,7 @@ pub enum AttributeValue { /// `.debug_types` sections is implemented. DebugTypesRef(DebugTypeSignature), - /// A reference to a string. + /// A reference to a string in the `.debug_str` section. StringRef(StringId), /// An offset into the `.debug_str` section of the supplementary object file. @@ -678,6 +688,9 @@ pub enum AttributeValue { /// supplementary object files is implemented. DebugStrRefSup(DebugStrOffset), + /// A reference to a string in the `.debug_line_str` section. + LineStringRef(LineStringId), + /// A slice of bytes representing a string. Must not include null bytes. /// Not guaranteed to be UTF-8 or anything like that. String(Vec), @@ -778,6 +791,7 @@ impl AttributeValue { AttributeValue::DebugTypesRef(_) => constants::DW_FORM_ref_sig8, AttributeValue::StringRef(_) => constants::DW_FORM_strp, AttributeValue::DebugStrRefSup(_) => constants::DW_FORM_strp_sup, + AttributeValue::LineStringRef(_) => constants::DW_FORM_line_strp, AttributeValue::String(_) => constants::DW_FORM_string, AttributeValue::Encoding(_) | AttributeValue::DecimalSign(_) @@ -808,6 +822,7 @@ impl AttributeValue { w: &mut DebugInfo, unit: &CompilationUnit, line_programs: &DebugLineOffsets, + line_strings: &DebugLineStrOffsets, strings: &DebugStrOffsets, range_lists: &RangeListOffsets, unit_refs: &mut Vec<(DebugInfoOffset, UnitEntryId)>, @@ -949,6 +964,14 @@ impl AttributeValue { debug_assert_form!(constants::DW_FORM_strp_sup); w.write_word(val.0 as u64, unit.format().word_size())?; } + AttributeValue::LineStringRef(val) => { + debug_assert_form!(constants::DW_FORM_line_strp); + w.write_offset( + line_strings.get(val).0, + SectionId::DebugLineStr, + unit.format().word_size(), + )?; + } AttributeValue::String(ref val) => { debug_assert_form!(constants::DW_FORM_string); w.write(&val)?; @@ -1063,6 +1086,7 @@ mod convert { pub(crate) struct ConvertUnitContext<'a, R: Reader + 'a> { pub dwarf: &'a read::Dwarf, + pub line_strings: &'a mut write::LineStringTable, pub strings: &'a mut write::StringTable, pub ranges: &'a mut write::RangeListTable, pub convert_address: &'a Fn(u64) -> Option
, @@ -1077,8 +1101,8 @@ mod convert { impl UnitTable { /// Create a compilation unit table by reading the data in the given sections. /// - /// This also updates the given string table with the strings that are read - /// from the `debug_str` section. + /// This also updates the given tables with the values that are referenced from + /// attributes in this section. /// /// `convert_address` is a function to convert read addresses into the `Address` /// type. For non-relocatable addresses, this function may simply return @@ -1088,6 +1112,7 @@ mod convert { pub fn from>( dwarf: &read::Dwarf, line_programs: &mut write::LineProgramTable, + line_strings: &mut write::LineStringTable, strings: &mut write::StringTable, ranges: &mut write::RangeListTable, convert_address: &Fn(u64) -> Option
, @@ -1104,6 +1129,7 @@ mod convert { &mut unit_entry_offsets, dwarf, line_programs, + line_strings, strings, ranges, convert_address, @@ -1148,6 +1174,7 @@ mod convert { unit_entry_offsets: &mut HashMap, dwarf: &read::Dwarf, line_programs: &mut write::LineProgramTable, + line_strings: &mut write::LineStringTable, strings: &mut write::StringTable, ranges: &mut write::RangeListTable, convert_address: &Fn(u64) -> Option
, @@ -1165,12 +1192,8 @@ mod convert { let mut rnglists_base = DebugRngListsBase(0); { let from_root = from_root.entry(); - let comp_dir = from_root - .attr(constants::DW_AT_comp_dir)? - .and_then(|attr| dwarf.attr_string(&attr)); - let comp_file = from_root - .attr(constants::DW_AT_name)? - .and_then(|attr| dwarf.attr_string(&attr)); + let comp_dir = from_root.attr_value(constants::DW_AT_comp_dir)?; + let comp_file = from_root.attr_value(constants::DW_AT_name)?; if let Some(read::AttributeValue::DebugLineRef(offset)) = from_root.attr_value(constants::DW_AT_stmt_list)? { @@ -1180,7 +1203,13 @@ mod convert { comp_dir, comp_file, )?; - let (program, files) = LineProgram::from(from_program, convert_address)?; + let (program, files) = LineProgram::from( + from_program, + dwarf, + line_strings, + strings, + convert_address, + )?; line_program = Some((offset, line_programs.add(program))); line_program_files = files; } @@ -1207,6 +1236,7 @@ mod convert { } let mut context = ConvertUnitContext { dwarf, + line_strings, strings, ranges, convert_address, @@ -1430,6 +1460,11 @@ mod convert { let id = context.strings.add(r.to_slice()?); AttributeValue::StringRef(id) } + read::AttributeValue::DebugLineStrRef(offset) => { + let r = context.dwarf.debug_line_str.get_str(offset)?; + let id = context.line_strings.add(r.to_slice()?); + AttributeValue::LineStringRef(id) + } read::AttributeValue::String(r) => AttributeValue::String(r.to_slice()?.into()), read::AttributeValue::Encoding(val) => AttributeValue::Encoding(val), read::AttributeValue::DecimalSign(val) => AttributeValue::DecimalSign(val), @@ -1468,8 +1503,8 @@ mod tests { use read; use std::mem; use write::{ - DebugLine, DebugRanges, DebugRngLists, DebugStr, EndianVec, LineProgramTable, Range, - RangeListOffsets, RangeListTable, StringTable, + DebugLine, DebugLineStr, DebugRanges, DebugRngLists, DebugStr, EndianVec, LineProgramTable, + LineString, LineStringTable, Range, RangeListOffsets, RangeListTable, StringTable, }; use LittleEndian; @@ -1586,6 +1621,7 @@ mod tests { } let debug_line_offsets = DebugLineOffsets::default(); + let debug_line_str_offsets = DebugLineStrOffsets::default(); let range_list_offsets = RangeListOffsets::default(); let mut debug_str = DebugStr::from(EndianVec::new(LittleEndian)); @@ -1598,6 +1634,7 @@ mod tests { &mut debug_abbrev, &mut debug_info, &debug_line_offsets, + &debug_line_str_offsets, &range_list_offsets, &debug_str_offsets, ) @@ -1638,8 +1675,11 @@ mod tests { otherwise => panic!("unexpected {:?}", otherwise), }; assert_eq!(producer, b"root"); - let read_producer = read_root.attr(constants::DW_AT_producer).unwrap().unwrap(); - assert_eq!(dwarf.attr_string(&read_producer).unwrap().slice(), producer); + let read_producer = read_root + .attr_value(constants::DW_AT_producer) + .unwrap() + .unwrap(); + assert_eq!(dwarf.attr_string(read_producer).unwrap().slice(), producer); } let mut children = root.children().cloned(); @@ -1659,8 +1699,11 @@ mod tests { }; let name = strings.get(name); assert_eq!(name, b"child1"); - let read_name = read_child.attr(constants::DW_AT_name).unwrap().unwrap(); - assert_eq!(dwarf.attr_string(&read_name).unwrap().slice(), name); + let read_name = read_child + .attr_value(constants::DW_AT_name) + .unwrap() + .unwrap(); + assert_eq!(dwarf.attr_string(read_name).unwrap().slice(), name); } { @@ -1678,8 +1721,11 @@ mod tests { }; let name = strings.get(name); assert_eq!(name, b"child2"); - let read_name = read_child.attr(constants::DW_AT_name).unwrap().unwrap(); - assert_eq!(dwarf.attr_string(&read_name).unwrap().slice(), name); + let read_name = read_child + .attr_value(constants::DW_AT_name) + .unwrap() + .unwrap(); + assert_eq!(dwarf.attr_string(read_name).unwrap().slice(), name); } assert!(read_entries.next_dfs().unwrap().is_none()); @@ -1730,11 +1776,13 @@ mod tests { assert!(read_units.next().unwrap().is_none()); let mut convert_line_programs = LineProgramTable::default(); + let mut convert_line_strings = LineStringTable::default(); let mut convert_strings = StringTable::default(); let mut convert_ranges = RangeListTable::default(); let convert_units = UnitTable::from( &dwarf, &mut convert_line_programs, + &mut convert_line_strings, &mut convert_strings, &mut convert_ranges, &|address| Some(Address::Absolute(address)), @@ -1775,6 +1823,15 @@ mod tests { let mut debug_str = DebugStr::from(EndianVec::new(LittleEndian)); let debug_str_offsets = strings.write(&mut debug_str).unwrap(); let read_debug_str = read::DebugStr::new(debug_str.slice(), LittleEndian); + + let mut line_strings = LineStringTable::default(); + line_strings.add("line string one"); + let line_string_id = line_strings.add("line string two"); + let mut debug_line_str = DebugLineStr::from(EndianVec::new(LittleEndian)); + let debug_line_str_offsets = line_strings.write(&mut debug_line_str).unwrap(); + let read_debug_line_str = + read::DebugLineStr::from(read::EndianSlice::new(debug_line_str.slice(), LittleEndian)); + let data = vec![1, 2, 3, 4]; let read_data = read::EndianSlice::new(&[1, 2, 3, 4], LittleEndian); @@ -1911,6 +1968,13 @@ mod tests { AttributeValue::DebugStrRefSup(DebugStrOffset(0x1234)), read::AttributeValue::DebugStrRefSup(DebugStrOffset(0x1234)), ), + ( + constants::DW_AT_name, + AttributeValue::LineStringRef(line_string_id), + read::AttributeValue::DebugLineStrRef( + debug_line_str_offsets.get(line_string_id), + ), + ), ( constants::DW_AT_name, AttributeValue::String(data.clone()), @@ -1992,6 +2056,7 @@ mod tests { &mut debug_info, &unit, &debug_line_offsets, + &debug_line_str_offsets, &debug_str_offsets, &range_list_offsets, &mut unit_refs, @@ -2016,6 +2081,7 @@ mod tests { let dwarf = read::Dwarf { debug_str: read_debug_str.clone(), + debug_line_str: read_debug_line_str.clone(), ranges: read::RangeLists::new( read_debug_ranges.clone(), read_debug_rnglists, @@ -2026,6 +2092,7 @@ mod tests { let mut context = convert::ConvertUnitContext { dwarf: &dwarf, + line_strings: &mut line_strings, strings: &mut strings, ranges: &mut ranges, convert_address: &|address| Some(Address::Absolute(address)), @@ -2113,6 +2180,7 @@ mod tests { } let debug_line_offsets = DebugLineOffsets::default(); + let debug_line_str_offsets = DebugLineStrOffsets::default(); let debug_str_offsets = DebugStrOffsets::default(); let range_list_offsets = RangeListOffsets::default(); let mut debug_info = DebugInfo::from(EndianVec::new(LittleEndian)); @@ -2122,6 +2190,7 @@ mod tests { &mut debug_abbrev, &mut debug_info, &debug_line_offsets, + &debug_line_str_offsets, &range_list_offsets, &debug_str_offsets, ) @@ -2134,6 +2203,7 @@ mod tests { debug_abbrev: read::DebugAbbrev::new(debug_abbrev.slice(), LittleEndian), debug_info: read::DebugInfo::new(debug_info.slice(), LittleEndian), debug_line: read::DebugLine::new(&[], LittleEndian), + debug_line_str: read::DebugLineStr::from(read::EndianSlice::new(&[], LittleEndian)), debug_str: read::DebugStr::new(&[], LittleEndian), ..Default::default() }; @@ -2199,11 +2269,13 @@ mod tests { } let mut convert_line_programs = LineProgramTable::default(); + let mut convert_line_strings = LineStringTable::default(); let mut convert_strings = StringTable::default(); let mut convert_ranges = RangeListTable::default(); let convert_units = UnitTable::from( &dwarf, &mut convert_line_programs, + &mut convert_line_strings, &mut convert_strings, &mut convert_ranges, &|address| Some(Address::Absolute(address)), @@ -2315,6 +2387,7 @@ mod tests { add_children(&mut units, unit_id2); let debug_line_offsets = DebugLineOffsets::default(); + let debug_line_str_offsets = DebugLineStrOffsets::default(); let debug_str_offsets = DebugStrOffsets::default(); let range_list_offsets = RangeListOffsets::default(); let mut debug_info = DebugInfo::from(EndianVec::new(LittleEndian)); @@ -2324,6 +2397,7 @@ mod tests { &mut debug_abbrev, &mut debug_info, &debug_line_offsets, + &debug_line_str_offsets, &range_list_offsets, &debug_str_offsets, ) @@ -2341,14 +2415,7 @@ mod tests { #[test] fn test_line_ref() { - let mut strings = StringTable::default(); - let mut ranges = RangeListTable::default(); - let mut debug_str = DebugStr::from(EndianVec::new(LittleEndian)); - let debug_str_offsets = strings.write(&mut debug_str).unwrap(); - let read_debug_str = read::DebugStr::new(debug_str.slice(), LittleEndian); - - // TODO: version 5 - for &version in &[2, 3, 4] { + for &version in &[2, 3, 4, 5] { for &address_size in &[4, 8] { for &format in &[Format::Dwarf32, Format::Dwarf64] { let encoding = Encoding { @@ -2360,33 +2427,59 @@ mod tests { // Create a line program table with two programs. // We'll test with a reference to the second program. let mut line_programs = LineProgramTable::default(); - let mut line_program = - LineProgram::new(encoding, 1, 1, -5, 14, b"comp_dir", b"comp_name", None); + let mut line_program = LineProgram::new( + encoding, + 1, + 1, + -5, + 14, + LineString::String(b"comp_dir".to_vec()), + LineString::String(b"comp_name".to_vec()), + None, + ); line_programs.add(line_program.clone()); let dir = line_program.default_directory(); - let file1 = line_program.add_file(b"file1", dir, None); - let file2 = line_program.add_file(b"file2", dir, None); + let file1 = + line_program.add_file(LineString::String(b"file1".to_vec()), dir, None); + let file2 = + line_program.add_file(LineString::String(b"file2".to_vec()), dir, None); let line_program_id = line_programs.add(line_program); // Write, read, and convert the line programs, so that we have the info // required to convert the attributes. + let line_strings = DebugLineStrOffsets::default(); + let strings = DebugStrOffsets::default(); let mut debug_line = DebugLine::from(EndianVec::new(LittleEndian)); - let debug_line_offsets = line_programs.write(&mut debug_line).unwrap(); + let debug_line_offsets = line_programs + .write(&mut debug_line, &line_strings, &strings) + .unwrap(); let line_program_offset = debug_line_offsets.get(line_program_id); let read_debug_line = read::DebugLine::new(debug_line.slice(), LittleEndian); let read_line_program = read_debug_line .program( line_program_offset, address_size, - Some(read::EndianSlice::new(b"comp_dir", LittleEndian)), - Some(read::EndianSlice::new(b"comp_name", LittleEndian)), + Some(read::AttributeValue::String(read::EndianSlice::new( + b"comp_dir", + LittleEndian, + ))), + Some(read::AttributeValue::String(read::EndianSlice::new( + b"comp_name", + LittleEndian, + ))), ) .unwrap(); - let (_, line_program_files) = - LineProgram::from(read_line_program, &|address| { - Some(Address::Absolute(address)) - }) - .unwrap(); + let dwarf = read::Dwarf::default(); + let mut convert_line_strings = LineStringTable::default(); + let mut convert_strings = StringTable::default(); + let (_, line_program_files) = LineProgram::from( + read_line_program, + &dwarf, + &mut convert_line_strings, + &mut convert_strings, + &|address| Some(Address::Absolute(address)), + ) + .unwrap(); // Fake the unit. let mut units = UnitTable::default(); @@ -2419,6 +2512,12 @@ mod tests { ), ][..] { + let mut ranges = RangeListTable::default(); + let mut strings = StringTable::default(); + let debug_str_offsets = DebugStrOffsets::default(); + let mut line_strings = LineStringTable::default(); + let debug_line_str_offsets = DebugLineStrOffsets::default(); + let form = value.form(encoding).unwrap(); let attr = Attribute { name: *name, @@ -2433,6 +2532,7 @@ mod tests { &mut debug_info, &unit, &debug_line_offsets, + &debug_line_str_offsets, &debug_str_offsets, &range_list_offsets, &mut unit_refs, @@ -2455,13 +2555,9 @@ mod tests { }; assert_eq!(read_value, expect_value); - let dwarf = read::Dwarf { - debug_str: read_debug_str.clone(), - ..Default::default() - }; - let mut context = convert::ConvertUnitContext { dwarf: &dwarf, + line_strings: &mut line_strings, strings: &mut strings, ranges: &mut ranges, convert_address: &|address| Some(Address::Absolute(address)), diff --git a/tests/convert_self.rs b/tests/convert_self.rs index 4e09e30b9..73df0ca3a 100644 --- a/tests/convert_self.rs +++ b/tests/convert_self.rs @@ -57,12 +57,14 @@ fn test_convert_debug_info() { ..Default::default() }; - let mut strings = write::StringTable::default(); let mut line_programs = write::LineProgramTable::default(); + let mut line_strings = write::LineStringTable::default(); + let mut strings = write::StringTable::default(); let mut ranges = write::RangeListTable::default(); let units = write::UnitTable::from( &dwarf, &mut line_programs, + &mut line_strings, &mut strings, &mut ranges, &|address| Some(Address::Absolute(address)), @@ -73,16 +75,11 @@ fn test_convert_debug_info() { .map(|id| units.get(write::UnitId(id)).count()) .sum(); assert_eq!(entries, 29_560); + assert_eq!(line_strings.count(), 0); assert_eq!(strings.count(), 3921); // Write to new sections - let mut write_debug_line = write::DebugLine::from(EndianVec::new(LittleEndian)); - let debug_line_offsets = line_programs - .write(&mut write_debug_line) - .expect("Should write line programs"); - let debug_line_data = write_debug_line.slice(); - assert_eq!(debug_line_offsets.count(), 23); - assert_eq!(debug_line_data.len(), 105_797); + let debug_line_str_offsets = write::DebugLineStrOffsets::default(); let mut write_debug_str = write::DebugStr::from(EndianVec::new(LittleEndian)); let debug_str_offsets = strings @@ -92,6 +89,18 @@ fn test_convert_debug_info() { assert_eq!(debug_str_offsets.count(), 3921); assert_eq!(debug_str_data.len(), 144_731); + let mut write_debug_line = write::DebugLine::from(EndianVec::new(LittleEndian)); + let debug_line_offsets = line_programs + .write( + &mut write_debug_line, + &debug_line_str_offsets, + &debug_str_offsets, + ) + .expect("Should write line programs"); + let debug_line_data = write_debug_line.slice(); + assert_eq!(debug_line_offsets.count(), 23); + assert_eq!(debug_line_data.len(), 105_797); + let mut write_debug_ranges = write::DebugRanges::from(EndianVec::new(LittleEndian)); let mut write_debug_rnglists = write::DebugRngLists::from(EndianVec::new(LittleEndian)); let range_list_offsets = ranges @@ -109,6 +118,7 @@ fn test_convert_debug_info() { &mut write_debug_abbrev, &mut write_debug_info, &debug_line_offsets, + &debug_line_str_offsets, &range_list_offsets, &debug_str_offsets, ) @@ -140,11 +150,13 @@ fn test_convert_debug_info() { }; let mut line_programs = write::LineProgramTable::default(); + let mut line_strings = write::LineStringTable::default(); let mut strings = write::StringTable::default(); let mut ranges = write::RangeListTable::default(); let units = write::UnitTable::from( &dwarf, &mut line_programs, + &mut line_strings, &mut strings, &mut ranges, &|address| Some(Address::Absolute(address)), diff --git a/tests/parse_self.rs b/tests/parse_self.rs index c1ad2e51d..d5ed2c583 100644 --- a/tests/parse_self.rs +++ b/tests/parse_self.rs @@ -2,8 +2,8 @@ extern crate gimli; use gimli::{ AttributeValue, DebugAbbrev, DebugAddr, DebugAddrBase, DebugAranges, DebugInfo, DebugLine, - DebugLoc, DebugLocLists, DebugPubNames, DebugPubTypes, DebugRanges, DebugRngLists, DebugStr, - Encoding, EndianSlice, Expression, LittleEndian, LocationLists, Operation, RangeLists, Reader, + DebugLoc, DebugLocLists, DebugPubNames, DebugPubTypes, DebugRanges, DebugRngLists, Encoding, + EndianSlice, Expression, LittleEndian, LocationLists, Operation, RangeLists, Reader, }; use std::collections::hash_map::HashMap; use std::env; @@ -103,9 +103,6 @@ fn test_parse_self_debug_line() { let debug_line = read_section("debug_line"); let debug_line = DebugLine::new(&debug_line, LittleEndian); - let debug_str = read_section("debug_str"); - let debug_str = DebugStr::new(&debug_str, LittleEndian); - let mut iter = debug_info.units(); while let Some(unit) = iter.next().expect("Should parse compilation unit") { let abbrevs = unit @@ -118,13 +115,11 @@ fn test_parse_self_debug_line() { let unit_entry = cursor.current().expect("Should have a root entry"); let comp_dir = unit_entry - .attr(gimli::DW_AT_comp_dir) - .expect("Should parse comp_dir attribute") - .and_then(|attr| attr.string_value(&debug_str)); + .attr_value(gimli::DW_AT_comp_dir) + .expect("Should parse comp_dir attribute"); let comp_name = unit_entry - .attr(gimli::DW_AT_name) - .expect("Should parse name attribute") - .and_then(|attr| attr.string_value(&debug_str)); + .attr_value(gimli::DW_AT_name) + .expect("Should parse name attribute"); if let Some(AttributeValue::DebugLineRef(offset)) = unit_entry .attr_value(gimli::DW_AT_stmt_list)